{ "cells": [ { "cell_type": "markdown", "metadata": { "toc": "true" }, "source": [ "# Table of Contents\n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T12:43:22.807044", "start_time": "2017-01-03T12:43:21.912623" }, "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "pd.options.display.max_rows = 200\n", "import os" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T12:43:22.813211", "start_time": "2017-01-03T12:43:22.808775" }, "collapsed": true }, "outputs": [], "source": [ "dir_ = '/Users/jeriwieringa/Dissertation/drafts/data/spelling-statistics/round4/'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T12:43:23.127572", "start_time": "2017-01-03T12:43:23.120606" }, "collapsed": true }, "outputs": [], "source": [ "titles = [\"ADV\", \"AmSn\", \"ARAI\", \"CE\", \"CUV\", \"EDU\", \"GCB\", \"GH\", \"GOH\", \"GS\", \"HM\", \"HR\", \n", " \"IR\", \"LB\", \"LH\", \"LibM\", \"LUH\", \"NMN\",\"PHJ\",\"PTAR\",\"PUR\",\"RH\",\"Sligo\",\"SOL\",\n", " \"ST\",\"SUW\",\"TCOG\",\"TMM\",\"WMH\",\"YI\"]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T12:43:23.843597", "start_time": "2017-01-03T12:43:23.838146" }, "collapsed": true }, "outputs": [], "source": [ "def results_to_df(title):\n", " for filename in os.listdir(dir_):\n", " if filename.endswith(\"{}.txt\".format(title)):\n", " df = pd.read_csv(dir_ + filename)\n", " df['word_length'] = df['spell_error'].str.len()\n", " return(df) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As the goal here is to identify words that should be added to the spell check list, I am dropping all words with a count of \"1\" and all single letter words." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T12:43:25.371620", "start_time": "2017-01-03T12:43:25.367983" }, "collapsed": false }, "outputs": [], "source": [ "def query_df(df, count_value, length_value, sort_by):\n", " return(df.query('count > {} & word_length > {}'.format(count_value, length_value)).sort_values(sort_by, ascending=False))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:17:54.986152", "start_time": "2016-12-12T16:17:54.909424" }, "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for ADV:\n", " spell_error count word_length\n", "6805 tion 807 4\n", "345 dren 329 4\n", "8406 chil 326 4\n", "4459 educa 323 5\n", "13967 ment 304 4\n", "8620 n't 234 3\n", "5858 ers 208 3\n", "3554 tions 202 5\n", "8709 edu 175 3\n", "10179 pre 174 3\n", "4965 ence 160 4\n", "6314 ple 135 3\n", "10154 mis 135 3\n", "10621 tional 128 6\n", "9562 tian 126 4\n", "853 ith 125 3\n", "9528 ful 112 3\n", "10503 ments 98 5\n", "10823 ent 96 3\n", "7684 ber 96 3\n", "13643 peo 93 3\n", "7142 ofthe 90 5\n", "13878 prin 87 4\n", "10864 ture 84 4\n", "5326 ucation 81 7\n", "7257 struction 74 9\n", "4823 chas 70 4\n", "12444 lege 68 4\n", "15542 ance 68 4\n", "4999 sionary 68 7\n", "1990 ents 67 4\n", "13409 tem 65 3\n", "13980 agt 64 3\n", "3586 ciples 64 6\n", "1020 ary 63 3\n", "921 ble 63 3\n", "12270 ual 58 3\n", "9463 ure 55 3\n", "1797 ference 54 7\n", "8404 bers 52 4\n", "1527 ject 52 4\n", "2843 sys 51 3\n", "2729 instruc 50 7\n", "11096 experi 50 6\n", "196 tle 49 3\n", "4705 coun 49 4\n", "3448 princi 47 6\n", "8834 'll 46 3\n", "7217 knowl 45 5\n", "1841 dustrial 45 8\n", "15997 ning 43 4\n", "2687 accom 43 5\n", "12501 ical 42 4\n", "15325 eral 42 4\n", "6481 prac 42 4\n", "7975 ples 41 4\n", "4601 ters 41 4\n", "166 har 41 3\n", "10144 sloyd 40 5\n", "6903 perience 40 8\n", "9696 cational 40 8\n", "16693 lan 40 3\n", "16430 ork 39 3\n", "11955 oppor 39 5\n", "3870 suc 39 3\n", "149 tained 39 6\n", "420 mer 39 3\n", "13245 lished 39 6\n", "3502 tis 39 3\n", "7184 impor 38 5\n", "10655 hile 38 4\n", "8931 institu 38 7\n", "4095 dif 38 3\n", "939 sions 35 5\n", "16133 tance 35 5\n", "1323 ucational 35 9\n", "781 neces 34 5\n", "3302 estab 34 5\n", "14439 anoka 34 5\n", "1859 ceived 34 6\n", "10554 tjt 33 3\n", "7960 tbe 33 3\n", "11823 ver 33 3\n", "6768 arith 33 5\n", "15626 tunity 32 6\n", "12094 prepara 32 7\n", "6255 sible 31 5\n", "15437 partment 31 8\n", "1290 wil 31 3\n", "6455 dred 31 4\n", "11231 pils 31 4\n", "17002 tary 31 4\n", "16588 proph 31 5\n", "2160 ered 31 4\n", "7200 direc 31 5\n", "2411 dence 30 5\n", "842 jects 30 5\n", "9964 ous 30 3\n", "14996 tlie 30 4\n", "16270 tive 29 4\n", "... ... ... ...\n", "14571 ington 4 6\n", "14564 'of 4 3\n", "8981 quence 4 6\n", "9008 ofour 4 5\n", "9012 riences 4 7\n", "9050 gradu 4 5\n", "9051 investi 4 7\n", "9077 ’ou 4 3\n", "9098 kankakee 4 8\n", "14437 cident 4 6\n", "9223 nally 4 5\n", "9235 father’s 4 8\n", "14417 txi 4 3\n", "9291 sibilities 4 10\n", "1607 ucators 4 7\n", "14382 strated 4 7\n", "9347 mented 4 6\n", "9436 vis 4 3\n", "14305 lub 4 3\n", "14595 secretaryof 4 11\n", "14611 prindle 4 7\n", "2083 sul 4 3\n", "4667 windham 4 7\n", "14884 ganize 4 6\n", "14868 tainment 4 8\n", "14862 geni 4 4\n", "1412 perma 4 5\n", "8630 arner 4 5\n", "8656 departm 4 7\n", "8671 expi 4 4\n", "4858 slialt 4 6\n", "14785 erence 4 6\n", "8736 citv 4 4\n", "4826 dicate 4 6\n", "14775 wrhat 4 5\n", "1446 tbeir 4 5\n", "4814 ously 4 5\n", "14748 baby’s 4 6\n", "4786 expedted 4 8\n", "14704 tists 4 5\n", "14692 mained 4 6\n", "14691 astrong 4 7\n", "4715 school' 4 7\n", "1460 pers 4 4\n", "14665 sota 4 4\n", "2989 diredt 4 6\n", "4511 jxrir 4 5\n", "14287 kellar 4 6\n", "4510 uncon 4 5\n", "4488 dia 4 3\n", "13727 jno 4 3\n", "1873 cun 4 3\n", "4303 thejr 4 5\n", "13721 servation 4 9\n", "13709 und 4 3\n", "1888 tobin 4 5\n", "9890 tral 4 4\n", "9913 farmington 4 10\n", "4288 gravsville 4 10\n", "1936 foi 4 3\n", "4258 eord 4 4\n", "10035 gbaw 4 4\n", "4211 cesses 4 6\n", "10115 guages 4 6\n", "13584 tenance 4 7\n", "1947 satisfac 4 8\n", "10174 conclu 4 6\n", "4119 oti 4 3\n", "4066 freshies 4 8\n", "1982 dic 4 3\n", "10243 buluwayo 4 8\n", "10284 christain 4 9\n", "10309 'to 4 3\n", "4319 centsayear 4 10\n", "13763 theless 4 7\n", "4338 cuse 4 4\n", "14139 pelled 4 6\n", "4444 clared 4 6\n", "4438 ioi 4 3\n", "1663 jhe 4 3\n", "9591 byr 4 3\n", "4431 blos 4 4\n", "14243 gowdy 4 5\n", "9606 excep 4 5\n", "14224 ves 4 3\n", "9626 expla 4 5\n", "1698 atid 4 4\n", "1770 hol 4 3\n", "13791 fadts 4 5\n", "9660 tials 4 5\n", "9677 thk 4 3\n", "9712 atson 4 5\n", "1775 imi 4 3\n", "13995 clusively 4 9\n", "9723 trons 4 5\n", "9725 amination 4 9\n", "13964 tir 4 3\n", "9747 mbd 4 3\n", "13808 chinery 4 7\n", "11996 mieh 4 4\n", "\n", "[1490 rows x 3 columns]\n" ] } ], "source": [ "title = 'ADV'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:17:56.248156", "start_time": "2016-12-12T16:17:56.125444" }, "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for AmSn:\n", " spell_error count word_length\n", "22881 n't 2138 3\n", "20525 'the 431 4\n", "5986 tion 317 4\n", "12689 seventhday 258 10\n", "30463 indorsed 250 8\n", "43697 'of 232 3\n", "30788 satolli 230 7\n", "14935 employes 208 8\n", "57591 munn 206 4\n", "7727 'll 200 3\n", "35250 religio 195 7\n", "27685 ment 194 4\n", "47657 kee 187 3\n", "1332 cmsar 185 5\n", "21690 indorse 171 7\n", "46929 ringgold 167 8\n", "55394 'to 153 3\n", "6341 breckinridge 146 12\n", "18033 allister 138 8\n", "45304 pre 136 3\n", "51360 bateham 129 7\n", "43047 csar 119 4\n", "21218 schaff 119 6\n", "27616 aleck 112 5\n", "12493 socalled 108 8\n", "47956 erican 107 6\n", "28305 milly 103 5\n", "48878 sentin 103 6\n", "32546 capps 96 5\n", "58932 'is 95 3\n", "33977 tions 94 5\n", "13591 ican 94 4\n", "31801 palmeter 94 8\n", "58 neander 92 7\n", "40884 'that 91 5\n", "35412 're 86 3\n", "28440 eze 84 3\n", "40442 'and 83 4\n", "3341 'in 83 3\n", "46342 've 83 3\n", "59823 sundaylaw 81 9\n", "9881 epworth 81 7\n", "42692 messrs 81 6\n", "114 stundists 81 9\n", "56143 edmunds 80 7\n", "37278 cereola 79 7\n", "19514 chas 79 4\n", "23977 haskins 77 7\n", "21357 thi 76 3\n", "52560 ofthe 75 5\n", "32748 lld 74 3\n", "28194 freethought 67 11\n", "12991 coxey 64 5\n", "12455 connell 63 7\n", "27340 avenola 62 7\n", "2300 endeavorer 59 10\n", "45125 attaches 58 8\n", "9170 ments 58 5\n", "15882 rican 57 5\n", "44605 intrusted 57 9\n", "48111 tional 56 6\n", "37458 anierican 56 9\n", "9722 'not 56 4\n", "54911 paeifie 56 7\n", "6032 candidus 56 8\n", "14519 fifield 56 7\n", "5567 ple 55 3\n", "8285 geikie 54 6\n", "15599 indorsing 54 9\n", "44738 tregelles 54 9\n", "17668 ernment 54 7\n", "16292 employe 53 7\n", "5693 dred 53 4\n", "33027 ity 53 3\n", "729 depew 52 5\n", "47687 krug 51 4\n", "48086 obion 51 5\n", "39056 aivierican 51 10\n", "48366 assoeiation 51 11\n", "55214 englewood 50 9\n", "34162 inthe 50 5\n", "42041 litt 50 4\n", "55772 aro 50 3\n", "27069 cuyler 50 6\n", "57746 wellknown 50 9\n", "55693 sabbaththe 50 10\n", "40575 mallett 48 7\n", "39495 leiper 48 6\n", "23741 tian 48 4\n", "32400 cathedra 47 8\n", "30232 opposers 47 8\n", "30015 medo 46 4\n", "40613 'be 46 3\n", "40369 ent 45 3\n", "52072 kai 45 3\n", "34852 sundayclosing 45 13\n", "29829 stuttle 44 7\n", "18206 forit 44 5\n", "39807 judefind 44 8\n", "19273 keane 44 5\n", "... ... ... ...\n", "13666 ublished 4 8\n", "37236 banishments 4 11\n", "46641 'human 4 6\n", "48707 epi 4 3\n", "14494 irs 4 3\n", "15234 spiritand 4 9\n", "15179 'render 4 7\n", "23889 shbnah 4 6\n", "15134 'earth 4 6\n", "46068 'worship 4 8\n", "37357 beauti 4 6\n", "15033 libertythat 4 11\n", "37346 ganization 4 10\n", "14972 principlea 4 10\n", "14910 mur 4 3\n", "23931 cas 4 3\n", "46207 craham 4 6\n", "23970 oneman 4 6\n", "14847 kingd 4 5\n", "24027 rality 4 6\n", "14782 'general 4 8\n", "14776 mens 4 4\n", "14732 pocus 4 5\n", "46344 beand 4 5\n", "14730 intermeddler 4 12\n", "14726 legislati 4 9\n", "46426 ballentine 4 10\n", "14645 bouvier 4 7\n", "37258 holydays 4 8\n", "14612 bickerings 4 10\n", "24192 tures 4 5\n", "46597 koenig 4 6\n", "46601 lene 4 4\n", "14549 tious 4 5\n", "47457 selfstyled 4 10\n", "36877 godlikeness 4 11\n", "13655 errone 4 6\n", "47529 eccle 4 5\n", "48097 bestto 4 6\n", "36459 gert 4 4\n", "48116 wie 4 3\n", "12564 fiftyfirst 4 10\n", "48197 thf 4 3\n", "36458 answera 4 7\n", "48246 twentyseven 4 11\n", "12553 peoplenot 4 9\n", "36445 sanctities 4 10\n", "48347 catholie 4 8\n", "12418 delambre 4 8\n", "48367 faiththe 4 8\n", "36426 acific 4 6\n", "48407 intyre 4 6\n", "12413 'much 4 5\n", "24739 powe 4 4\n", "36347 statemanship 4 12\n", "12279 eign 4 4\n", "12275 ormed 4 5\n", "12171 amv 4 3\n", "11993 discernable 4 11\n", "48516 dror 4 4\n", "11916 olneyville 4 10\n", "24809 itdividual 4 10\n", "11892 'ye 4 3\n", "48617 ited 4 4\n", "11881 purpo 4 5\n", "36305 innes 4 5\n", "11797 sonship 4 7\n", "24716 cisions 4 7\n", "12814 yosemit 4 7\n", "12931 consciencethis 4 14\n", "36683 theonly 4 7\n", "13602 ather 4 5\n", "13600 democratism 4 11\n", "13566 sawbath 4 7\n", "24478 frse 4 4\n", "47647 protectories 4 12\n", "36826 caulay 4 6\n", "47690 lishing 4 7\n", "24667 charleton 4 9\n", "13351 publishkng 4 10\n", "47734 fica 4 4\n", "24701 oom 4 3\n", "47746 rer 4 3\n", "47762 hol 4 3\n", "36660 cus 4 3\n", "13061 nrs 4 3\n", "47810 fastwill 4 8\n", "36652 appre 4 5\n", "36617 snd 4 3\n", "47878 tured 4 5\n", "13300 hackmen 4 7\n", "36609 suger 4 5\n", "36533 thig 4 4\n", "47931 engler 4 6\n", "13166 philomath 4 9\n", "13071 sabbatizing 4 11\n", "47968 oal 4 3\n", "47976 king' 4 5\n", "36522 thisis 4 6\n", "30490 moen 4 4\n", "\n", "[3381 rows x 3 columns]\n" ] } ], "source": [ "title = 'AmSn'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:17:57.303996", "start_time": "2016-12-12T16:17:57.284400" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for ARAI:\n", " spell_error count word_length\n", "162 rockyhill 9 9\n", "467 stowell 8 7\n", "455 cheo 7 4\n", "226 k'o 7 3\n", "476 sha 6 3\n", "341 parana 6 6\n", "453 friedenstal 6 11\n", "494 nyanza 5 6\n", "295 mch 5 3\n", "119 chitonga 5 8\n", "409 nyassa 5 6\n", "411 solusi 4 6\n", "20 gnedjen 4 7\n", "393 kavirondo 4 9\n", "202 vuasu 4 5\n", "160 majita 4 6\n", "123 rentfro 4 7\n", "350 somabula 4 8\n" ] } ], "source": [ "title = 'ARAI'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:17:57.889092", "start_time": "2016-12-12T16:17:57.844862" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for CE:\n", " spell_error count word_length\n", "8780 n't 727 3\n", "195 'll 162 3\n", "7207 manumental 133 10\n", "1408 kibbin 56 6\n", "7387 adelphian 52 9\n", "236 've 52 3\n", "6995 tion 45 4\n", "2986 millis 44 6\n", "4171 tis 43 3\n", "5404 're 42 3\n", "213 tle 37 3\n", "1158 tne 26 3\n", "6648 ginn 26 4\n", "958 ful 26 3\n", "4925 nurses' 24 7\n", "5580 claxton 24 7\n", "396 dren 24 4\n", "4485 murry 24 5\n", "6502 delpha 23 6\n", "1571 pre 23 3\n", "1989 lusio 22 5\n", "1692 dioxid 22 6\n", "4415 wyclif 22 6\n", "8195 myrta 22 5\n", "8289 revell 21 6\n", "4874 lomb 20 4\n", "4965 flexner 20 7\n", "5217 sheyenne 20 8\n", "4227 maplewood 20 9\n", "5570 taquary 20 7\n", "1760 tunesassa 20 9\n", "8572 chil 20 4\n", "7377 'ry 20 3\n", "5160 goldsberry 20 10\n", "502 lippincott 19 10\n", "6160 'the 19 4\n", "6867 ers 19 3\n", "970 preceptresses 19 13\n", "2741 bausch 18 6\n", "52 lul 18 3\n", "623 hildebran 18 9\n", "5396 ment 17 4\n", "7889 ber 17 3\n", "739 mvp 16 3\n", "6996 mvo 16 3\n", "828 plainview 16 9\n", "3222 laurelwood 16 10\n", "6503 imo 15 3\n", "3694 tions 15 5\n", "7005 eufola 14 6\n", "6621 anb 14 3\n", "346 haapai 14 6\n", "7209 teacherage 13 10\n", "204 cready 13 6\n", "8179 sus 13 3\n", "1548 mis 13 3\n", "5999 seventhday 13 10\n", "5674 syllabi 13 7\n", "5308 prin 13 4\n", "482 mer 13 3\n", "4547 rowell 13 6\n", "1709 'to 12 3\n", "2267 ture 12 4\n", "341 sloyd 11 5\n", "5992 'of 11 3\n", "318 ight 11 4\n", "3910 kernelocorn 11 11\n", "4847 exousia 11 7\n", "5075 halfyear 11 8\n", "76 adventista 11 10\n", "6484 it' 10 3\n", "848 vis 10 3\n", "7016 eldredge 10 8\n", "8178 jarnboas 10 8\n", "8381 eze 10 3\n", "6246 thos 10 4\n", "3410 bez 10 3\n", "7484 ioo 10 3\n", "6147 seventhand 10 10\n", "3676 tio 10 3\n", "2304 colegio 10 7\n", "3787 waikato 10 7\n", "1018 sions 10 5\n", "8126 ovalau 10 6\n", "2607 lornedale 10 9\n", "2211 sul 10 3\n", "1768 latshaw 10 7\n", "7900 ver 10 3\n", "5448 dont 10 4\n", "4893 jes 10 3\n", "490 iiii 10 4\n", "4566 hillcrest 9 9\n", "4778 ther 9 4\n", "3154 divi 9 4\n", "5936 churchschool 9 12\n", "4524 sirable 9 7\n", "3436 nyhyttan 9 8\n", "5764 ade 9 3\n", "1486 buresala 9 8\n", "1248 welltrained 9 11\n", "... ... ... ...\n", "465 pursual 5 7\n", "5856 sixtyfive 5 9\n", "4865 tbe 5 3\n", "4873 coun 4 4\n", "8660 fortyfive 4 9\n", "1111 brenke 4 6\n", "4474 sangster 4 8\n", "7938 dishwashing 4 11\n", "2092 ents 4 4\n", "8588 fernwood 4 8\n", "8637 ral 4 3\n", "5133 tra 4 3\n", "7816 sayce 4 5\n", "1070 sirup 4 5\n", "1959 priori 4 6\n", "7752 freeset 4 7\n", "4460 baro 4 4\n", "4418 goodloe 4 7\n", "639 memoriam 4 8\n", "5202 sionary 4 7\n", "2320 flow'rs 4 7\n", "7598 beauti 4 6\n", "8700 farreaching 4 11\n", "8570 bers 4 4\n", "1053 burmans 4 7\n", "1560 burdett 4 7\n", "1912 wheatless 4 9\n", "1473 mee 4 3\n", "8481 dinsmore 4 8\n", "992 tian 4 4\n", "8342 tubere 4 6\n", "1613 mmmmmm 4 6\n", "1448 fitchburg 4 9\n", "1661 ecole 4 5\n", "1676 voyce 4 5\n", "1679 loth 4 4\n", "945 ith 4 3\n", "4752 untechnical 4 11\n", "7515 duqoin 4 6\n", "4922 preeeptresses 4 13\n", "1197 allround 4 8\n", "4931 academie 4 8\n", "4848 teachers' 4 9\n", "56 wel 4 3\n", "1866 unpedagogical 4 13\n", "5001 'neath 4 6\n", "8147 eighthgrade 4 11\n", "5298 'em 4 3\n", "2372 wirt 4 4\n", "7457 ove 4 3\n", "5702 dunamis 4 7\n", "3082 serampur 4 8\n", "5724 connell 4 7\n", "3084 crowell 4 7\n", "3108 wiggin 4 6\n", "3166 gillott 4 7\n", "6565 ucation 4 7\n", "5787 bab 4 3\n", "3269 excellences 4 11\n", "3280 postum 4 6\n", "3301 milner 4 6\n", "6444 boundarylines 4 13\n", "4070 farland 4 7\n", "3320 wellregulated 4 13\n", "3398 ordinating 4 10\n", "3476 hilprecht 4 9\n", "6221 robie 4 5\n", "3521 nally 4 5\n", "5843 harlen 4 6\n", "3586 oth 4 3\n", "3636 aik 4 3\n", "4051 timehonored 4 11\n", "5874 patsey 4 6\n", "6662 prac 4 4\n", "6664 ulty 4 4\n", "6714 homiletical 4 11\n", "4137 afe 4 3\n", "7456 sidewise 4 8\n", "5964 tetzlaff 4 8\n", "5323 'it 4 3\n", "7354 flagg 4 5\n", "7323 das 4 3\n", "2490 tuitions 4 8\n", "4170 openair 4 7\n", "2674 sabbathschool 4 13\n", "5450 literatures 4 11\n", "2800 'that 4 5\n", "7071 ies 4 3\n", "3068 twelvegrade 4 11\n", "7039 owne 4 4\n", "2821 trilliums 4 9\n", "2888 mit 4 3\n", "2952 cli 4 3\n", "2978 proteid 4 7\n", "6912 splain 4 6\n", "6909 'twould 4 7\n", "5583 godfearing 4 10\n", "6868 lation 4 6\n", "4843 fehling 4 7\n", "1058 parentteacher 4 13\n", "\n", "[341 rows x 3 columns]\n" ] } ], "source": [ "title = 'CE'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:17:58.554458", "start_time": "2016-12-12T16:17:58.462498" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for CUV:\n", " spell_error count word_length\n", "13919 ppf 2144 3\n", "25398 'the 510 4\n", "16367 brownlee 459 8\n", "20052 chas 446 4\n", "10106 sabbathschool 362 13\n", "20788 luzerne 361 7\n", "44554 'of 332 3\n", "13057 seventhday 324 10\n", "46923 reichenbach 312 11\n", "2780 elphatrick 307 10\n", "15194 morgantown 240 10\n", "44576 buttermore 232 10\n", "41310 'and 229 4\n", "56418 'to 216 3\n", "20403 leesburg 212 8\n", "17184 bfl 208 3\n", "40409 barto 206 5\n", "26122 columbiana 204 10\n", "19271 hicksville 199 10\n", "48767 gearhart 199 8\n", "57941 paden 198 5\n", "37636 oertley 187 7\n", "35831 syphers 182 7\n", "20825 yingling 182 8\n", "48291 phila 173 5\n", "33894 dowell 173 6\n", "44209 tolliver 170 8\n", "49251 dowling 170 7\n", "55177 conneaut 169 8\n", "4230 westmoreland 168 12\n", "11915 charloe 166 7\n", "61891 broughton 159 9\n", "49726 pengelly 157 8\n", "34476 meigs 155 5\n", "43721 dunkinson 150 9\n", "37481 tion 148 4\n", "12967 corry 147 5\n", "24454 apsley 147 6\n", "32656 silber 145 6\n", "44853 barnesville 142 11\n", "19894 lehigh 142 6\n", "25821 massillon 141 9\n", "24914 pre 138 3\n", "7090 gerhart 138 7\n", "55700 brownell 137 8\n", "44763 smithsburg 136 10\n", "62599 wanteda 134 7\n", "3051 midkiff 133 7\n", "42654 stroudsburg 131 11\n", "33542 kohr 126 4\n", "4561 tioga 126 5\n", "39973 harford 125 7\n", "3421 'in 124 3\n", "41492 'be 123 3\n", "62057 zimmerly 116 8\n", "9630 richland 115 8\n", "21950 thi 115 3\n", "8395 maloney 111 7\n", "36874 eusey 109 5\n", "20306 mingo 108 5\n", "112 searles 108 7\n", "53621 mahoning 106 8\n", "30013 cabell 103 6\n", "19229 pemberville 103 11\n", "53562 ofthe 102 5\n", "11076 bookmen 101 7\n", "25036 muskingum 101 9\n", "57794 braxton 100 7\n", "20358 isitor 99 6\n", "11455 carbondale 98 10\n", "29889 greenspring 97 11\n", "49597 cobr 97 4\n", "51886 gordonsville 96 12\n", "44508 wytheville 96 10\n", "29347 marysville 95 10\n", "14228 pickaway 95 8\n", "40000 paulding 95 8\n", "54282 meadville 94 9\n", "41740 'that 93 5\n", "36965 ashtabula 93 9\n", "24323 sayre 92 5\n", "28052 carthy 92 6\n", "42365 hubbell 92 7\n", "36383 heaton 91 6\n", "22290 bentz 90 5\n", "55583 wellsboro 90 9\n", "47723 vanzant 90 7\n", "9937 bassler 89 7\n", "32527 fairhill 88 8\n", "55232 mis 88 3\n", "7192 blest 88 5\n", "16898 rager 85 5\n", "53012 garmo 85 5\n", "6472 cuyahoga 84 8\n", "62491 miscl 84 5\n", "45618 honesdale 83 9\n", "14254 twentyfive 83 10\n", "29827 eachern 83 7\n", "23868 conwell 82 7\n", "3833 monongalia 82 10\n", "... ... ... ...\n", "28591 gestions 4 8\n", "28579 'members 4 8\n", "28535 osed 4 4\n", "28497 rii 4 3\n", "28496 recanvassing 4 12\n", "32431 'near 4 5\n", "32497 missionory 4 10\n", "38127 ris 4 3\n", "35948 'sin 4 4\n", "36597 edito 4 5\n", "36445 ular 4 4\n", "36366 chlo 4 4\n", "36283 ttt 4 3\n", "36082 religio 4 7\n", "36081 lyconing 4 8\n", "36057 firstday 4 8\n", "35992 'boys 4 5\n", "35962 mohler 4 6\n", "35927 cti 4 3\n", "35159 againit 4 7\n", "35857 'cents 4 6\n", "35843 springlield 4 11\n", "35752 affort 4 6\n", "35664 'carry 4 6\n", "35501 emmons 4 6\n", "35480 'connection 4 11\n", "35426 murry 4 5\n", "35258 prayermeeting 4 13\n", "35232 elvaton 4 7\n", "36642 fbr 4 3\n", "36899 tihe 4 4\n", "36925 conw 4 4\n", "37015 youwill 4 7\n", "37965 misel 4 5\n", "37963 gli 4 3\n", "37959 'part 4 5\n", "37900 sil 4 3\n", "37871 lord' 4 5\n", "37836 'took 4 5\n", "37793 birt 4 4\n", "37750 bordentown 4 10\n", "37611 weat 4 4\n", "37499 appre 4 5\n", "37469 uernon 4 6\n", "37436 excutive 4 8\n", "37411 mookerjie 4 9\n", "37388 reiehenbach 4 11\n", "37380 peoplein 4 8\n", "37287 'place 4 6\n", "37191 agents' 4 7\n", "37174 kil 4 3\n", "37027 alio 4 4\n", "35224 nol 4 3\n", "35131 ruthenians 4 10\n", "32689 oon 4 3\n", "33282 ommittee 4 8\n", "33747 othet 4 5\n", "33712 appr 4 4\n", "33692 summitt 4 7\n", "33601 landmuckfrom 4 12\n", "33474 bre 4 3\n", "33463 gouldsboro 4 10\n", "33431 worldthe 4 8\n", "33349 hord 4 4\n", "33290 confe 4 5\n", "33246 'showed 4 7\n", "35035 'alone 4 6\n", "33219 'important 4 10\n", "33192 winn 4 4\n", "33180 agusta 4 6\n", "33035 someof 4 6\n", "33028 trict 4 5\n", "33015 lura 4 4\n", "33014 whytsell 4 8\n", "32958 timeand 4 7\n", "32794 wenty 4 5\n", "33763 cohm 4 4\n", "33809 allene 4 6\n", "33858 dre 4 3\n", "33898 ior 4 3\n", "35032 sickler 4 7\n", "35003 depositaries 4 12\n", "34993 thefollowing 4 12\n", "34911 theth 4 5\n", "34511 humphries 4 9\n", "34441 whichwe 4 7\n", "34435 camerata 4 8\n", "34412 judiasm 4 7\n", "34407 'up 4 3\n", "34393 kly 4 3\n", "34330 gation 4 6\n", "34292 ourwork 4 7\n", "34281 clementon 4 9\n", "34271 loveof 4 6\n", "34238 exe 4 3\n", "34182 repot 4 5\n", "34125 rigby 4 5\n", "34115 geade 4 5\n", "34035 diegel 4 6\n", "62810 structed 4 8\n", "\n", "[3794 rows x 3 columns]\n" ] } ], "source": [ "title = 'CUV'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:17:59.131013", "start_time": "2016-12-12T16:17:59.095909" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for EDU:\n", " spell_error count word_length\n", "315 sloyd 29 5\n", "2166 bamberger 13 9\n", "2471 tion 12 4\n", "1920 salomon 11 7\n", "364 dep't 11 5\n", "1125 pre 10 3\n", "2519 abrahamson 9 10\n", "959 'the 8 4\n", "924 pub'g 8 5\n", "1772 anb 7 3\n", "2111 cator 7 5\n", "790 ment 7 4\n", "2419 publicschool 6 12\n", "2925 educa 6 5\n", "1783 thr 6 3\n", "1850 naas 6 4\n", "2676 majestatsbeleidigung 5 20\n", "2760 perlen 5 6\n", "2275 mit 5 3\n", "2815 'of 5 3\n", "15 morrill 5 7\n", "1786 brownell 5 8\n", "1592 frederikshavn 5 13\n", "1469 education' 5 10\n", "1330 edu 5 3\n", "106 tiie 4 4\n", "2569 educato 4 7\n", "166 ture 4 4\n", "258 tional 4 6\n", "2810 vergil 4 6\n", "2798 dingley 4 7\n", "441 whatley 4 7\n", "582 tre 4 3\n", "1038 'and 4 4\n", "2523 don'ts 4 6\n", "2057 micr 4 4\n", "2483 chas 4 4\n", "1436 cygnaeus 4 8\n", "2317 dhi 4 3\n", "2228 lan 4 3\n", "2992 ent 4 3\n" ] } ], "source": [ "title = 'EDU'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:18:00.067510", "start_time": "2016-12-12T16:17:59.981272" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for GCB:\n", " spell_error count word_length\n", "24302 tion 679 4\n", "40181 gcs 436 3\n", "38395 ence 346 4\n", "36370 'the 338 4\n", "42459 ference 289 7\n", "41860 ment 240 4\n", "37146 'of 232 3\n", "31501 ple 186 3\n", "24655 sabbathschool 179 13\n", "26374 'to 166 3\n", "24315 ers 161 3\n", "24659 tions 149 5\n", "42400 eral 146 4\n", "29039 basle 130 5\n", "13869 'be 130 3\n", "40618 chas 127 4\n", "22087 mittee 121 6\n", "15788 seventhday 111 10\n", "35805 ulletin 109 7\n", "8647 'and 109 4\n", "25443 'in 104 3\n", "31065 pre 103 3\n", "21205 sionary 100 7\n", "17212 mis 100 3\n", "6908 amens 85 5\n", "7977 ent 76 3\n", "43210 ren 75 3\n", "20925 ile 72 3\n", "38708 ences 72 5\n", "27563 tional 72 6\n", "2573 'that 71 5\n", "35810 agt 71 3\n", "37530 weiherweg 71 9\n", "18885 ments 67 5\n", "24306 ber 67 3\n", "38577 ary 62 3\n", "17636 lieve 60 5\n", "3381 sabbathkeepers 60 14\n", "41334 peo 60 3\n", "27475 ture 58 4\n", "23152 partment 56 8\n", "33824 'for 56 4\n", "15849 eign 56 4\n", "24241 ful 55 3\n", "2804 'by 54 3\n", "33586 ferences 54 8\n", "2135 dred 53 4\n", "14159 sions 52 5\n", "3133 bers 52 4\n", "2614 inthe 52 5\n", "19585 ized 50 4\n", "17721 'we 50 3\n", "2072 tle 50 3\n", "4368 thi 49 3\n", "13997 akersgaden 48 10\n", "3617 canv 47 4\n", "18810 ters 47 4\n", "21227 ical 45 4\n", "8457 prin 44 4\n", "33728 'is 44 3\n", "21993 sabbathschools 44 14\n", "5220 'have 43 5\n", "30226 ciples 42 6\n", "2261 tem 42 3\n", "4355 'but 41 4\n", "37731 taranaki 41 8\n", "35793 ning 41 4\n", "28477 cutchen 41 7\n", "9115 campmeetings 40 12\n", "6985 'work 40 5\n", "36171 ern 40 3\n", "24909 brunson 40 7\n", "21153 dren 40 4\n", "33487 ity 39 3\n", "10190 tian 39 4\n", "26399 correo 38 6\n", "39355 tive 38 4\n", "40619 sented 38 6\n", "12562 bourke 38 6\n", "22971 'been 38 5\n", "26460 ofthe 37 5\n", "37881 raratonga 36 9\n", "3719 'as 36 3\n", "36429 ioo 36 3\n", "43482 clure 35 5\n", "29900 ican 35 4\n", "3345 sible 35 5\n", "18834 cial 35 4\n", "16266 shiba 35 5\n", "1811 hildebran 35 9\n", "17146 fifield 35 7\n", "20134 rethe 35 5\n", "24293 conthe 35 6\n", "31618 dailybulletin 34 13\n", "40716 tothe 34 5\n", "3901 kee 34 3\n", "2164 erty 33 4\n", "12247 mal 33 3\n", "40543 fora 33 4\n", "33817 ceived 32 6\n", "... ... ... ...\n", "11914 overing 4 7\n", "11797 nueva 4 5\n", "11772 thework 4 7\n", "14372 overthe 4 7\n", "14467 bahler 4 6\n", "31838 conpeople 4 9\n", "17047 apand 4 5\n", "17423 herethe 4 7\n", "17267 thc 4 3\n", "17236 tempation 4 9\n", "17202 'territory 4 10\n", "17194 peoa 4 4\n", "17157 asuncion 4 8\n", "32609 ihave 4 5\n", "17070 standthe 4 8\n", "17066 sto 4 3\n", "32672 sinlessness 4 11\n", "17496 nominationsr 4 12\n", "17040 to' 4 3\n", "16977 terly 4 5\n", "16962 'new 4 4\n", "16945 stantial 4 8\n", "16828 prieser 4 7\n", "16793 harthe 4 6\n", "32838 thisthe 4 7\n", "16776 departthe 4 9\n", "32848 gle 4 3\n", "32309 inour 4 5\n", "32275 retheir 4 7\n", "32898 'thousand 4 9\n", "18198 gerona 4 6\n", "18668 burmah 4 6\n", "18646 himselfthe 4 10\n", "31878 'sent 4 5\n", "18357 peoof 4 5\n", "31983 iences 4 6\n", "18352 bogota 4 6\n", "18306 kjellman 4 8\n", "32043 asthe 4 5\n", "32058 veloped 4 7\n", "32069 hinderance 4 10\n", "17603 mesto 4 5\n", "18096 zations 4 7\n", "17976 sprohge 4 7\n", "17943 'year 4 5\n", "32167 mising 4 6\n", "17941 vith 4 4\n", "17758 misand 4 6\n", "17610 saleof 4 6\n", "32239 beis 4 4\n", "32252 kirkle 4 6\n", "16762 dantly 4 6\n", "32903 fested 4 6\n", "14478 conever 4 7\n", "15368 pra 4 3\n", "15768 'daily 4 6\n", "33409 keiskama 4 8\n", "33457 tuxen 4 5\n", "15737 swiggart 4 8\n", "15553 'training 4 9\n", "33495 connec 4 6\n", "15521 sonship 4 7\n", "15485 vartija 4 7\n", "15453 heavenlies 4 10\n", "15219 preciation 4 10\n", "15819 elffers 4 7\n", "15186 tralasian 4 9\n", "33644 neander 4 7\n", "33676 erning 4 6\n", "15084 conmake 4 7\n", "14831 diningroom 4 10\n", "14772 reour 4 5\n", "33786 'man 4 4\n", "33796 tinually 4 8\n", "14533 geheimnis 4 9\n", "15796 maxon 4 5\n", "33332 papanui 4 7\n", "32906 laplandish 4 10\n", "16497 'doing 4 6\n", "16663 cisely 4 6\n", "16658 farrer 4 6\n", "32981 foland 4 6\n", "32983 ured 4 4\n", "16625 gideonites 4 10\n", "16596 ticed 4 5\n", "16513 pointment 4 9\n", "33032 rael 4 4\n", "33074 ishing 4 6\n", "16452 sota 4 4\n", "15903 ''the 4 5\n", "16368 imthat 4 6\n", "16247 godand 4 6\n", "33158 lletin 4 6\n", "16244 manity 4 6\n", "16200 wherethe 4 8\n", "33243 merly 4 5\n", "16004 'business 4 9\n", "15960 michigani 4 9\n", "33299 wildgrube 4 9\n", "22045 'mission 4 8\n", "\n", "[2376 rows x 3 columns]\n" ] } ], "source": [ "title = 'GCB'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:18:01.175447", "start_time": "2016-12-12T16:18:01.108222" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for GH:\n", " spell_error count word_length\n", "26162 smouse 177 6\n", "26702 'the 153 4\n", "24283 schramm 113 7\n", "268 thot 107 4\n", "16496 'of 104 3\n", "18531 jno 99 3\n", "22762 chas 92 4\n", "20048 lintonia 82 8\n", "3106 tion 75 4\n", "5571 'to 75 3\n", "11071 altho 72 5\n", "10755 'and 72 4\n", "452 calmar 66 6\n", "24046 brot 65 4\n", "3395 maynor 62 6\n", "10348 strother 55 8\n", "4752 gos 54 3\n", "16306 eze 51 3\n", "11342 'em 50 3\n", "21187 mis 46 3\n", "11245 pre 46 3\n", "20415 'in 45 3\n", "5388 thi 44 3\n", "24252 ment 44 4\n", "5612 spartanburg 41 11\n", "7875 wilsonia 41 8\n", "20836 gemon 40 5\n", "13581 corsicana 40 9\n", "22388 wagor 39 5\n", "17544 thos 39 4\n", "7583 newbern 38 7\n", "21881 ohe 37 3\n", "5992 brethern 37 8\n", "6405 ocala 37 5\n", "18610 ospe 37 4\n", "16684 vagh 36 4\n", "16728 preceeding 36 10\n", "13719 orangeburg 36 10\n", "3243 'that 34 5\n", "26195 dont 34 4\n", "7749 ospel 34 5\n", "4935 oclock 32 6\n", "3120 ers 32 3\n", "11858 pel 31 3\n", "14738 'for 31 4\n", "16603 abney 31 5\n", "3298 inthe 30 5\n", "5138 tir 30 3\n", "3127 ments 29 5\n", "17880 ioo 29 3\n", "3111 ber 29 3\n", "14898 ood 28 3\n", "20104 ence 28 4\n", "8354 'are 27 4\n", "20998 seventhday 27 10\n", "14607 'is 27 3\n", "11519 mal 26 3\n", "3021 ful 26 3\n", "6459 ent 26 3\n", "3522 tions 26 5\n", "5669 ofthe 26 5\n", "11284 thots 25 5\n", "8640 ference 25 7\n", "15692 palo 25 4\n", "3989 palatka 25 7\n", "16954 thes 24 4\n", "18373 selfdenial 24 10\n", "17584 ern 24 3\n", "17549 ple 23 3\n", "576 simons 23 6\n", "25136 whetsel 23 7\n", "11163 blest 22 5\n", "4770 'not 22 4\n", "26044 sionary 22 7\n", "1679 ver 21 3\n", "7448 kno 21 3\n", "21027 sabbathschool 21 13\n", "25955 gorda 21 5\n", "26330 cleburne 21 8\n", "17323 'be 21 3\n", "19146 austell 21 7\n", "3905 tothe 21 5\n", "20894 sel 21 3\n", "21883 'it 20 3\n", "14089 erald 20 5\n", "6952 ves 20 3\n", "10003 'have 20 5\n", "570 loth 20 4\n", "13777 mer 20 3\n", "21831 'we 19 3\n", "7794 ren 19 3\n", "6711 olvin 19 5\n", "4441 psa 19 3\n", "1954 brack 19 5\n", "13871 devalls 19 7\n", "5396 punta 19 5\n", "3589 kittie 18 6\n", "4180 whi 18 3\n", "11799 ville 18 5\n", "7170 'this 18 5\n", "... ... ... ...\n", "11222 tery 4 4\n", "13451 plete 4 5\n", "8771 loomis 4 6\n", "16644 stangood 4 8\n", "21355 'such 4 5\n", "8656 neces 4 5\n", "20113 ands 4 4\n", "5090 'hundred 4 8\n", "5112 hayti 4 5\n", "20015 simonds 4 7\n", "19972 saken 4 5\n", "5169 tay 4 3\n", "5178 pigott 4 6\n", "5181 depew 4 5\n", "5291 tice 4 4\n", "5327 authur 4 6\n", "5397 mrand 4 5\n", "5579 pla 4 3\n", "19719 hinchcliff 4 10\n", "5646 busines 4 7\n", "5651 cuyler 4 6\n", "19618 vith 4 4\n", "5762 eralo 4 5\n", "19428 the' 4 4\n", "19395 woodall 4 7\n", "19354 chrichlow 4 9\n", "19281 iff 4 3\n", "20185 vicks 4 5\n", "5067 'come 4 5\n", "4956 aaa 4 3\n", "20638 prehaps 4 7\n", "3560 nesmith 4 7\n", "3662 gress 4 5\n", "21125 pia 4 3\n", "20875 knowlege 4 8\n", "4200 wyandottes 4 10\n", "20820 'himself 4 8\n", "20748 ditions 4 7\n", "4335 froin 4 5\n", "20658 purty 4 5\n", "4344 shouldbe 4 8\n", "4949 hyman 4 5\n", "20552 gossage 4 7\n", "4400 kirkwood 4 8\n", "4427 workin 4 6\n", "20503 figtree 4 7\n", "4497 worthen 4 7\n", "20452 of'the 4 6\n", "4555 vitamines 4 9\n", "20357 'doing 4 6\n", "4929 aving 4 5\n", "6237 mony 4 4\n", "6475 knowed 4 6\n", "19022 wiseman 4 7\n", "17146 beilby 4 6\n", "7569 'said 4 5\n", "17450 aniong 4 6\n", "17382 maren 4 5\n", "7837 aubigne 4 7\n", "17369 elzirah 4 7\n", "8030 cbe 4 3\n", "8228 ories 4 5\n", "17168 iiiii 4 5\n", "17155 kerns 4 5\n", "8245 twentyone 4 9\n", "17543 bas 4 3\n", "8251 gertie 4 6\n", "8305 espie 4 5\n", "8396 'could 4 6\n", "16955 ata 4 3\n", "16880 georgie 4 7\n", "8562 cormick 4 7\n", "16721 dif 4 3\n", "8622 ath 4 3\n", "8643 sie 4 3\n", "17510 ial 4 3\n", "7320 ered 4 4\n", "6483 couraging 4 9\n", "6846 gospet 4 6\n", "18836 ierald 4 6\n", "6594 dearmon 4 7\n", "18720 aweary 4 6\n", "18712 hewas 4 5\n", "18632 nutt 4 4\n", "6708 willbe 4 6\n", "18396 rishel 4 6\n", "18391 gosp 4 4\n", "6844 ragan 4 5\n", "18187 nian 4 4\n", "7298 krag 4 4\n", "7050 iord 4 4\n", "7069 arenow 4 6\n", "18017 pilkington 4 10\n", "7070 sisson 4 6\n", "7119 ottr 4 4\n", "7127 ventists 4 8\n", "7133 willacoochee 4 12\n", "17737 hedin 4 5\n", "7241 ough 4 4\n", "13173 cism 4 4\n", "\n", "[1025 rows x 3 columns]\n" ] } ], "source": [ "title = 'GH'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:18:02.261363", "start_time": "2016-12-12T16:18:02.236586" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for GOH:\n", " spell_error count word_length\n", "1063 nuttose 51 7\n", "1553 bromose 24 7\n", "2597 abbie 20 5\n", "1316 nuttolene 19 9\n", "71 lauretta 18 8\n", "796 protose 14 7\n", "336 lenna 13 5\n", "1320 mackey 12 6\n", "2532 gruels 10 6\n", "2106 chas 10 4\n", "1191 gos 10 3\n", "2288 tion 10 4\n", "1702 drs 10 3\n", "1527 pel 10 3\n", "865 'the 9 4\n", "661 'in 8 3\n", "1719 princi 8 6\n", "2623 ansh 7 4\n", "327 bouchard 7 8\n", "942 tarium 7 6\n", "1581 mynheer 7 7\n", "1027 croutons 7 8\n", "2073 proteids 7 8\n", "1390 evelene 6 7\n", "1860 'to 6 3\n", "28 dqq 6 3\n", "1043 sel 6 3\n", "972 fredrickshavn 6 13\n", "949 'and 6 4\n", "2433 eze 6 3\n", "2450 onehalf 6 7\n", "424 comfortables 6 12\n", "2453 maltol 6 6\n", "281 jir 6 3\n", "2015 strychnin 5 9\n", "2236 fora 5 4\n", "1958 sitz 5 4\n", "1807 fik 5 3\n", "1720 institut 5 8\n", "2251 sani 5 4\n", "1365 selfdenial 5 10\n", "1714 fft 5 3\n", "1213 heiman 5 6\n", "597 flich 5 5\n", "1189 warne 5 5\n", "2773 thi 5 3\n", "55 dulness 5 7\n", "1956 nux 4 3\n", "2002 allready 4 8\n", "2744 healthdestroying 4 16\n", "2717 schillembeck 4 12\n", "307 lightplant 4 10\n", "348 health' 4 7\n", "394 fatand 4 6\n", "413 lindstrom 4 9\n", "484 vomica 4 6\n", "2415 excrementitious 4 15\n", "2385 seventhday 4 10\n", "798 albumins 4 8\n", "1644 mal 4 3\n", "1033 pre 4 3\n", "1085 'for 4 4\n", "1088 bromo 4 5\n", "1240 ood 4 3\n", "2009 rlich 4 5\n", "985 teachout 4 8\n" ] } ], "source": [ "title = 'GOH'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T16:18:08.344761", "start_time": "2016-12-12T16:18:08.285756" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for GS:\n", " spell_error count word_length\n", "6257 'the 181 4\n", "4423 'of 124 3\n", "12013 aro 111 3\n", "4173 eze 75 3\n", "7908 'to 64 3\n", "15169 'and 63 4\n", "12885 ile 55 3\n", "15862 pre 50 3\n", "13040 ots 47 3\n", "13239 tion 45 4\n", "9558 elds 44 4\n", "9186 timethe 40 7\n", "1689 'is 32 3\n", "4605 'that 32 5\n", "10018 'in 31 3\n", "2806 mal 31 3\n", "15354 ment 30 4\n", "14207 'be 28 3\n", "3879 gospxi 26 6\n", "8048 ofthe 24 5\n", "7658 thi 23 3\n", "6951 ets 22 3\n", "13061 cer 22 3\n", "8082 ehe 21 3\n", "8466 mosheim 21 7\n", "1207 heylyn 21 6\n", "6778 gos 21 3\n", "10868 seventhday 20 10\n", "12373 iow 20 3\n", "11713 'are 19 4\n", "13826 sabbaton 19 8\n", "8764 'he 18 3\n", "7610 haruest 17 7\n", "13056 ble 17 3\n", "7465 'not 17 4\n", "7408 'with 17 5\n", "12985 wharey 16 6\n", "11184 sel 16 3\n", "5987 'his 16 4\n", "5348 glynn 16 5\n", "14511 'it 16 3\n", "16242 pxi 16 3\n", "6577 'as 16 3\n", "539 blest 16 5\n", "15536 ise 16 3\n", "1862 'for 15 4\n", "2425 ver 15 3\n", "18041 'which 15 6\n", "12378 goapxl 15 6\n", "13090 schaff 15 6\n", "14016 'have 14 5\n", "6442 fon 14 3\n", "13163 mor 14 3\n", "18555 goapx 14 5\n", "12539 'will 14 5\n", "5037 'by 13 3\n", "4684 inthe 13 5\n", "10903 sabbathschool 13 13\n", "14177 'all 13 4\n", "4572 vor 13 3\n", "14174 shabbath 13 8\n", "1505 northfield 13 10\n", "2221 goapxi 12 6\n", "572 sigkix 12 6\n", "5985 whi 12 3\n", "6550 berthier 12 8\n", "8564 abrahamic 12 9\n", "15909 'at 12 3\n", "14237 kno 12 3\n", "14637 medo 12 4\n", "10595 thermo 11 6\n", "7296 ple 11 3\n", "6348 ved 11 3\n", "4607 gosp 11 4\n", "12797 gesenius 11 8\n", "1272 ity 11 3\n", "6215 sho 11 3\n", "13910 thd 11 3\n", "1196 'our 11 4\n", "4858 dowling 10 7\n", "11254 murdock 10 7\n", "16947 thr 10 3\n", "13932 chri 10 4\n", "18596 firstday 10 8\n", "10753 wor 10 3\n", "10166 'from 10 5\n", "14405 gop 10 3\n", "7085 vox 10 3\n", "7657 eemperance 10 10\n", "8595 thein 10 5\n", "14178 'this 10 5\n", "5159 shust 10 5\n", "11885 olshausen 10 9\n", "1909 sundaykeeping 10 13\n", "6711 bateham 10 7\n", "9383 neander 10 7\n", "4584 sigklx 9 6\n", "7002 'they 9 5\n", "11782 'upon 9 5\n", "2309 overcomers 9 10\n", "... ... ... ...\n", "6044 haue 4 4\n", "6094 swedena 4 7\n", "6139 igk 4 3\n", "2946 perfeet 4 7\n", "2516 religio 4 7\n", "6713 mina 4 4\n", "1034 rumseller 4 9\n", "201 popo 4 4\n", "228 ving 4 4\n", "348 wisco 4 5\n", "395 'more 4 5\n", "437 sabbathday 4 10\n", "548 thq 4 3\n", "708 eecl 4 4\n", "779 onio 4 4\n", "787 'many 4 5\n", "811 corea 4 5\n", "864 sigkl 4 5\n", "883 'seventh 4 8\n", "1024 'no 4 3\n", "1173 themsel 4 7\n", "2493 'etc 4 4\n", "1217 goapi 4 5\n", "1337 phocas 4 6\n", "1558 royalton 4 8\n", "1600 ght 4 3\n", "1649 ture 4 4\n", "1690 'change 4 7\n", "1786 ople 4 4\n", "1819 ged 4 3\n", "1915 laurvig 4 7\n", "1952 heruli 4 6\n", "2058 jeddo 4 5\n", "2157 translat 4 8\n", "2377 'first 4 6\n", "6253 peopie 4 6\n", "6785 nant 4 4\n", "13364 ingulfed 4 8\n", "11668 gilfillan 4 9\n", "10576 sio 4 3\n", "10590 dungan 4 6\n", "10723 hershe 4 6\n", "10772 'country 4 8\n", "10886 'earth 4 6\n", "11080 atalissa 4 8\n", "11105 vers 4 4\n", "11118 leitchfield 4 11\n", "11203 urrection 4 9\n", "11409 ohe 4 3\n", "11417 ohl 4 3\n", "11499 sabbathbreaking 4 15\n", "11563 ehristian 4 9\n", "12009 yehovah 4 7\n", "10424 'last 4 5\n", "12030 wledge 4 6\n", "12170 'been 4 5\n", "12501 o'f 4 3\n", "12532 elie 4 4\n", "12653 ofhis 4 5\n", "12690 pointments 4 10\n", "12867 doetrine 4 8\n", "12899 ove 4 3\n", "12986 peaceableness 4 13\n", "13029 againat 4 7\n", "13116 kuriakos 4 8\n", "13187 'gospel 4 7\n", "13237 'does 4 5\n", "10574 giv 4 3\n", "10293 oeo 4 3\n", "6860 decretalia 4 10\n", "8797 tentmeetings 4 12\n", "6871 tirosh 4 6\n", "7011 olean 4 5\n", "7257 tution 4 6\n", "7360 tay 4 3\n", "7502 morrice 4 7\n", "7564 hinderance 4 10\n", "7896 catherines 4 10\n", "7980 blo 4 3\n", "8068 ligion 4 6\n", "8306 harrisonville 4 13\n", "8307 ohuroh 4 6\n", "8545 ars 4 3\n", "8610 the' 4 4\n", "8828 thitt 4 5\n", "10253 pgr 4 3\n", "9014 'true 4 5\n", "9038 'us 4 3\n", "9245 'cent 4 5\n", "9417 gowen 4 5\n", "9452 th'e 4 4\n", "9500 hiin 4 4\n", "9567 oro 4 3\n", "9691 giustianni 4 10\n", "9720 gustafson 4 9\n", "9843 longimanus 4 10\n", "9879 anabaino 4 8\n", "10057 alr 4 3\n", "10104 nearl 4 5\n", "18794 'mid 4 4\n", "\n", "[488 rows x 3 columns]\n" ] } ], "source": [ "title = 'GS'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T17:19:57.401389", "start_time": "2016-12-12T17:19:57.341047" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for HM:\n", " spell_error count word_length\n", "5175 gen'l 102 5\n", "6604 durand 97 6\n", "5407 rep't 92 5\n", "435 miscel 79 6\n", "8413 am't 76 4\n", "6180 avenola 76 7\n", "2387 mis 76 3\n", "4402 l't 64 3\n", "3674 cumb 62 4\n", "7453 imlay 57 5\n", "1400 canof 56 5\n", "6032 cassopolis 55 10\n", "630 raiatea 51 7\n", "438 seventhday 49 10\n", "5005 intyre 43 6\n", "2404 aro 43 3\n", "8589 dist's 41 6\n", "1036 schoolcraft 41 11\n", "5697 agt 39 3\n", "866 scand 38 5\n", "2824 wheatena 37 8\n", "6495 vassers 36 7\n", "7911 lehigh 36 6\n", "5797 hayti 35 5\n", "6953 revassers 35 9\n", "6657 'the 34 4\n", "2820 sabbathschool 33 13\n", "8887 deliv 33 5\n", "6758 raratonga 32 9\n", "6635 sabbathkeepers 30 14\n", "3991 bogota 29 6\n", "8095 sendebud 28 8\n", "6412 deliv'd 28 7\n", "4370 susp'n 28 6\n", "4955 tena 28 4\n", "4296 chas 27 4\n", "5404 bordoville 27 10\n", "8910 tion 27 4\n", "9153 riverton 26 8\n", "7520 grandville 26 10\n", "176 mundy 26 5\n", "9898 watrousville 26 12\n", "9029 greenleaf 26 9\n", "1816 farmington 26 10\n", "7604 pierson 25 7\n", "2842 eddyville 25 9\n", "1948 elkhorn 25 7\n", "2334 vaktare 25 7\n", "7776 grinnell 25 8\n", "1444 mor 25 3\n", "4091 pierrepont 25 10\n", "8135 centerville 25 11\n", "8748 afton 25 5\n", "1676 richford 24 8\n", "4005 danvers 24 7\n", "9479 smithland 24 9\n", "2167 coldwater 24 9\n", "3939 charlemont 24 10\n", "4732 morrice 24 7\n", "8875 springside 24 10\n", "3597 fbr 24 3\n", "4929 castana 24 7\n", "2027 lakeview 24 8\n", "6059 alaiedon 24 8\n", "8317 gowen 23 5\n", "3021 ruthven 23 7\n", "5274 stauffer 23 8\n", "3874 vilas 23 5\n", "1079 kitts 23 5\n", "4602 scottville 23 10\n", "4927 waukon 23 6\n", "5624 pre 23 3\n", "2927 elmwood 22 7\n", "6962 saranac 22 7\n", "4382 lmtd 22 4\n", "921 sunbury 22 7\n", "1522 sandyville 22 10\n", "1067 wamego 22 6\n", "4289 ceresco 22 7\n", "1711 vergennes 22 9\n", "7289 middlebury 21 10\n", "1526 evangeliets 21 11\n", "4191 blendon 21 7\n", "1321 sextonville 21 11\n", "4593 webberville 21 11\n", "8494 sinclairville 21 13\n", "3164 jeddo 21 5\n", "3642 vermontville 21 12\n", "1588 edinboro 20 8\n", "1624 parkville 20 9\n", "5738 twentyfive 20 10\n", "5216 eze 20 3\n", "5207 sedalia 20 7\n", "953 childstown 20 10\n", "1194 satolli 20 7\n", "9918 grangeville 20 11\n", "4059 almira 20 6\n", "9590 brookings 20 9\n", "8807 'of 20 3\n", "4284 ladonia 20 7\n", "... ... ... ...\n", "5889 spanishspeaking 4 15\n", "2295 cept 4 4\n", "5958 pmpmpm 4 6\n", "1689 bloomville 4 10\n", "5995 avoca 4 5\n", "2355 nanson 4 6\n", "6167 alpharetta 4 10\n", "6179 ong 4 3\n", "6186 follo 4 5\n", "1496 ili 4 3\n", "6363 'there 4 6\n", "1735 misha 4 5\n", "4250 thein 4 5\n", "6436 stremann 4 8\n", "1402 ithe 4 4\n", "6502 intrust 4 7\n", "6507 nig't 4 5\n", "6514 thirtyfive 4 10\n", "6533 walkerton 4 9\n", "6564 nyassa 4 6\n", "1376 wallowa 4 7\n", "6613 shawmut 4 7\n", "5837 ure 4 3\n", "2278 sanningens 4 10\n", "8534 iss 4 3\n", "5296 thirtythree 4 11\n", "2147 dixo 4 4\n", "2122 apeth 4 5\n", "4943 eldred 4 6\n", "2159 frederikshavn 4 13\n", "5061 ifi 4 3\n", "5080 fide 4 4\n", "4832 traylor 4 7\n", "5170 goldsberry 4 10\n", "1920 'any 4 4\n", "1915 winti 4 5\n", "1857 twentynine 4 10\n", "4552 p'fie 4 5\n", "5362 farnum 4 6\n", "5384 papetoai 4 8\n", "14 tri 4 3\n", "1790 capps 4 5\n", "5473 freemont 4 8\n", "2252 calebs 4 6\n", "5595 brn 4 3\n", "5608 berthoud 4 8\n", "5626 reis 4 4\n", "4572 grenfell 4 8\n", "1346 guadaloupe 4 10\n", "1340 sions 4 5\n", "6711 andthe 4 6\n", "662 hansa 4 5\n", "3805 acra 4 4\n", "7913 hea 4 3\n", "3792 gorman 4 6\n", "7943 ''the 4 5\n", "7954 tierra 4 6\n", "693 amyot 4 5\n", "7990 canv 4 4\n", "8021 kibira 4 6\n", "8052 ansgarius 4 9\n", "8057 caro 4 4\n", "3679 britian 4 7\n", "6737 sbbath 4 6\n", "8185 l'i 4 3\n", "647 br'ght 4 6\n", "3665 visser 4 6\n", "8325 crowther 4 8\n", "8359 kroners 4 7\n", "2752 kelsea 4 6\n", "8442 ble 4 3\n", "8470 godgiven 4 8\n", "8480 presque 4 7\n", "8506 nickerson 4 9\n", "7862 sharpsburg 4 10\n", "7791 priate 4 6\n", "797 pottstown 4 9\n", "849 juras 4 5\n", "1288 taftsville 4 10\n", "6877 seffner 4 7\n", "6885 gome 4 4\n", "6902 'so 4 3\n", "4096 hollandville 4 12\n", "1107 taopi 4 5\n", "2514 'in 4 3\n", "7257 helvetians 4 10\n", "7303 inthe 4 5\n", "3999 metropolitans 4 13\n", "7400 peckham 4 7\n", "1043 clure 4 5\n", "1024 cassopolie 4 10\n", "4913 richville 4 9\n", "7581 wacek 4 5\n", "984 fleshmeats 4 10\n", "2636 allister 4 8\n", "7620 espirito 4 8\n", "937 itinerating 4 11\n", "7703 bliven 4 6\n", "3880 medora 4 6\n", "6686 nowlin 4 6\n", "\n", "[670 rows x 3 columns]\n" ] } ], "source": [ "title = 'HM'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2016-12-12T18:54:29.424701", "start_time": "2016-12-12T18:54:29.218068" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for HR:\n", " spell_error count word_length\n", "28466 tion 950 4\n", "20855 sel 633 3\n", "72492 cafe 595 4\n", "15620 sitz 460 4\n", "50304 ment 445 4\n", "31479 pre 423 3\n", "61348 proteid 417 7\n", "45282 hydrozone 266 9\n", "2316 tions 265 5\n", "76552 glycozone 250 9\n", "26613 kumyss 247 6\n", "32473 agt 220 3\n", "55482 chas 217 4\n", "79511 marchand 215 8\n", "37440 priessnitz 207 10\n", "40862 sirup 180 5\n", "12623 tremens 180 7\n", "30726 hypopepsia 177 10\n", "61891 'em 176 3\n", "11648 tri 175 3\n", "90127 ance 169 4\n", "81227 ble 157 3\n", "72423 keeley 157 6\n", "31201 ous 155 3\n", "67026 ments 155 5\n", "15859 ful 154 3\n", "56875 'the 154 4\n", "62831 tem 152 3\n", "6277 trall 147 5\n", "86637 deimel 146 6\n", "73573 cornaro 138 7\n", "6184 ers 133 3\n", "69701 ence 132 4\n", "69993 ent 128 3\n", "88946 microscopists 126 13\n", "43989 ity 125 3\n", "71050 clure 125 5\n", "78955 ecole 120 5\n", "62432 kedzie 120 6\n", "16325 wuz 120 3\n", "5731 onehalf 119 7\n", "27297 ridpath 119 7\n", "11176 hygeio 118 6\n", "31228 ili 116 3\n", "86271 hydriatic 116 9\n", "30615 ple 114 3\n", "61584 vick 112 4\n", "79835 socalled 108 8\n", "26309 fehr 108 4\n", "58111 gruels 107 6\n", "81217 ture 106 4\n", "16890 infantum 106 8\n", "7069 paso 106 4\n", "85605 ure 105 3\n", "86141 electropoise 104 12\n", "83812 pim 104 3\n", "53225 dren 97 4\n", "81478 ical 97 4\n", "27280 tle 96 3\n", "91992 basle 96 5\n", "68797 ber 96 3\n", "19271 meltose 95 7\n", "45251 derangements 94 12\n", "4916 mal 94 3\n", "86489 munn 92 4\n", "50416 twentyfive 91 10\n", "91345 centrale 91 8\n", "76742 ealth 91 5\n", "31962 pharmacal 90 9\n", "41540 schoolcraft 89 11\n", "79481 drexel 89 6\n", "64041 dextrinized 87 11\n", "56573 ceo 86 3\n", "82386 soo 86 3\n", "23840 strychnia 85 9\n", "83553 caffein 84 7\n", "36429 crandon 84 7\n", "4888 morbus 84 6\n", "26457 corpore 82 7\n", "91308 bacco 82 5\n", "41557 enemata 81 7\n", "9922 institut 81 8\n", "34059 parral 80 6\n", "81467 eral 79 4\n", "34434 alabastine 79 10\n", "91533 bloodvessels 78 12\n", "39792 pawlow 77 6\n", "58759 ioo 76 3\n", "18831 chautauquan 76 11\n", "55601 mis 76 3\n", "73873 accom 75 5\n", "35276 twentyfour 75 10\n", "54967 ood 74 3\n", "12182 colman 74 6\n", "11664 sanitaire 74 9\n", "3136 farnum 73 6\n", "67838 boylston 73 8\n", "13676 ani 73 3\n", "83099 murdock 72 7\n", "41584 condit 71 6\n", "... ... ... ...\n", "70435 ified 4 5\n", "70472 wellboiled 4 10\n", "70492 threeor 4 7\n", "12689 appara 4 6\n", "33935 terly 4 5\n", "4682 emorest 4 7\n", "71584 sendfreea 4 9\n", "4722 breederswe 4 10\n", "71472 cotosuet 4 8\n", "71322 bacheler 4 8\n", "34238 shafer 4 6\n", "34281 sensical 4 8\n", "4738 moand 4 5\n", "4755 up' 4 3\n", "34288 here' 4 5\n", "71239 tipulary 4 8\n", "4804 moqui 4 5\n", "71213 eyesa 4 5\n", "71122 hwth 4 4\n", "34295 sacri 4 5\n", "12779 glycorone 4 9\n", "4826 axler 4 5\n", "34445 villemin 4 8\n", "34491 youare 4 6\n", "70890 zemzem 4 6\n", "70843 grizel 4 6\n", "34600 ential 4 6\n", "34616 tinues 4 6\n", "34797 divorcecourts 4 13\n", "34814 tiiis 4 5\n", "70671 mieh 4 4\n", "34947 evrard 4 6\n", "35141 nute 4 4\n", "35903 faris 4 5\n", "36035 meateater 4 9\n", "69392 vaipipg 4 7\n", "12251 giessen 4 7\n", "5375 manwoman 4 8\n", "12227 seg 4 3\n", "37045 egtensive 4 9\n", "12226 elc 4 3\n", "37198 britian 4 7\n", "68119 trils 4 5\n", "37231 apprecia 4 8\n", "37279 waddington 4 10\n", "68097 flagg 4 5\n", "68033 arrearages 4 10\n", "68032 shiverings 4 10\n", "68016 kisi 4 4\n", "37313 iixa 4 4\n", "37362 eie 4 3\n", "67970 workin 4 6\n", "67953 brushings 4 9\n", "12177 rassed 4 6\n", "37456 oeen 4 4\n", "67878 lachrymation 4 12\n", "5497 advertbements 4 13\n", "37531 therapeuptic 4 12\n", "67812 necessaryand 4 12\n", "37622 bastie 4 6\n", "12146 aire 4 4\n", "37656 exis 4 4\n", "67685 eat' 4 4\n", "12068 alexins 4 7\n", "37826 inspec 4 6\n", "67602 ampmpm 4 6\n", "37003 distemperate 4 12\n", "68567 drouths 4 7\n", "12463 rawnsley 4 8\n", "5366 'same 4 5\n", "36136 formad 4 6\n", "36138 oir 4 3\n", "69363 brophy 4 6\n", "36154 yellowfever 4 11\n", "69325 shal 4 4\n", "36164 ceeding 4 7\n", "5214 laundried 4 9\n", "69210 mps 4 3\n", "36307 ljtaith 4 7\n", "69183 ved 4 3\n", "69174 snanitiatarriium 4 16\n", "12392 icycle 4 6\n", "12321 razzle 4 6\n", "68979 lthough 4 7\n", "5308 heatmaking 4 10\n", "5315 sweetcakes 4 10\n", "68920 schnirer 4 8\n", "36673 reina 4 5\n", "68880 crowell 4 7\n", "68812 inbe 4 4\n", "12287 maake 4 5\n", "36849 crt 4 3\n", "68705 amd 4 3\n", "36871 divinny 4 7\n", "36891 neison 4 6\n", "68671 eted 4 4\n", "36963 lallemand 4 9\n", "36964 perihelionists 4 14\n", "36965 itif 4 4\n", "33550 swinyard 4 8\n", "\n", "[6691 rows x 3 columns]\n" ] } ], "source": [ "title = 'HR'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2016-12-31T12:04:40.931392", "start_time": "2016-12-31T12:04:40.854985" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for IR:\n", " spell_error count word_length\n", "2109 tion 359 4\n", "23146 mahan 315 5\n", "8358 ence 177 4\n", "5308 presidentw 175 10\n", "22751 walkerton 167 9\n", "19537 tions 144 5\n", "6118 unionville 143 10\n", "20371 rocklane 140 8\n", "18307 ference 139 7\n", "3 chas 134 4\n", "14221 ment 131 4\n", "10384 frankton 125 8\n", "1881 inwood 123 6\n", "3615 adiana 121 6\n", "5761 secretaryw 119 10\n", "15796 seventhday 112 10\n", "21673 ber 109 3\n", "7323 medaryville 90 11\n", "416 ple 88 3\n", "17156 geporter 86 8\n", "21982 horlacher 84 9\n", "14207 suptc 82 5\n", "2119 ers 81 3\n", "4000 boze 79 4\n", "22545 ren 79 3\n", "7738 dilworth 77 8\n", "6947 brookston 75 9\n", "18172 indpls 75 6\n", "12118 committeew 72 10\n", "1050 sionary 72 7\n", "14809 minnick 67 7\n", "21790 ent 67 3\n", "131 sunman 65 6\n", "4164 ary 62 3\n", "19087 pepple 61 6\n", "20962 treasurerw 58 10\n", "12859 cuaig 57 5\n", "11833 secretariesa 56 12\n", "18610 treasurera 56 10\n", "132 nuding 56 6\n", "12629 treasurert 55 10\n", "3364 mis 53 3\n", "14125 burkhart 52 8\n", "9700 missionaryr 52 11\n", "4098 ance 52 4\n", "2708 ville 51 5\n", "13909 ments 51 5\n", "4904 beath 51 5\n", "1401 ters 50 4\n", "14087 adelia 50 6\n", "16039 hodapp 49 6\n", "14115 haskins 47 7\n", "14114 pre 45 3\n", "4547 ful 45 3\n", "6901 busz 42 4\n", "116 eral 41 4\n", "5978 dianapolis 39 10\n", "18445 higbee 39 6\n", "14974 rium 39 4\n", "11370 lugenbeal 39 9\n", "18844 mittee 39 6\n", "2401 wirt 36 4\n", "16179 metzker 36 7\n", "6588 possman 36 7\n", "8389 bers 36 4\n", "23220 altho 35 5\n", "8679 indi 35 4\n", "22151 athen 35 5\n", "7184 britton 35 7\n", "21422 hussey 34 6\n", "19175 apolis 34 6\n", "15883 mellinger 34 9\n", "21458 ceived 34 6\n", "10926 wanteda 33 7\n", "6774 'the 33 4\n", "12103 crary 33 5\n", "8081 dren 33 4\n", "9091 cleland 32 7\n", "11763 mal 32 3\n", "7058 ation 31 5\n", "7829 gabriella 31 9\n", "3280 kenney 31 6\n", "14136 libertya 31 8\n", "21880 sions 31 5\n", "19097 larkin 31 6\n", "4819 medicaldr 31 9\n", "16868 korn 30 4\n", "6834 cleotis 30 7\n", "17604 terest 30 6\n", "294 huntingburg 30 11\n", "17233 carahoof 29 8\n", "12197 tarium 29 6\n", "3033 portant 29 7\n", "16948 ture 29 4\n", "6472 secretaryj 28 10\n", "19311 geperter 28 8\n", "79 ington 28 6\n", "19053 ning 28 4\n", "10887 peo 28 3\n", "4225 thos 28 4\n", "... ... ... ...\n", "16000 interthe 4 8\n", "15907 accomodated 4 11\n", "15847 cunig 4 5\n", "15694 neese 4 5\n", "15635 couragingly 4 11\n", "15545 nected 4 6\n", "9278 knowl 4 5\n", "15540 lauffer 4 7\n", "15533 het 4 3\n", "15508 immedi 4 6\n", "15431 whittaker 4 9\n", "15348 elt 4 3\n", "15321 imand 4 5\n", "15248 wer 4 3\n", "14866 tieing 4 6\n", "14750 prepar 4 6\n", "14654 sanitar 4 7\n", "14558 jority 4 6\n", "16384 haye 4 4\n", "16438 occa 4 4\n", "16622 lecting 4 7\n", "16645 sehool 4 6\n", "18137 employes 4 8\n", "17971 lts 4 3\n", "17967 exof 4 4\n", "17945 newed 4 5\n", "17923 ization 4 7\n", "17911 uhe 4 3\n", "17883 conwas 4 6\n", "17820 guage 4 5\n", "17711 strating 4 8\n", "17661 secretarya 4 10\n", "17613 fairlaud 4 8\n", "17609 lms 4 3\n", "17443 preceeded 4 9\n", "17421 ntsh 4 4\n", "17258 sanitari 4 8\n", "17103 remem 4 5\n", "17072 ethelyn 4 7\n", "17021 stantial 4 8\n", "16992 perty 4 5\n", "16968 nancial 4 7\n", "16937 misof 4 5\n", "16767 wakarusa 4 8\n", "16693 cerenola 4 8\n", "14449 lizzfe 4 6\n", "14378 cerely 4 6\n", "14350 memthe 4 6\n", "11961 sug 4 3\n", "11831 clawson 4 7\n", "11736 eeeeee 4 6\n", "11699 deis 4 4\n", "11408 'it 4 3\n", "11179 rewith 4 6\n", "11050 faiththe 4 8\n", "11034 timonies 4 8\n", "10983 indianapous 4 11\n", "10852 laand 4 5\n", "10784 beto 4 4\n", "10686 gansport 4 8\n", "10439 estly 4 5\n", "10277 bewill 4 6\n", "10219 jbuhalts 4 8\n", "10208 cli 4 3\n", "10156 sponded 4 7\n", "10135 durgan 4 6\n", "9837 dustrial 4 8\n", "9769 komo 4 4\n", "9762 iola 4 4\n", "9476 templeton 4 9\n", "9441 'if 4 3\n", "9279 bufialts 4 8\n", "11871 wolflake 4 8\n", "12029 malony 4 6\n", "14343 enty 4 4\n", "12061 shambaugh 4 9\n", "14272 exthat 4 6\n", "14249 walburn 4 7\n", "14190 sto 4 3\n", "14121 'po 4 3\n", "14019 adian 4 5\n", "13765 rcher 4 5\n", "13599 haps 4 4\n", "13479 margeret 4 8\n", "13116 marton 4 6\n", "13099 forand 4 6\n", "12999 liever 4 6\n", "12994 ohmer 4 5\n", "12993 warrick 4 7\n", "12809 cott 4 4\n", "12806 beand 4 5\n", "12780 peoof 4 5\n", "12628 presidenti 4 10\n", "12486 culation 4 8\n", "12464 prepara 4 7\n", "12458 michaelville 4 12\n", "12331 condi 4 5\n", "12143 sisted 4 6\n", "12141 ruary 4 5\n", "23335 cers 4 4\n", "\n", "[1053 rows x 3 columns]\n" ] } ], "source": [ "title = 'IR'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2016-12-31T13:03:14.359976", "start_time": "2016-12-31T13:03:14.213337" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for LB:\n", " spell_error count word_length\n", "10799 mackey 296 6\n", "14763 hsi 247 3\n", "23192 halsted 165 7\n", "7895 ile 142 3\n", "13506 vitamines 110 9\n", "7689 lundell 97 7\n", "23332 kershaw 95 7\n", "21299 auley 92 5\n", "14910 pearsons 91 8\n", "24764 harner 90 6\n", "15591 stapp 84 5\n", "3514 'the 84 4\n", "483 pavlson 81 7\n", "17017 chas 79 4\n", "16922 soulwinning 76 11\n", "2498 'to 72 3\n", "23901 crittenton 71 10\n", "22240 twentyfive 69 10\n", "7922 chicagotrains 68 13\n", "10192 courtland 55 9\n", "11960 dannemora 55 9\n", "3304 bilhorn 53 7\n", "8226 burghart 52 8\n", "861 pawlow 51 6\n", "19634 thekla 49 6\n", "17951 gazeteer 49 8\n", "1116 papercovered 48 12\n", "14525 ridpath 45 7\n", "17780 whisler 44 7\n", "23230 tion 43 4\n", "21419 'of 43 3\n", "19572 jno 42 3\n", "23195 zada 42 4\n", "21072 'and 41 4\n", "12052 saloonkeeper 41 12\n", "6181 minutesfifty 41 12\n", "21951 jeffers 40 7\n", "15662 'we 40 3\n", "10797 laundryin 39 9\n", "12256 cyclopmdic 39 10\n", "1130 stillwater 39 10\n", "22870 edholm 38 6\n", "1473 colortype 38 9\n", "4732 desplaines 38 10\n", "5889 'phone 38 6\n", "17360 kohlsaat 36 8\n", "21261 eze 35 3\n", "20293 psa 35 3\n", "1314 pre 34 3\n", "19859 salle 34 5\n", "17907 agt 34 3\n", "6237 kedler 34 6\n", "10954 sevenjeweled 33 12\n", "24169 ment 33 4\n", "9666 vories 32 6\n", "16941 holaday 32 7\n", "4106 egal 31 4\n", "21445 employe 31 7\n", "15138 luyster 31 7\n", "19204 mal 31 3\n", "9024 cann 31 4\n", "20228 stantly 30 7\n", "13740 cyclopedic 30 10\n", "17421 tkt 30 3\n", "3382 sinsick 29 7\n", "12504 'in 29 3\n", "5758 kniskern 29 8\n", "1513 lbinsbale 29 9\n", "2407 oldfashioned 29 12\n", "12134 thos 29 4\n", "4958 leavitt 28 7\n", "20704 waltham 28 7\n", "10277 ranney 28 6\n", "7495 tyrer 28 5\n", "1668 printype 27 8\n", "1029 cyclopaedic 27 11\n", "5503 zoerb 26 5\n", "21218 cyclopxdic 26 10\n", "18339 gipsy 26 5\n", "4542 anb 26 3\n", "22593 anamosa 25 7\n", "10926 hurd 25 4\n", "4520 tiie 25 4\n", "8112 themnot 25 7\n", "24190 potosi 25 6\n", "20938 burleson 24 8\n", "16436 onehalf 24 7\n", "16214 medo 24 4\n", "12131 ufford 24 6\n", "5712 ballington 24 10\n", "14083 selfsupporting 24 14\n", "2491 employes 23 8\n", "20708 'neath 23 6\n", "3881 mis 23 3\n", "8730 rawlinson 23 9\n", "906 ili 22 3\n", "9282 ments 22 5\n", "530 cassimeres 22 10\n", "11650 appli 22 5\n", "3818 cbicago 21 7\n", "... ... ... ...\n", "20581 methat 4 6\n", "20569 tio 4 3\n", "20552 fausset 4 7\n", "15071 iiii 4 4\n", "20471 supervisionof 4 13\n", "20435 hitt 4 4\n", "4682 tlie 4 4\n", "15075 gpta 4 4\n", "11000 gowlie 4 6\n", "4759 thou'lt 4 7\n", "4790 follo 4 5\n", "10991 batonga 4 7\n", "4843 cornmunity 4 10\n", "20378 foodless 4 8\n", "4894 creegan 4 7\n", "10955 lation 4 6\n", "15145 uring 4 5\n", "20301 daybuthave 4 10\n", "4927 'whosoever 4 10\n", "4965 carscallen 4 10\n", "20269 editori 4 7\n", "20611 tae 4 3\n", "14918 shoop 4 5\n", "11107 easurements 4 11\n", "20997 ered 4 4\n", "21131 llo 4 3\n", "21119 usward 4 6\n", "4152 nating 4 6\n", "11154 deathdealing 4 12\n", "21103 itself' 4 7\n", "4158 brodder 4 7\n", "4232 him' 4 4\n", "4243 ral 4 3\n", "4290 'on 4 3\n", "4306 rro 4 3\n", "14769 simson 4 6\n", "4502 ister 4 5\n", "4331 georgeson 4 9\n", "4348 raws 4 4\n", "9397 naturedly 4 9\n", "14801 eskridge 4 8\n", "20853 subwe 4 5\n", "14901 bosphorus 4 9\n", "20816 brompton 4 8\n", "20802 teresting 4 9\n", "4447 poisonful 4 9\n", "20768 pharoah 4 7\n", "4994 nickles 4 7\n", "15168 wand'ring 4 9\n", "5050 muscatine 4 9\n", "19426 helzer 4 6\n", "19653 sdale 4 5\n", "10607 lusx 4 4\n", "19633 foss 4 4\n", "5715 bulow 4 5\n", "5725 samkoff 4 7\n", "5740 worldthe 4 8\n", "5845 lifea 4 5\n", "10554 haing 4 5\n", "19502 christian' 4 10\n", "19454 ilissionary 4 11\n", "19219 enger 4 5\n", "19724 peoplepeople 4 12\n", "19206 'since 4 6\n", "6006 mee 4 3\n", "15510 rurses 4 6\n", "10529 sandow 4 6\n", "15564 twills 4 6\n", "10490 'when 4 5\n", "15601 mahan 4 5\n", "6143 luvster 4 7\n", "6211 talcott 4 7\n", "6235 rhe 4 3\n", "19703 ught 4 4\n", "5680 'him 4 4\n", "10952 oddsize 4 7\n", "19934 appre 4 5\n", "10818 cowee 4 5\n", "15217 rawlins 4 7\n", "5103 lauck 4 5\n", "5170 leseme 4 6\n", "5219 sor 4 3\n", "20055 warrenville 4 11\n", "15224 smerdis 4 7\n", "20002 hostetter 4 9\n", "10714 olivers 4 7\n", "5241 keinhoff 4 8\n", "19920 ough 4 4\n", "19743 harrigan 4 8\n", "5462 vix 4 3\n", "5481 burford 4 7\n", "19854 tli 4 3\n", "10622 ese 4 3\n", "15295 soul' 4 5\n", "5568 ement 4 5\n", "10618 hebard 4 6\n", "10610 besetments 4 10\n", "19763 companyso 4 9\n", "5666 valdosta 4 8\n", "12789 conversationala 4 15\n", "\n", "[1352 rows x 3 columns]\n" ] } ], "source": [ "title = 'LB'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2016-12-31T14:17:50.857928", "start_time": "2016-12-31T14:17:50.726483" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for LH:\n", " spell_error count word_length\n", "3136 cornforth 267 9\n", "15571 tri 120 3\n", "25076 tion 119 4\n", "7320 nauheim 91 7\n", "4410 antituberculosis 87 16\n", "1492 pre 83 3\n", "8341 'ad 71 3\n", "18562 vitamine 64 8\n", "17815 onehalf 63 7\n", "24188 socalled 62 8\n", "8615 ment 61 4\n", "14628 quinin 61 6\n", "9133 lllll 59 5\n", "18254 ioo 54 3\n", "17120 roseburg 53 8\n", "19743 kee 52 3\n", "21068 friedmann 52 9\n", "25310 osler 51 5\n", "5812 karmatar 50 8\n", "22332 sanatoria 50 9\n", "16894 bulkley 45 7\n", "15295 drugless 45 8\n", "7572 antityphoid 45 11\n", "18465 chas 45 4\n", "14610 nebulizers 44 10\n", "697 imprenta 42 8\n", "13688 peruna 41 6\n", "7691 westfield 41 9\n", "7241 sitz 40 4\n", "1263 madronas 40 8\n", "14286 unvaccinated 40 12\n", "19067 cromie 40 6\n", "24603 bannerman 40 9\n", "23321 'the 40 4\n", "24044 picric 39 6\n", "26551 gulick 39 6\n", "17163 verdad 39 6\n", "26769 upto 38 4\n", "14193 frictionary 37 11\n", "12218 iiiii 37 5\n", "4451 achard 37 6\n", "15392 welltrained 37 11\n", "22530 nozaleda 37 8\n", "7479 bellair 37 7\n", "22633 bournville 36 10\n", "11781 kinau 36 5\n", "1010 ili 35 3\n", "20645 hindhede 35 8\n", "13161 mal 34 3\n", "21293 lorand 33 6\n", "15525 herter 33 6\n", "2016 goldberger 32 10\n", "13784 ful 32 3\n", "25355 peroxid 32 7\n", "4251 mis 32 3\n", "1903 pellagrins 32 10\n", "22429 purin 32 5\n", "12659 openair 31 7\n", "17721 keech 31 5\n", "5275 welch's 31 7\n", "16821 mahon 30 5\n", "2990 ellamont 30 8\n", "5844 wellknown 29 9\n", "12134 collum 29 6\n", "16274 iiii 29 4\n", "4614 musselman 29 9\n", "4492 cornaro 29 7\n", "9263 ptomain 29 7\n", "2310 ini 28 3\n", "16644 nal 28 3\n", "1857 llllll 27 6\n", "20777 electriclight 27 13\n", "15724 ridpath 27 7\n", "6800 voit 27 4\n", "698 twentyfour 27 10\n", "11617 ith 27 3\n", "22831 'and 27 4\n", "10559 thermo 26 6\n", "1055 ent 26 3\n", "22525 canners 26 7\n", "21639 omprising 26 9\n", "23200 'of 26 3\n", "23846 twentythird 25 11\n", "20865 nonmeat 25 7\n", "18859 guilbert 25 8\n", "13211 doran 25 5\n", "16953 salvarsan 25 9\n", "24054 twentyfive 25 10\n", "967 pawlow 24 6\n", "20870 sha 24 3\n", "12354 whalebones 24 10\n", "16518 deathrate 24 9\n", "6683 rosenau 24 7\n", "16183 rane 24 4\n", "14826 misbranded 24 10\n", "1327 moneyorder 24 10\n", "19321 woodhead 23 8\n", "15124 iet 23 3\n", "4145 healt 23 5\n", "19292 ealth 23 5\n", "... ... ... ...\n", "19268 lifr 4 4\n", "19282 recanned 4 8\n", "19318 nyassaland 4 10\n", "19399 timehonored 4 11\n", "19535 paso 4 4\n", "19546 carbo 4 5\n", "16064 gipsy 4 5\n", "16022 stines 4 6\n", "10586 hydrtherapy 4 11\n", "12259 harken 4 6\n", "11570 iiiiiiiiiiiiiiii 4 16\n", "11751 koren 4 5\n", "11755 pharmacopceia 4 13\n", "11802 ress 4 4\n", "11812 thera 4 5\n", "11854 icebag 4 6\n", "11950 coldmitten 4 10\n", "12043 tremely 4 7\n", "12207 stracts 4 7\n", "12396 anc 4 3\n", "15995 mak 4 3\n", "12399 ihe 4 3\n", "12573 heatand 4 7\n", "12597 inhalatorium 4 12\n", "12710 antiputrefactive 4 16\n", "12751 cokord 4 6\n", "12904 woodalcohol 4 11\n", "12906 piki 4 4\n", "12916 almostautomatically 4 19\n", "12970 cffl 4 4\n", "11514 igi 4 3\n", "11506 hono 4 4\n", "11431 northend 4 8\n", "11425 ivr 4 3\n", "10613 opment 4 6\n", "10727 kathrina 4 8\n", "10734 bodyand 4 7\n", "10747 litform 4 7\n", "10771 wun 4 3\n", "10810 ctsayear 4 8\n", "10935 rhin 4 4\n", "10961 ftf 4 3\n", "10979 illissionary 4 12\n", "11057 fernet 4 6\n", "11101 appa 4 4\n", "11103 seidlitz 4 8\n", "11110 ductory 4 7\n", "11189 soyer 4 5\n", "11196 gerontic 4 8\n", "11278 darnall 4 7\n", "11288 ninetyseven 4 11\n", "11293 oot 4 3\n", "11338 wageearners 4 11\n", "12979 payson 4 6\n", "12984 helsingfors 4 11\n", "12998 ille 4 4\n", "14535 lackawanna 4 10\n", "14620 gooa 4 4\n", "14711 nificant 4 8\n", "14717 fli 4 3\n", "14727 safetypins 4 10\n", "14796 ife 4 3\n", "14855 mor 4 3\n", "14904 veiller 4 7\n", "15249 selfsupporting 4 14\n", "15304 lene 4 4\n", "15429 ofdoor 4 6\n", "15441 homekeeper 4 10\n", "15465 mei 4 3\n", "15502 ake 4 3\n", "15610 bons 4 4\n", "15759 lnd 4 3\n", "15782 toif 4 4\n", "15846 samado 4 6\n", "15877 combatting 4 10\n", "15994 erly 4 4\n", "14560 fre 4 3\n", "14487 mment 4 5\n", "13108 conserver 4 9\n", "14408 tial 4 4\n", "13175 cepted 4 6\n", "13189 gorst 4 5\n", "13207 clubb 4 5\n", "13228 flueless 4 8\n", "13242 iealth 4 6\n", "13250 salud 4 5\n", "13274 rettger 4 7\n", "13276 schlickeysen 4 12\n", "13282 ook 4 3\n", "13377 sulting 4 7\n", "13764 bowsfield 4 9\n", "13877 portunity 4 9\n", "13976 ficient 4 7\n", "13982 huchard 4 7\n", "14050 trom 4 4\n", "14112 stockard 4 8\n", "14323 electriclighted 4 15\n", "14377 tingfang 4 8\n", "14401 cgdking 4 7\n", "27242 warbasse 4 8\n", "\n", "[1606 rows x 3 columns]\n" ] } ], "source": [ "title = 'LH'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2016-12-31T16:39:20.116962", "start_time": "2016-12-31T16:39:19.936216" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for LibM:\n", " spell_error count word_length\n", "412 gallivan 61 8\n", "3949 religio 48 7\n", "1554 miraglia 45 8\n", "6330 tion 43 4\n", "8822 cxsar 40 5\n", "7506 neander 38 7\n", "5181 charta 37 6\n", "2382 ment 32 4\n", "6487 chas 30 4\n", "257 seventhday 29 10\n", "1233 mutchler 28 8\n", "1977 pre 25 3\n", "581 heyburn 23 7\n", "2949 connell 21 7\n", "3971 'the 20 4\n", "7138 haverhill 19 9\n", "766 brevities 19 9\n", "3577 eze 18 3\n", "138 siegel 17 6\n", "7015 interchurch 17 11\n", "3021 cockran 16 7\n", "3185 parte 15 5\n", "4197 'of 15 3\n", "5169 socalled 15 8\n", "2819 sunclay 14 7\n", "169 robb 14 4\n", "9122 gaw 14 3\n", "5880 connorton 14 9\n", "6172 bastile 13 7\n", "5787 bonzano 13 7\n", "7458 fairmount 13 9\n", "5397 claxton 13 7\n", "4754 roseburg 13 8\n", "7545 krieger 13 7\n", "362 mmm 13 3\n", "3340 churchand 13 9\n", "6331 andstate 12 8\n", "8323 hamurabi 12 8\n", "6203 ioo 12 3\n", "3145 smoot 12 5\n", "8617 bannerman 12 9\n", "5308 medo 12 4\n", "8720 ligious 12 7\n", "3723 tions 11 5\n", "2853 gaynor 11 6\n", "5424 rooker 11 6\n", "8016 sundaylaw 11 9\n", "1670 vagh 11 4\n", "695 ernment 11 7\n", "6784 kerens 10 6\n", "4983 libertyloving 10 13\n", "5802 hanly 10 5\n", "3149 lllll 10 5\n", "6424 brien 10 5\n", "2227 prin 10 4\n", "845 gantenbein 10 10\n", "7444 borah 10 5\n", "421 elsnath 10 7\n", "1221 ber 10 3\n", "7523 clinedinst 10 10\n", "3063 mayhew 10 6\n", "7659 twentyfive 10 10\n", "8726 ashby 10 5\n", "8134 cathedra 10 8\n", "1677 cxxxiv 9 6\n", "9031 religi 9 6\n", "8935 ringgold 9 8\n", "377 farreaching 9 11\n", "5688 filiated 9 8\n", "4039 ellamont 9 8\n", "2852 ridpath 9 7\n", "2883 upsall 9 6\n", "6417 frisons 9 7\n", "884 twentyfour 9 10\n", "5579 libert 9 6\n", "2468 ile 9 3\n", "2536 honorius 9 8\n", "3386 tithingman 9 10\n", "5346 diaz 8 4\n", "2851 nozaleda 8 8\n", "5507 ligion 8 6\n", "2677 faneuil 8 7\n", "2958 dagonya 8 7\n", "3749 cmsar 8 5\n", "4812 verdad 8 6\n", "3461 minton 8 6\n", "3876 bartholdt 8 9\n", "3635 woolman 8 7\n", "8300 lil 8 3\n", "554 bourke 8 6\n", "3547 sundayclosing 8 13\n", "8022 ttf 8 3\n", "7984 temporalities 8 13\n", "7779 ili 8 3\n", "4211 stitution 8 9\n", "4251 erty 8 4\n", "4295 laurin 8 6\n", "6643 tiie 8 4\n", "3100 burleson 7 8\n", "4462 ity 7 3\n", "... ... ... ...\n", "8165 alister 5 7\n", "5055 thro 5 4\n", "621 millan 5 6\n", "591 pia 5 3\n", "484 ministerium 4 11\n", "1365 ite 4 3\n", "9180 firstand 4 8\n", "1344 saboth 4 6\n", "8625 stanchly 4 8\n", "199 iie 4 3\n", "204 tkg 4 3\n", "8752 pers 4 4\n", "7186 erance 4 6\n", "9161 pereira 4 7\n", "7157 cutchen 4 7\n", "7348 millington 4 10\n", "7321 querque 4 7\n", "7972 mittee 4 6\n", "511 troduced 4 8\n", "8433 cwsar 4 5\n", "7973 carbo 4 5\n", "751 aked 4 4\n", "723 tlf 4 3\n", "7722 canalejas 4 9\n", "1014 wetmore 4 7\n", "7568 kihrrtu 4 7\n", "541 chainless 4 9\n", "1021 shi 4 3\n", "7357 lello 4 5\n", "8463 brownson 4 8\n", "1034 allister 4 8\n", "1102 francesco 4 9\n", "539 labor' 4 6\n", "1224 ketcham 4 7\n", "8565 hosius 4 6\n", "1503 ayear 4 5\n", "3750 prima 4 5\n", "6334 grosscup 4 8\n", "1518 'for 4 4\n", "4449 crozer 4 6\n", "5052 nct 4 3\n", "3075 ofi 4 3\n", "4902 tkr 4 3\n", "3088 duced 4 5\n", "4771 nem 4 3\n", "3130 duval 4 5\n", "4753 ciple 4 5\n", "4740 dred 4 4\n", "4695 attleboro 4 9\n", "4663 legisla 4 7\n", "3263 servance 4 8\n", "4655 sulzer 4 6\n", "4590 tle 4 3\n", "4342 iti 4 3\n", "1567 botsford 4 8\n", "4275 creedal 4 7\n", "4243 pulsory 4 7\n", "4222 scriptions 4 10\n", "3432 bluelaws 4 8\n", "3498 christison 4 10\n", "4145 selfevident 4 11\n", "4081 atheneum 4 8\n", "4066 henshaw 4 7\n", "3632 mee 4 3\n", "4036 casar 4 5\n", "3968 gasless 4 7\n", "3961 aweteranian 4 11\n", "3660 iated 4 5\n", "5066 sannella 4 8\n", "5103 selfsacrifice 4 13\n", "5121 ferred 4 6\n", "5146 exer 4 4\n", "6893 tlie 4 4\n", "6852 lation 4 6\n", "6776 impor 4 5\n", "6747 liberi 4 6\n", "6536 imm 4 3\n", "6496 usconstitution 4 14\n", "1674 kai 4 3\n", "6421 benziger 4 8\n", "1676 twentyfirst 4 11\n", "6372 mehmed 4 6\n", "1687 iow 4 3\n", "1791 torchlights 4 11\n", "1887 reichstag 4 9\n", "6094 tive 4 4\n", "6017 tant 4 4\n", "1999 temere 4 6\n", "2028 fide 4 4\n", "2317 sweetser 4 8\n", "2458 murietta 4 8\n", "2519 llllll 4 6\n", "5453 eral 4 4\n", "2630 lished 4 6\n", "2783 ereign 4 6\n", "5314 porta 4 5\n", "2841 dowling 4 7\n", "2913 steffens 4 8\n", "3069 libertas 4 8\n", "120 sov 4 3\n", "\n", "[311 rows x 3 columns]\n" ] } ], "source": [ "title = 'LibM'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2016-12-31T16:51:35.019413", "start_time": "2016-12-31T16:51:34.935010" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for LUH:\n", " spell_error count word_length\n", "7270 vagh 663 4\n", "19685 ords 471 4\n", "11408 drury 455 5\n", "18121 chas 443 4\n", "10254 suda 353 4\n", "19610 shelbyville 284 11\n", "26642 herrin 275 6\n", "6736 conaughey 271 9\n", "22351 kimberlin 266 9\n", "10191 plake 241 5\n", "18855 wanteda 235 7\n", "22677 kingman 228 7\n", "12336 tri 222 3\n", "10285 mitzelfelt 217 10\n", "5701 rothbury 199 8\n", "14019 devereaux 195 9\n", "23016 coldwater 195 9\n", "4784 urbandale 194 9\n", "20007 tillie 193 6\n", "21806 englewood 182 9\n", "9643 dimondale 176 9\n", "1097 seventhday 175 10\n", "17131 ruh 170 3\n", "16589 mahan 166 5\n", "25912 pengelly 166 8\n", "1093 greenbush 162 9\n", "12595 mattoon 161 7\n", "6175 kittleson 152 9\n", "15862 clellan 151 7\n", "18366 kinderhook 151 10\n", "15963 tatton 150 6\n", "12688 gowen 149 5\n", "6649 palmiter 148 8\n", "5509 herrington 145 10\n", "23118 clintonville 145 12\n", "17297 rapson 144 6\n", "8822 bluford 143 7\n", "22359 unionville 141 10\n", "25533 clenathan 141 9\n", "22579 colton 139 6\n", "5893 horr 138 4\n", "23068 alaiedon 137 8\n", "5209 elmwood 137 7\n", "22867 'the 137 4\n", "3834 emerick 137 7\n", "4752 scand 133 5\n", "9284 trufant 131 7\n", "17153 palo 131 4\n", "23333 underhill 131 9\n", "14917 bloomville 127 10\n", "8513 sabbathschool 126 13\n", "18774 inwood 126 6\n", "4989 watrousville 122 12\n", "10260 sunman 122 6\n", "25352 crandon 119 7\n", "21893 soo 117 3\n", "17430 scholz 113 6\n", "4447 addis 110 5\n", "6805 bello 108 5\n", "2592 hintz 108 5\n", "26501 halderson 108 9\n", "7108 cleora 104 6\n", "13058 bernitt 104 7\n", "20301 lundquist 103 9\n", "4083 mis 103 3\n", "10595 rideout 102 7\n", "22354 eachern 102 7\n", "12931 thos 101 4\n", "766 brethern 99 8\n", "8640 coppock 97 7\n", "11870 mina 96 4\n", "18581 garber 92 6\n", "11365 possman 90 7\n", "17193 bissett 89 7\n", "20526 ludington 89 9\n", "20506 guire 88 5\n", "19949 pontoosuc 87 9\n", "20140 fortville 87 9\n", "15519 zeba 84 4\n", "375 churchschool 83 12\n", "13171 leetsville 83 10\n", "5753 evitts 80 6\n", "22734 'of 80 3\n", "17184 truf 80 4\n", "7891 erald 78 5\n", "20413 rocklane 77 8\n", "3719 junct 77 5\n", "14302 barryton 75 8\n", "15158 remsen 74 6\n", "26513 wegtworth 73 9\n", "22022 elkton 73 6\n", "26405 lausten 72 7\n", "9717 twombly 70 7\n", "20403 maplegrove 70 10\n", "24034 orde 69 4\n", "5534 hardt 69 5\n", "9367 banty 68 5\n", "23662 twentyfive 67 10\n", "127 dighton 66 7\n", "900 crail 66 5\n", "... ... ... ...\n", "22769 mancelona 4 9\n", "22772 dar 4 3\n", "22807 belville 4 8\n", "2338 erickle 4 7\n", "9214 tbe 4 3\n", "2605 jes 4 3\n", "2870 churchmembership 4 16\n", "2636 madson 4 6\n", "2850 fourty 4 6\n", "13325 nathu 4 5\n", "9074 rohr 4 4\n", "13304 cuaig 4 5\n", "22325 ofdoors 4 7\n", "9087 prickitt 4 8\n", "9109 convis 4 6\n", "13208 ortonville 4 10\n", "2744 gladto 4 6\n", "2723 secretarytreasurer 4 18\n", "22390 delp 4 4\n", "22400 publicatior 4 11\n", "2680 whittmore 4 9\n", "9139 cakainion 4 9\n", "2648 het 4 3\n", "8657 literture 4 9\n", "21412 delc 4 4\n", "21401 tithepaying 4 11\n", "14804 chism 4 5\n", "20295 mov 4 3\n", "20297 mitzelfeldt 4 11\n", "4273 nfr 4 3\n", "14718 bently 4 6\n", "8036 nobleville 4 10\n", "8063 biederwolf 4 10\n", "8073 nieetings 4 9\n", "14666 hasbeen 4 7\n", "8075 'od 4 3\n", "20427 caipiras 4 8\n", "4135 urbina 4 6\n", "14606 goblesville 4 11\n", "14603 rone 4 4\n", "3977 waddell 4 7\n", "8092 schoolcraft 4 11\n", "7990 tennesee 4 8\n", "20222 hedwig 4 6\n", "20530 heartsearching 4 14\n", "14879 loami 4 5\n", "4508 konechny 4 8\n", "4502 walkerto 4 8\n", "19891 kirkham 4 7\n", "4475 parshall 4 8\n", "4458 greid 4 5\n", "19942 tunnell 4 7\n", "14930 onal 4 4\n", "7811 everet 4 6\n", "20008 interlineations 4 15\n", "20013 augtst 4 6\n", "20055 elkart 4 6\n", "4353 thetime 4 7\n", "4339 liij 4 4\n", "4336 ering 4 5\n", "20198 ppe 4 3\n", "3911 srawberry 4 9\n", "20584 helzer 4 6\n", "8590 toour 4 5\n", "3475 vella 4 5\n", "21084 schuh 4 5\n", "8405 schoenfeld 4 10\n", "14176 arlie 4 5\n", "21112 posession 4 9\n", "21114 uptegrove 4 9\n", "21116 herkimer 4 8\n", "3413 nem 4 3\n", "21175 otho 4 4\n", "3381 valdamar 4 8\n", "14090 'other 4 6\n", "21331 ddress 4 6\n", "21334 pso 4 3\n", "3360 haughev 4 7\n", "8582 rahr 4 4\n", "21384 berd 4 4\n", "14184 ardenne 4 7\n", "14198 'church 4 7\n", "14527 lorr 4 4\n", "20979 oggs 4 4\n", "14443 colson 4 6\n", "3853 repitched 4 9\n", "8210 finnell 4 7\n", "3733 irresistable 4 12\n", "3687 fahrion 4 7\n", "20792 amberg 4 6\n", "3641 apolis 4 6\n", "14391 wideawake 4 9\n", "3619 year' 4 5\n", "20908 granoila 4 8\n", "14377 hom 4 3\n", "8258 oeakainion 4 10\n", "3545 baraga 4 6\n", "20973 arrangments 4 11\n", "8314 sparren 4 7\n", "5076 life' 4 5\n", "\n", "[2232 rows x 3 columns]\n" ] } ], "source": [ "title = 'LUH'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2017-01-01T16:00:18.518405", "start_time": "2017-01-01T16:00:18.349319" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for NMN:\n", " spell_error count word_length\n", "2957 aro 89 3\n", "9302 leetsville 28 10\n", "5842 willaman 26 8\n", "176 dighton 22 7\n", "3643 evart 21 5\n", "2724 soo 20 3\n", "6990 clellan 19 7\n", "4149 myrta 18 5\n", "4662 altho 18 5\n", "7265 manistee 15 8\n", "8423 beeler 15 6\n", "5608 havo 14 4\n", "523 sho 12 3\n", "5262 tae 12 3\n", "1493 armilda 12 7\n", "1123 thoy 12 4\n", "7469 tne 12 3\n", "1710 vincinity 11 9\n", "735 lich 11 4\n", "8967 thos 11 4\n", "726 ludington 11 9\n", "6487 aee 11 3\n", "2395 ich 9 3\n", "4644 ence 9 4\n", "8887 blesser 9 7\n", "8851 lcrd 9 4\n", "7784 ichigan 9 7\n", "1109 confe 9 5\n", "6809 wcrk 9 4\n", "7731 sabath 8 6\n", "1871 thoir 8 5\n", "2744 sabbathschool 8 13\n", "2064 pre 8 3\n", "8343 irs 8 3\n", "8731 ork 8 3\n", "6430 nee 8 3\n", "8466 ent 8 3\n", "4616 ith 7 3\n", "7838 anc 7 3\n", "2461 recomend 7 8\n", "2197 ehe 7 3\n", "8606 'he 7 3\n", "5589 eople 7 5\n", "6022 rth 7 3\n", "4167 ood 6 3\n", "3932 fcr 6 3\n", "3827 pooplo 6 6\n", "5757 ths 6 3\n", "5865 lichigan 6 8\n", "5989 stedman 6 7\n", "6566 manistique 6 10\n", "3589 bracebridge 6 11\n", "9607 djork 6 5\n", "8696 baurain 6 7\n", "8804 scottvillo 6 10\n", "9311 sprague 6 7\n", "5365 thr 5 3\n", "922 sablath 5 7\n", "8898 yoar 5 4\n", "1688 sdhool 5 6\n", "8933 nany 5 4\n", "9285 hee 5 3\n", "7580 laketon 5 7\n", "1351 ilichigan 5 9\n", "5789 sabbatheschool 5 14\n", "1024 lan 5 3\n", "5887 oportunity 5 10\n", "9316 ime 5 3\n", "1835 ars 5 3\n", "6232 ele 5 3\n", "6309 onference 5 9\n", "6319 helmer 5 6\n", "732 shoulu 5 6\n", "669 millan 5 6\n", "6438 nal 5 3\n", "6486 ers 5 3\n", "543 brothor 5 7\n", "362 tht 5 3\n", "6793 nester 5 6\n", "6848 vory 5 4\n", "5267 aed 5 3\n", "6942 manton 5 6\n", "5172 liesick 5 7\n", "2276 chas 5 4\n", "3191 ther 5 4\n", "3119 tnat 5 4\n", "8720 eichigan 5 8\n", "2606 ake 5 3\n", "8009 assionary 5 9\n", "7986 ust 5 3\n", "4324 woula 5 5\n", "4346 ler 5 3\n", "4405 eas 5 3\n", "2603 helvig 5 6\n", "5143 ment 5 4\n", "4803 triplett 5 8\n", "4840 schcol 5 6\n", "4890 linos 5 5\n", "2068 dingman 5 7\n", "2038 nen 5 3\n", "7477 lilah 4 5\n", "7166 ick 4 3\n", "8689 socioty 4 7\n", "9508 feom 4 4\n", "8560 hav 4 3\n", "9345 eeople 4 6\n", "7445 sehool 4 6\n", "8289 haee 4 4\n", "8771 theie 4 5\n", "8250 t'e 4 3\n", "9170 a'e 4 3\n", "7910 wil 4 3\n", "7842 esick 4 5\n", "8796 ycu 4 3\n", "8621 otc 4 3\n", "86 shee 4 4\n", "6862 sistor 4 6\n", "3255 vith 4 4\n", "2891 toskey 4 6\n", "2664 somo 4 4\n", "2622 tio 4 3\n", "2582 tay 4 3\n", "2553 ile 4 3\n", "2543 'or 4 3\n", "2047 whon 4 4\n", "1762 lnrd 4 4\n", "1527 fetoskey 4 8\n", "1497 liko 4 4\n", "1451 shoula 4 6\n", "1420 ple 4 3\n", "1367 ren 4 3\n", "1313 potoskoy 4 8\n", "828 aith 4 4\n", "355 ang 4 3\n", "264 seventhday 4 10\n", "2938 alth 4 4\n", "3334 bain 4 4\n", "6697 achigan 4 7\n", "3402 preyer 4 6\n", "6627 tir 4 3\n", "6412 ance 4 4\n", "5775 kenney 4 6\n", "5732 euickly 4 7\n", "5479 ond 4 3\n", "5392 ussionary 4 9\n", "5383 yoe 4 3\n", "5269 eee 4 3\n", "4116 'the 4 4\n", "4093 goffar 4 6\n", "4055 mber 4 4\n", "4001 onavay 4 6\n", "3862 tions 4 5\n", "3807 timo 4 4\n", "3785 oraway 4 6\n", "3756 ame 4 3\n", "3474 thet 4 4\n", "5129 peoele 4 6\n" ] } ], "source": [ "title = 'NMN'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2017-01-01T16:34:58.934623", "start_time": "2017-01-01T16:34:58.818789" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for PHJ:\n", " spell_error count word_length\n", "697 sel 255 3\n", "19978 ournal 129 6\n", "278 societyl 80 8\n", "16362 munn 73 4\n", "4346 allerton 58 8\n", "15963 misses' 56 7\n", "17450 tion 54 4\n", "3863 urnal 51 5\n", "17316 teviperance 50 11\n", "2849 'em 47 3\n", "16736 cloe 47 4\n", "11718 fahr 46 4\n", "10923 sitz 45 4\n", "2198 pre 45 3\n", "14750 'the 39 4\n", "20103 functual 38 8\n", "18851 societya 37 8\n", "6270 onehalf 36 7\n", "8280 societyj 35 8\n", "1905 jourxal 34 7\n", "12956 ment 34 4\n", "13211 cigaret 34 7\n", "7143 vilas 34 5\n", "12156 weiherweg 33 9\n", "7734 monthlydevoted 32 14\n", "2492 firstclass 30 10\n", "15761 societys 30 8\n", "10331 dyo 29 3\n", "16018 thermo 29 6\n", "8312 actina 29 6\n", "10109 preventivesimple 28 16\n", "6886 ance 27 4\n", "19427 rowell 27 6\n", "188 robb 26 4\n", "18781 jenness 26 7\n", "238 ful 26 3\n", "7267 chas 26 4\n", "19965 thos 25 4\n", "19151 societym 25 8\n", "13022 fehr 24 4\n", "18703 societyc 24 8\n", "6167 recipespost 24 11\n", "1149 dio 24 3\n", "9535 powes 24 5\n", "6870 ralston 23 7\n", "5665 cigarets 23 8\n", "7948 japana 23 6\n", "2510 stinson 22 7\n", "246 nelia 22 5\n", "9473 abbie 22 5\n", "11872 rodolph 22 7\n", "20550 washingall 21 10\n", "13242 soo 21 3\n", "8431 bahler 21 6\n", "4511 akersgaden 21 10\n", "6945 ioo 21 3\n", "16664 wyman 21 5\n", "14456 tions 20 5\n", "8468 fasteningwith 20 13\n", "10230 sah 20 3\n", "5982 adjustably 20 10\n", "10508 limbstroubles 20 13\n", "10824 rocka 20 5\n", "2062 drumm 19 5\n", "17540 easton 19 6\n", "5734 jou 18 3\n", "15970 vill 18 4\n", "10286 hechtman 18 8\n", "856 lld 18 3\n", "14263 carolinan 18 9\n", "271 vith 18 4\n", "4102 gauzes 18 6\n", "12091 clure 17 5\n", "13660 abouts 17 6\n", "2898 sansome 17 7\n", "19258 ventillation 17 12\n", "18032 ish 17 3\n", "9839 callyour 17 8\n", "840 hutchings 17 9\n", "13956 aimes 17 5\n", "15755 bloodvessels 17 12\n", "1379 depa 17 4\n", "10670 nuttygrains 17 11\n", "1099 dore 17 4\n", "4846 dodds 16 5\n", "3544 osed 16 4\n", "3087 diseasea 16 8\n", "16434 cambie 16 6\n", "17175 illy 16 4\n", "18367 ole 16 3\n", "12535 pennellsuydam 16 13\n", "2797 rnal 16 4\n", "6012 rorer 16 5\n", "8009 halfmorocco 16 11\n", "20650 demorest 16 8\n", "9656 rey 16 3\n", "6311 englandn 16 8\n", "19383 acific 16 6\n", "13936 nux 16 3\n", "8757 agt 16 3\n", "... ... ... ...\n", "11398 muchas 4 6\n", "12290 spongings 4 9\n", "11491 diretory 4 8\n", "11495 dere 4 4\n", "11642 correa 4 6\n", "11731 ertal 4 5\n", "11850 cise 4 4\n", "11861 ite 4 3\n", "12671 thinkin 4 7\n", "10774 oue 4 3\n", "10741 tht 4 3\n", "13189 tink 4 4\n", "10122 sleepingrooms 4 13\n", "13308 recamier 4 8\n", "13307 sicians 4 7\n", "13281 goodbut 4 7\n", "13234 keeley 4 6\n", "10298 pres't 4 6\n", "10335 wante 4 5\n", "10685 doin 4 4\n", "10380 logue 4 5\n", "10526 masse 4 5\n", "10578 murdock 4 7\n", "10620 broster 4 7\n", "10661 cata 4 4\n", "12801 ijouseleld 4 10\n", "8618 physiciani 4 10\n", "8606 oliveoil 4 8\n", "8521 ari 4 3\n", "14463 creelc 4 6\n", "5889 quired 4 6\n", "16016 repre 4 5\n", "15972 alabamad 4 8\n", "5979 englands 4 8\n", "6013 tts 4 3\n", "6172 medi 4 4\n", "6201 turbinated 4 10\n", "6230 eunson 4 6\n", "6310 ket 4 3\n", "15816 twentytwo 4 9\n", "15801 keeler 4 6\n", "15773 labarriere 4 10\n", "6460 rth 4 3\n", "6493 niemeyer 4 8\n", "6778 perience 4 8\n", "16069 trir 4 4\n", "5757 talofa 4 6\n", "5730 tarlets 4 7\n", "5236 m'clure 4 7\n", "5081 irv 4 3\n", "5084 kirkham 4 7\n", "5088 shust 4 5\n", "5221 cious 4 5\n", "16429 xit 4 3\n", "16368 masseed 4 7\n", "5341 kneipp 4 6\n", "5677 breethe 4 7\n", "5423 indi 4 4\n", "16252 'an 4 3\n", "5553 stockines 4 9\n", "5617 zoth 4 4\n", "5638 childrenwill 4 12\n", "11981 fralthfully 4 11\n", "6910 gwine 4 5\n", "15626 bress 4 5\n", "15419 vrooman 4 7\n", "14611 toa 4 3\n", "14839 dess 4 4\n", "8202 ial 4 3\n", "14720 cta 4 3\n", "8270 tobe 4 4\n", "14626 theonlysewingmachine 4 20\n", "8319 milfred 4 7\n", "8343 eatty 4 5\n", "8180 doan 4 4\n", "8384 'sw 4 3\n", "8464 neurine 4 7\n", "8484 ost 4 3\n", "14536 fcr 4 3\n", "14504 pintsch 4 7\n", "14468 sentinelone 4 11\n", "14845 esculapius 4 10\n", "8117 quartettes 4 10\n", "15399 'make 4 5\n", "7915 havergal 4 8\n", "15388 spect 4 5\n", "15375 tti 4 3\n", "7552 gauses 4 6\n", "7677 dillingham 4 10\n", "15335 altho 4 5\n", "15253 groshen 4 7\n", "15171 konut 4 5\n", "8072 cial 4 4\n", "15065 'if 4 3\n", "15014 mful 4 4\n", "14967 sanitarims 4 10\n", "8020 tne 4 3\n", "14944 wery 4 4\n", "14936 illustratedjust 4 15\n", "16 foo 4 3\n", "\n", "[852 rows x 3 columns]\n" ] } ], "source": [ "title = 'PHJ'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T12:43:37.042290", "start_time": "2017-01-03T12:43:36.996402" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for PTAR:\n", " spell_error count word_length\n", "71 ver 78 3\n", "2050 'the 49 4\n", "3524 ment 46 4\n", "2269 holies 39 6\n", "3671 tion 34 4\n", "5131 'of 23 3\n", "2944 storrs 23 6\n", "5342 eze 17 3\n", "2894 ments 17 5\n", "5260 'to 13 3\n", "933 ful 12 3\n", "2946 ninevah 12 7\n", "2558 thi 12 3\n", "5761 tuary 12 5\n", "419 nant 11 4\n", "2572 pre 11 3\n", "5570 tions 11 5\n", "557 ture 10 4\n", "2341 ble 10 3\n", "2983 gon 9 3\n", "1623 hagion 9 6\n", "563 ofthe 9 5\n", "5716 'was 9 4\n", "4063 'that 9 5\n", "105 ple 9 3\n", "2352 mal 9 3\n", "5126 vers 8 4\n", "4718 'and 8 4\n", "4141 dence 8 5\n", "5472 waymark 7 7\n", "1580 'is 7 3\n", "378 rusalem 7 7\n", "5404 topsham 7 7\n", "303 jno 6 3\n", "2198 ernacle 6 7\n", "652 ile 6 3\n", "270 lxxviii 6 7\n", "4830 ond 6 3\n", "2398 lviii 6 5\n", "815 ceive 6 5\n", "1385 macknight 6 9\n", "3433 'his 6 4\n", "3002 quities 6 7\n", "464 ved 6 3\n", "3661 inthe 6 5\n", "1930 tience 6 6\n", "5435 sus 6 3\n", "4039 jeru 5 4\n", "3680 tbe 5 3\n", "4114 wil 5 3\n", "4862 ery 5 3\n", "3444 lieve 5 5\n", "3287 cond 5 4\n", "3746 numberer 5 8\n", "3373 ance 5 4\n", "3523 binius 5 6\n", "3980 ish 5 3\n", "3790 fassett 5 7\n", "3556 provi 5 5\n", "3440 withthe 5 7\n", "5640 pickands 5 8\n", "476 lished 5 6\n", "446 xlv 5 3\n", "937 'their 5 6\n", "5528 chronologers 5 12\n", "5186 eis 5 3\n", "1877 swer 5 4\n", "4905 hovah 4 5\n", "4581 medo 4 4\n", "5713 lxv 4 3\n", "5648 tes 4 3\n", "4558 daythe 4 6\n", "5498 'in 4 3\n", "5485 peo 4 3\n", "5466 pired 4 5\n", "4591 mation 4 6\n", "5398 brn 4 3\n", "5378 sation 4 6\n", "4725 'from 4 5\n", "5373 enq 4 3\n", "4763 exthe 4 5\n", "4776 lieved 4 6\n", "252 'were 4 5\n", "4779 fect 4 4\n", "4288 jerico 4 6\n", "5235 vation 4 6\n", "5251 dren 4 4\n", "537 tures 4 5\n", "4197 pinney 4 6\n", "1302 mit 4 3\n", "2072 ged 4 3\n", "1998 tant 4 4\n", "1975 theni 4 5\n", "1807 vir 4 3\n", "1644 circleville 4 11\n", "1430 cii 4 3\n", "1233 ral 4 3\n", "2374 ther 4 4\n", "1149 newmoon 4 7\n", "1095 millenium 4 9\n", "744 sary 4 4\n", "662 itt 4 3\n", "615 nology 4 6\n", "593 mandment 4 8\n", "2161 thefulfillment 4 14\n", "2441 ent 4 3\n", "4026 worlda 4 6\n", "3369 'or 4 3\n", "3589 'by 4 3\n", "332 millerism 4 9\n", "3518 'but 4 4\n", "3497 cxxxii 4 6\n", "543 refered 4 7\n", "3418 hea 4 3\n", "3293 'be 4 3\n", "2536 'you 4 4\n", "103 tified 4 6\n", "2994 shimper 4 7\n", "379 rael 4 4\n", "2831 thatthe 4 7\n", "2640 yond 4 4\n", "442 truththe 4 8\n", "3214 sabbathday 4 10\n" ] } ], "source": [ "title = 'PTAR'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-01-03T15:40:10.943297", "start_time": "2017-01-03T15:40:10.675218" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for PUR:\n", " spell_error count word_length\n", "33982 tion 627 4\n", "180 elhany 490 6\n", "46410 seventhday 448 10\n", "53016 ords 407 4\n", "11275 ence 380 4\n", "6552 ment 308 4\n", "48811 chas 304 4\n", "39826 sabbathschool 297 13\n", "31687 pherson 287 7\n", "45912 ference 281 7\n", "3862 'the 273 4\n", "27230 verah 260 5\n", "39528 secretarym 236 10\n", "40940 ers 231 3\n", "13349 ber 222 3\n", "9927 pepperwood 222 10\n", "48419 ple 218 3\n", "39220 sda 209 3\n", "24261 twentyfifth 167 11\n", "11802 sionary 163 7\n", "28419 mis 161 3\n", "48884 'of 155 3\n", "6448 secretaryj 154 10\n", "23279 pre 152 3\n", "17531 tions 152 5\n", "32643 agentf 150 6\n", "22085 treasurerb 149 10\n", "34739 committeee 147 10\n", "52274 kinleyville 146 11\n", "35218 'to 133 3\n", "28351 humbert 128 7\n", "51715 presidente 126 10\n", "52 ary 126 3\n", "22821 phcenix 125 7\n", "15008 eral 124 4\n", "7235 ent 122 3\n", "5063 pac 121 3\n", "22015 'and 120 4\n", "37332 pasqual 119 7\n", "35741 ful 116 3\n", "33879 kibbin 115 6\n", "45053 'union 115 6\n", "11697 cific 108 5\n", "30462 californianevada 105 16\n", "43888 hebard 103 6\n", "34920 ance 103 4\n", "26007 edendale 101 8\n", "24694 fornia 100 6\n", "32992 rulison 97 7\n", "49155 ern 94 3\n", "22475 bers 92 4\n", "38531 edu 89 3\n", "22126 guire 89 5\n", "41044 ments 86 5\n", "46758 sabbathkeepers 85 14\n", "30029 belvail 83 7\n", "48966 twentyfive 80 10\n", "1549 dren 80 4\n", "40644 ble 79 3\n", "35192 peo 78 3\n", "28546 ture 74 4\n", "51930 committeej 71 10\n", "2443 paign 71 5\n", "6527 ters 70 4\n", "48943 tressa 70 6\n", "30607 mayers 70 6\n", "44518 ceived 69 6\n", "17289 helligso 69 8\n", "25107 nia 68 3\n", "32574 fice 68 4\n", "33866 lege 68 4\n", "22682 secretaryw 66 10\n", "29813 pencilgrams 66 11\n", "27957 presidentj 65 10\n", "2702 'in 65 3\n", "38441 sions 65 5\n", "30328 terest 64 6\n", "41957 ning 64 4\n", "4972 kenzie 61 6\n", "6053 spriggs 60 7\n", "43637 churchschool 60 12\n", "43073 desmarets 59 9\n", "53525 snideman 58 8\n", "42711 nis 58 3\n", "37847 ery 57 3\n", "7652 tional 57 6\n", "21645 findley 57 7\n", "1228 sabbathschools 56 14\n", "39009 wanteda 56 7\n", "5502 inthe 55 5\n", "26236 onehalf 55 7\n", "4947 tarium 54 6\n", "20712 ordrs 54 5\n", "43747 ventist 53 7\n", "22750 nellis 53 6\n", "29221 althaus 53 7\n", "38934 ren 52 3\n", "53519 ottie 52 5\n", "44458 ioo 52 3\n", "12950 tive 51 4\n", "... ... ... ...\n", "19490 arwill 1 6\n", "19491 ocality 1 7\n", "19492 aeefrinvaued 1 12\n", "19413 resociation 1 11\n", "19412 filllah 1 7\n", "19411 tearstained 1 11\n", "19341 illhers 1 7\n", "19332 ketc'aum 1 8\n", "19333 convenone 1 9\n", "19334 everythingwith 1 14\n", "19335 iereby 1 6\n", "19336 twentyexpenditure 1 17\n", "19337 andkoss 1 7\n", "19338 'tpposing 1 9\n", "19339 calimissions 1 12\n", "19340 seventhnight 1 12\n", "19342 liabit 1 6\n", "19328 patriif 1 7\n", "19344 trueas 1 6\n", "19348 framily 1 7\n", "19349 iffeathing 1 10\n", "19350 vey 1 3\n", "19351 broththen 1 9\n", "19352 whichhave 1 9\n", "19353 knowlacquainted 1 15\n", "19354 encouragconsideration 1 21\n", "19355 eirpense 1 8\n", "19330 ovotilt 1 7\n", "19327 agpapers 1 8\n", "19357 comcountry 1 10\n", "19313 eservices 1 9\n", "19301 stiperintendent 1 15\n", "19302 likelyto 1 8\n", "19303 septemsending 1 13\n", "19304 unaca 1 5\n", "19305 thalt 1 5\n", "19307 bringhere 1 9\n", "19308 mewith 1 6\n", "19309 primiwill 1 9\n", "19310 libetty 1 7\n", "19315 campattend 1 10\n", "19325 hopeduty 1 8\n", "19316 faceto 1 6\n", "19317 useof 1 5\n", "19318 saniof 1 6\n", "19319 dothese 1 7\n", "19320 sanitaretable 1 13\n", "19321 volare 1 6\n", "19322 saniwho 1 7\n", "19323 resaveci 1 8\n", "19324 rade 1 4\n", "19356 eyegate 1 7\n", "19358 suii 1 4\n", "19410 ioft 1 4\n", "19397 chao 1 4\n", "19387 anticipa 1 8\n", "19388 extenunless 1 11\n", "19389 sacrido 1 7\n", "19390 carmichaela 1 11\n", "19392 mimor 1 5\n", "19393 bedtions 1 8\n", "19394 mosiac 1 6\n", "19395 nrany 1 5\n", "19396 departgestion 1 13\n", "19398 gairo 1 5\n", "19384 messaore 1 8\n", "19399 everycommunity 1 14\n", "19400 watanga 1 7\n", "19401 preseries 1 9\n", "19402 baccalaurette 1 13\n", "19403 faumi 1 5\n", "19405 douthe 1 6\n", "19406 prille 1 6\n", "19408 peogood 1 7\n", "19409 nebber 1 6\n", "19386 unreliarect 1 11\n", "19383 awaywhen 1 8\n", "19359 sepaworld 1 9\n", "19371 treastirer 1 10\n", "19360 meantheir 1 9\n", "19361 aplowed 1 7\n", "19362 missionnoon 1 11\n", "19363 womengave 1 9\n", "19364 genwe 1 5\n", "19366 believersalso 1 13\n", "19367 sewith 1 6\n", "19368 durenter 1 8\n", "19369 serness 1 7\n", "19372 thenceto 1 8\n", "19382 unfavoris 1 9\n", "19373 e'cientlida 1 11\n", "19374 uct 1 3\n", "19375 thingsin 1 8\n", "19376 matanavat 1 9\n", "19377 oeta 1 4\n", "19378 vaiue 1 5\n", "19379 sabforth 1 8\n", "19380 preslege 1 8\n", "19381 laorn 1 5\n", "54011 wagonmaker 1 10\n", "\n", "[53691 rows x 3 columns]\n" ] } ], "source": [ "title = 'PUR'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 0, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T10:10:02.716060", "start_time": "2017-01-04T10:10:01.633422" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for RH:\n", " spell_error count word_length\n", "228574 tion 5691 4\n", "528644 'the 5093 4\n", "64390 brn 3962 3\n", "359021 ment 2885 4\n", "385865 pre 2872 3\n", "128283 seventhday 2847 10\n", "601139 chas 2837 4\n", "78614 'of 2796 3\n", "309121 ets 2249 3\n", "291436 eze 2209 3\n", "579891 mal 2047 3\n", "52843 'to 2030 3\n", "332463 ahd 1988 3\n", "209212 'and 1824 4\n", "674347 sabbathschool 1771 13\n", "642599 aro 1622 3\n", "28185 tions 1538 5\n", "88323 sel 1511 3\n", "194290 'be 1475 3\n", "366775 ence 1386 4\n", "360182 ent 1320 3\n", "29268 thi 1237 3\n", "501423 ers 1234 3\n", "4367 'in 1202 3\n", "414567 ments 1155 5\n", "599375 ver 1075 3\n", "532312 tbe 1042 3\n", "448553 ple 1040 3\n", "65398 ble 1026 3\n", "107355 ofthe 998 5\n", "278367 sabbathkeepers 978 14\n", "579962 ful 960 3\n", "342546 sabba 959 5\n", "217399 'by 943 3\n", "450318 'that 900 5\n", "45873 'he 850 3\n", "282692 ber 775 3\n", "391606 thos 754 4\n", "422447 ference 737 7\n", "9245 ance 733 4\n", "248739 jno 732 3\n", "96380 'is 730 3\n", "598240 'have 728 5\n", "505083 overcomer 721 9\n", "78756 twentyfive 713 10\n", "394166 mis 710 3\n", "372411 tem 701 3\n", "211974 ith 690 3\n", "142738 ity 686 3\n", "56726 ole 678 3\n", "327877 tle 656 3\n", "23743 'for 655 4\n", "597100 xxiiil 655 6\n", "571960 ther 644 4\n", "672889 ren 639 3\n", "576377 inthe 623 5\n", "111480 'his 605 4\n", "200579 bas 600 3\n", "115450 bno 597 3\n", "660508 xviil 590 5\n", "142972 nee 587 3\n", "275006 dobney 580 6\n", "580577 xxivl 564 5\n", "504388 sabbaton 557 8\n", "529044 ous 548 3\n", "546784 eral 541 4\n", "79050 ern 540 3\n", "638354 tidende 534 7\n", "634126 xxiil 533 5\n", "641418 whitford 529 8\n", "361997 eview 528 5\n", "216059 tian 528 4\n", "245632 ioo 522 3\n", "478536 xviiil 517 6\n", "221592 agt 515 3\n", "10287 ots 506 3\n", "416438 firstday 505 8\n", "594672 'but 503 4\n", "356069 anb 503 3\n", "348815 'has 503 4\n", "45888 ture 503 4\n", "536842 whi 494 3\n", "466292 soo 493 3\n", "324209 frisbie 491 7\n", "682045 ceived 491 6\n", "378126 medo 487 4\n", "143450 peo 477 3\n", "350974 dren 472 4\n", "225416 'as 469 3\n", "436843 tiie 466 4\n", "301953 ise 458 3\n", "632528 micr 458 4\n", "16554 ject 457 4\n", "532950 ters 456 4\n", "381391 ure 449 3\n", "593726 'been 448 5\n", "128332 'we 443 3\n", "49350 fon 441 3\n", "201984 susp 438 4\n", "65221 irs 434 3\n", "... ... ... ...\n", "192758 irm 11 3\n", "192094 'happiness 11 10\n", "595697 shumate 11 7\n", "191245 itj 11 3\n", "190774 'obey 11 5\n", "596416 anumber 11 7\n", "591710 rbirat 11 6\n", "189820 monze 11 5\n", "188954 debted 11 6\n", "188767 upbn 11 4\n", "596772 othat 11 5\n", "188473 fufilled 11 8\n", "186644 helieveth 11 9\n", "193176 isees 11 5\n", "594720 hoppie 11 6\n", "193515 'officers 11 9\n", "193808 bossert 11 7\n", "594401 genf 11 4\n", "194468 bam 11 3\n", "194638 'district 11 9\n", "195996 shouid 11 6\n", "593485 vli 11 3\n", "592592 puld 11 4\n", "592456 newsom 11 6\n", "196856 whoe 11 4\n", "196913 gium 11 4\n", "592364 esis 11 4\n", "196922 increa 11 6\n", "197124 ceptible 11 8\n", "591847 e't 11 3\n", "212304 nner 11 4\n", "584729 nrk 11 3\n", "213062 laof 11 4\n", "225016 cutchan 11 7\n", "226668 catastrophies 11 13\n", "226624 diator 11 6\n", "578695 autho 11 5\n", "578704 tock 11 4\n", "578972 stocker 11 7\n", "225025 ttinto 11 6\n", "225001 nicolaitans 11 11\n", "213283 'ie 11 3\n", "579297 beif 11 4\n", "579409 dolorosa 11 8\n", "224808 lawit 11 5\n", "224395 'ni 11 3\n", "224354 knowle 11 6\n", "224245 'season 11 7\n", "226887 lty 11 3\n", "227156 vrt 11 3\n", "578105 dli 11 3\n", "228853 tiuth 11 5\n", "229332 wilkie 11 6\n", "576917 morni 11 5\n", "230434 complishing 11 11\n", "230443 'sign 11 5\n", "576232 'difficult 11 10\n", "231900 subjeot 11 7\n", "232005 iaskell 11 7\n", "232114 'spoken 11 7\n", "232282 'conferences 11 12\n", "232350 ponding 11 7\n", "232429 eddyism 11 7\n", "233242 terness 11 7\n", "233305 'ir 11 3\n", "223911 dehim 11 5\n", "579797 virbrook 11 8\n", "223697 'answer 11 7\n", "217387 'possible 11 9\n", "213510 tvittv 11 6\n", "584305 'contains 11 9\n", "584289 brom 11 4\n", "584100 akt 11 3\n", "213591 schbol 11 6\n", "583743 carriacou 11 9\n", "214442 publishi 11 8\n", "214752 freeand 11 7\n", "583459 polanders 11 9\n", "215020 inary 11 5\n", "215573 thousanddollar 11 14\n", "583066 posure 11 6\n", "582949 reatly 11 6\n", "216962 tlds 11 4\n", "218312 gions 11 5\n", "579884 goor 11 4\n", "218392 eartb 11 5\n", "219767 coinmenced 11 10\n", "220141 wito 11 4\n", "220988 wara 11 4\n", "221517 seim 11 4\n", "221552 haller 11 6\n", "581358 retu 11 4\n", "221976 aftr 11 4\n", "581267 'land 11 5\n", "222401 beilhart 11 8\n", "581094 characterthe 11 12\n", "580738 whici 11 5\n", "222731 pampangan 11 9\n", "222872 nill 11 4\n", "13 sabbatit 11 8\n", "\n", "[14693 rows x 3 columns]\n" ] } ], "source": [ "title = 'RH'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 10, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T10:24:34.295797", "start_time": "2017-01-04T10:24:33.856515" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for Sligo:\n", " spell_error count word_length\n", "249 sligon 36 6\n", "1214 schwab 30 6\n", "1300 mattingly 22 9\n", "2318 kuppenheimer 20 12\n", "1058 kamoda 15 6\n", "1938 herzog 14 6\n", "1488 lippart 14 7\n", "586 styleplus 14 9\n", "1388 dietel 14 6\n", "2321 geibel 13 6\n", "1156 rebok 13 5\n", "2293 kimonas 12 7\n", "49 flather 12 7\n", "1849 chesnutt 11 8\n", "1371 ahrens 11 6\n", "1335 friedlander 11 11\n", "1226 greiner 11 7\n", "519 ailes 11 5\n", "1973 quartette 10 9\n", "2392 cardia's 10 8\n", "1090 lenoa 10 5\n", "2403 furnishers 10 10\n", "725 slgonian 9 8\n", "2402 woolgar 9 7\n", "1477 grosner 9 7\n", "2197 iverson 8 7\n", "562 ott 8 3\n", "151 gradye 8 6\n", "2118 kollege 8 7\n", "909 herbst 8 6\n", "2233 minola 8 6\n", "103 blackistone 8 11\n", "1122 kupjian 8 7\n", "712 hallowe'en 8 10\n", "2294 chas 8 4\n", "2316 newmyer 8 7\n", "1129 zink 7 4\n", "2115 battleford 7 10\n", "74 schilberg 7 9\n", "1968 estep 7 5\n", "615 yoshihiro 7 9\n", "558 klothes 7 7\n", "1325 clapp 7 5\n", "1430 tvedt 6 5\n", "1448 voorhis 6 7\n", "1485 nevius 6 6\n", "302 botsford 6 8\n", "1693 deitel 6 6\n", "2098 boquets 6 7\n", "1698 feldman 6 7\n", "1889 jeffries 6 8\n", "2000 dulany 6 6\n", "1278 labrot 6 6\n", "2347 brines 6 6\n", "2394 hirsh's 6 7\n", "1326 rozier 6 6\n", "247 ryneal 6 6\n", "788 muth 6 4\n", "496 sevrens 6 7\n", "1073 monsen 6 6\n", "948 woodwardand 6 11\n", "935 iden 6 4\n", "1130 coyl 6 4\n", "1016 duval 6 5\n", "1243 harkins 6 7\n", "94 preferwhether 5 13\n", "1853 loasby 5 6\n", "1662 carnig 5 6\n", "1770 beamesderfer 5 12\n", "667 gerhart 5 7\n", "2351 greutman 5 8\n", "1726 mercereau 5 9\n", "2254 transtrom 5 9\n", "757 ingeborg 5 8\n", "1020 colea 5 5\n", "1700 barto 5 5\n", "765 treible 5 7\n", "1529 nanking 5 7\n", "592 dyoll 5 5\n", "533 ablewhen 5 8\n", "2001 llylel 5 6\n", "1166 windon 5 6\n", "512 wyche 5 5\n", "1420 prohis 5 6\n", "1357 resseguie 5 9\n", "414 clemen 5 6\n", "53 classmen 5 8\n", "141 callier 5 7\n", "710 washingtondc 4 12\n", "2119 frankin 4 7\n", "1255 maybelle 4 8\n", "692 pleasants 4 9\n", "98 liij 4 4\n", "2353 kimber 4 6\n", "2364 tunesassa 4 9\n", "662 kaelin 4 6\n", "650 yelland 4 7\n", "622 paperyou 4 8\n", "934 dimmock 4 7\n", "1927 pre 4 3\n", "943 willman 4 7\n", "1887 tattbg 4 6\n", "316 siagonian 4 9\n", "1164 eulah 4 5\n", "1568 accessable 4 10\n", "1571 latrobes 4 8\n", "1623 maye 4 4\n", "1742 virbrook 4 8\n", "1760 kewley 4 6\n", "1890 mallatt 4 7\n", "2050 workcleaning 4 12\n", "468 munsch 4 6\n", "1958 mattison 4 8\n", "950 sangster 4 8\n", "108 glickman 4 8\n", "1989 ite 4 3\n", "1996 idetta 4 6\n", "2026 feely 4 5\n", "564 ooletwah 4 8\n" ] } ], "source": [ "title = 'Sligo'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T10:32:33.669871", "start_time": "2017-01-04T10:32:33.593472" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for SOL:\n", " spell_error count word_length\n", "7320 bsl 79 3\n", "2290 agl 51 3\n", "6457 mutchler 44 8\n", "6427 sabbatteans 38 11\n", "3200 loth 38 4\n", "262 tion 37 4\n", "4502 'the 33 4\n", "4139 farmakis 29 8\n", "7683 ment 28 4\n", "3089 saloonmen 27 9\n", "3896 sundayclosing 26 13\n", "4660 ioo 25 3\n", "5003 wctu 24 4\n", "1777 combinationthe 24 14\n", "4242 kishineff 23 9\n", "5299 faul 22 4\n", "7843 schurman 20 8\n", "717 selfgovernment 20 14\n", "6981 rampolla 20 8\n", "6735 sundayno 19 8\n", "957 ourduty 19 7\n", "6102 allister 19 8\n", "6371 seventhday 19 10\n", "1600 saloonkeepers 19 13\n", "7365 theliquor 18 9\n", "6441 socalled 17 8\n", "180 platt 16 5\n", "5179 'of 16 3\n", "4775 tions 15 5\n", "3487 pre 15 3\n", "6932 saloonkeeper 15 12\n", "1808 chas 14 4\n", "2287 sundayenforcement 14 17\n", "6541 thos 14 4\n", "3115 birney 14 6\n", "6835 'to 13 3\n", "1835 tien 13 4\n", "2934 temperanceand 13 13\n", "1524 muskoka 12 7\n", "4292 milman 12 6\n", "7044 churchand 12 9\n", "5162 guidi 12 5\n", "4509 tsin 12 4\n", "1593 grocerymen 11 10\n", "5727 satolli 11 7\n", "1617 ricans 11 6\n", "270 sundaylaw 11 9\n", "5720 birnie 11 6\n", "1411 hine 10 4\n", "3165 'with 10 5\n", "1350 parte 10 5\n", "7435 gohier 10 6\n", "622 mala 10 4\n", "5723 lawabiding 9 10\n", "2205 godgiven 9 8\n", "7552 postoffices 9 11\n", "2536 employes 9 8\n", "713 jailor 9 6\n", "2307 munn 9 4\n", "5603 farreaching 9 11\n", "2014 'and 9 4\n", "2006 twentyfive 9 10\n", "1684 vires 9 5\n", "4370 freethought 9 11\n", "2087 brien 8 5\n", "5168 thwing 8 6\n", "3234 montns 8 6\n", "1531 humbert 8 7\n", "7028 tian 8 4\n", "7015 ance 8 4\n", "3122 cossa 8 5\n", "6286 philipps 8 8\n", "401 epist 8 5\n", "5804 rican 8 5\n", "3911 pendergast 8 10\n", "2757 'that 8 5\n", "3375 erty 8 4\n", "4274 secularities 8 12\n", "6888 'is 8 3\n", "1040 ernment 8 7\n", "4376 ljudge 7 6\n", "3320 octabo 7 6\n", "3210 octa'bo 7 7\n", "7847 cormenin 7 8\n", "7120 sparhawk 7 8\n", "279 bergfeldt 7 9\n", "7024 legislationa 7 12\n", "6771 greenburg 7 9\n", "1098 boutwell 7 8\n", "7417 broussa 7 7\n", "7630 weyler 7 6\n", "3737 tke 7 3\n", "5174 charta 7 6\n", "7445 trevier 7 7\n", "7439 beckler 7 7\n", "5150 enactmentment 6 13\n", "5105 seuleuz 6 7\n", "3596 brownists 6 9\n", "5237 martinelli 6 10\n", "3878 hillis 6 6\n", "... ... ... ...\n", "2223 polver 5 6\n", "4203 americanists 5 12\n", "1679 anagni 5 6\n", "484 dechristianizing 5 16\n", "4287 ther 5 4\n", "3374 coun 5 4\n", "4489 jaycox 5 6\n", "4740 rin 5 3\n", "2371 reconcentration 5 15\n", "4546 priebe 5 6\n", "2677 smyth 5 5\n", "2556 benchmen 5 8\n", "4602 chaingang 5 9\n", "243 sabbathbreaking 5 15\n", "215 tothe 5 5\n", "1935 vannutelli 4 10\n", "6654 indefeasable 4 12\n", "7023 cohn 4 4\n", "1682 goldwin 4 7\n", "1742 'blue 4 5\n", "1884 gebennus 4 8\n", "2217 combinaion 4 10\n", "1993 teris 4 5\n", "6792 itis 4 4\n", "6720 riis 4 4\n", "2279 pecci 4 5\n", "2251 buehler 4 7\n", "2228 christion 4 9\n", "1003 turlupins 4 9\n", "1605 lowrie 4 6\n", "501 eell 4 4\n", "7851 oxman 4 5\n", "7795 corario 4 7\n", "7791 peoplethe 4 9\n", "7753 ized 4 4\n", "7733 issueii 4 7\n", "288 proudfit 4 8\n", "289 thingseither 4 12\n", "7644 ters 4 4\n", "363 ual 4 3\n", "505 christain 4 9\n", "1277 willi 4 5\n", "590 shopman 4 7\n", "769 papacythe 4 9\n", "774 appli 4 5\n", "852 fortynine 4 9\n", "977 kensil 4 6\n", "2450 implysa 4 7\n", "1036 illne 4 5\n", "1080 tional 4 6\n", "1130 yalova 4 6\n", "2384 segal 4 5\n", "4766 ncopy 4 5\n", "2527 protestante 4 11\n", "5180 tll 4 3\n", "4108 morrissey 4 9\n", "4111 crescy 4 6\n", "5588 ridpath 4 7\n", "5443 crimmins 4 8\n", "5416 sabath 4 6\n", "5370 ence 4 4\n", "5331 connectedly 4 11\n", "5295 franke 4 6\n", "4272 thibet 4 6\n", "4508 'when 4 5\n", "3732 julydecember 4 12\n", "4532 firstday 4 8\n", "4539 oth 4 3\n", "5128 vali 4 4\n", "5119 relig 4 5\n", "4553 gottlieb 4 8\n", "4557 hoppe 4 5\n", "4690 violi 4 5\n", "4854 mccorkle 4 8\n", "4699 sir' 4 4\n", "3767 greenstein 4 10\n", "3675 por 4 3\n", "6519 soo 4 3\n", "6110 demagogism 4 10\n", "2694 fora 4 4\n", "6395 haye 4 4\n", "2911 christthe 4 9\n", "6364 'person 4 7\n", "6346 ovr 4 3\n", "3024 meeser 4 6\n", "6222 sabbathkeeping 4 14\n", "6140 mallalieu 4 9\n", "3169 apos 4 4\n", "3182 ipany 4 5\n", "3522 ymeaornths 4 10\n", "6069 ious 4 4\n", "3199 pettingill 4 10\n", "6010 sixmonths 4 9\n", "5926 prive 4 5\n", "5921 syar 4 4\n", "3358 oneseventh 4 10\n", "5799 hirsch 4 6\n", "3428 gobernment 4 10\n", "3473 tth 4 3\n", "42 selfevident 4 11\n", "\n", "[258 rows x 3 columns]\n" ] } ], "source": [ "title = 'SOL'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T11:10:21.348796", "start_time": "2017-01-04T11:10:20.844769" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for ST:\n", " spell_error count word_length\n", "143577 tion 2185 4\n", "113227 'the 1527 4\n", "183046 eze 1301 3\n", "188020 altho 1275 5\n", "6577 ment 1184 4\n", "23300 pre 791 3\n", "194004 ets 791 3\n", "49006 'of 782 3\n", "3487 sel 778 3\n", "17579 tions 614 5\n", "145305 mal 565 3\n", "199614 'to 543 3\n", "72990 aro 534 3\n", "131547 'and 531 4\n", "41155 ments 497 5\n", "158770 chas 451 4\n", "11333 ence 431 4\n", "80289 seventhday 392 10\n", "145362 ful 370 3\n", "95919 ers 358 3\n", "5804 ance 354 4\n", "212384 fbr 340 3\n", "62335 ple 340 3\n", "40743 ble 325 3\n", "31478 stuttle 316 7\n", "101645 ity 313 3\n", "63492 'that 303 5\n", "204350 sabbathschool 296 13\n", "2720 'in 287 3\n", "121226 thoroly 266 7\n", "135817 tian 263 4\n", "182936 'em 262 3\n", "7277 ent 259 3\n", "124363 geikie 253 6\n", "130927 igns 250 4\n", "64628 synagog 240 7\n", "176478 thruout 239 7\n", "60157 'is 237 3\n", "139992 gigno 234 5\n", "67187 ofthe 233 5\n", "28654 tht 226 3\n", "97836 sabbaton 224 8\n", "191291 ber 213 3\n", "137965 cigaret 204 7\n", "218177 clure 199 5\n", "113457 ous 199 3\n", "203045 thi 197 3\n", "1568 dren 196 4\n", "20492 ure 194 3\n", "42426 gilfillan 193 9\n", "107206 tle 192 3\n", "28458 mis 191 3\n", "141254 allister 189 8\n", "122747 employes 188 8\n", "122164 'be 178 3\n", "183217 lld 178 3\n", "115487 tbe 175 3\n", "143065 inthe 175 5\n", "120831 neander 166 7\n", "186303 ther 163 4\n", "65198 arv 163 3\n", "105796 cruden 157 6\n", "14795 'for 157 4\n", "111276 mandments 152 9\n", "49084 twentyfive 149 10\n", "42696 moneyorders 149 11\n", "120282 gign 147 4\n", "98269 overcomer 147 9\n", "4852 thoro 147 5\n", "51282 ioo 146 3\n", "76415 robb 145 4\n", "157667 ver 142 3\n", "209182 ceived 142 6\n", "172962 cigarets 142 8\n", "28556 'he 140 3\n", "90237 'neath 139 6\n", "124562 eral 139 4\n", "115900 ters 139 4\n", "115619 socalled 138 8\n", "110014 nal 138 3\n", "92821 sionary 137 7\n", "133260 ith 137 3\n", "111835 tem 136 3\n", "17679 'not 136 4\n", "108430 ise 135 3\n", "18098 ght 135 3\n", "141626 'as 134 3\n", "144909 'it 132 3\n", "203017 sions 130 5\n", "162787 thos 130 4\n", "89982 peo 129 3\n", "49140 ures 129 4\n", "22520 'by 127 3\n", "104369 tite 125 4\n", "72036 ished 124 5\n", "54255 ary 123 3\n", "133856 sus 123 3\n", "157613 sigjts 123 6\n", "59052 eousness 123 8\n", "193729 mony 123 4\n", "... ... ... ...\n", "84442 serviee 4 7\n", "167637 serrant 4 7\n", "166845 sorrowless 4 10\n", "85824 'red 4 4\n", "85838 somo 4 4\n", "166393 merse 4 5\n", "166074 calledto 4 8\n", "86829 nothwith 4 8\n", "86825 messager 4 8\n", "166150 amples 4 6\n", "166161 noother 4 7\n", "86763 barra 4 5\n", "166207 orach 4 5\n", "166263 meeti 4 5\n", "86701 wricox 4 6\n", "86672 errys 4 5\n", "86640 earlie 4 6\n", "86600 'coo 4 4\n", "166371 godemark 4 8\n", "166399 denarii 4 7\n", "85965 thecharacter 4 12\n", "86541 itselfas 4 8\n", "86536 excus 4 5\n", "86434 thegood 4 7\n", "86424 'turn 4 5\n", "86380 clesar 4 6\n", "86337 secute 4 6\n", "166569 thehighest 4 10\n", "86140 zangwill 4 8\n", "86087 studen 4 6\n", "86047 derous 4 6\n", "86033 kolhapur 4 8\n", "166725 'command 4 8\n", "166733 bleness 4 7\n", "167650 alzog 4 5\n", "167675 conditiona 4 10\n", "82560 owu 4 3\n", "84404 onment 4 6\n", "168478 rtonement 4 9\n", "168551 ishe 4 4\n", "83111 jscellaneous 4 12\n", "168593 oursel 4 6\n", "168611 tijles 4 6\n", "168649 saintsa 4 7\n", "83077 acrs 4 4\n", "83064 nino 4 4\n", "168705 brabourne 4 9\n", "168721 cribed 4 6\n", "168729 aiwa 4 4\n", "83052 linde 4 5\n", "83016 'please 4 7\n", "83011 llu 4 3\n", "168852 turtullian 4 10\n", "82972 amsdorf 4 7\n", "168854 imbe 4 4\n", "168881 'indeed 4 7\n", "82899 'communications 4 15\n", "82880 thgm 4 4\n", "82842 riously 4 7\n", "82796 sunto 4 5\n", "169091 sethe 4 5\n", "169093 imself 4 6\n", "82651 sau 4 3\n", "169148 beasee 4 6\n", "169205 'husband 4 8\n", "82581 cfesar 4 6\n", "82569 weilheimer 4 10\n", "168453 oty 4 3\n", "83160 grimage 4 7\n", "168395 fbllow 4 6\n", "168068 esculapius 4 10\n", "84374 nually 4 6\n", "84274 sunclay 4 7\n", "167732 ipr 4 3\n", "167757 greatand 4 8\n", "84241 orby 4 4\n", "84223 vati 4 4\n", "84068 wrrn 4 4\n", "84064 izi 4 3\n", "84043 pecul 4 5\n", "83925 jenin 4 5\n", "83717 whieli 4 6\n", "168014 by' 4 3\n", "83685 protestingly 4 12\n", "168101 unimpeached 4 11\n", "168389 worldof 4 7\n", "83613 efir 4 4\n", "168119 hagios 4 6\n", "168150 entree 4 6\n", "168190 hothe 4 5\n", "83507 destinyof 4 9\n", "83463 pastthe 4 7\n", "83409 midyat 4 6\n", "168270 testhe 4 6\n", "168271 sery 4 4\n", "83407 vergeze 4 7\n", "83403 moffitt 4 7\n", "168315 yosemitevalley 4 14\n", "83291 salvaand 4 8\n", "109488 tomer 4 5\n", "\n", "[13500 rows x 3 columns]\n" ] } ], "source": [ "title = 'ST'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T12:53:58.742422", "start_time": "2017-01-04T12:53:58.465298" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for SUW:\n", " spell_error count word_length\n", "33462 bfl 912 3\n", "2585 agts 838 4\n", "30115 chas 433 4\n", "33942 ords 415 4\n", "10650 bracy 289 5\n", "15563 vagh 282 4\n", "26188 wks 264 3\n", "14101 billups 241 7\n", "17041 chastain 238 8\n", "22054 lennan 233 6\n", "33542 seventhday 219 10\n", "5501 peevy 210 5\n", "18471 schroader 205 9\n", "28371 reichenbach 205 11\n", "33473 ppe 203 3\n", "21102 chenault 193 8\n", "22854 colrey 191 6\n", "4971 'the 191 4\n", "1757 tew 175 3\n", "17521 sof 172 3\n", "29186 ppp 170 3\n", "19603 allman 164 6\n", "34911 thos 157 4\n", "9021 tion 149 4\n", "19270 manous 147 6\n", "28960 ern 138 3\n", "16411 winkler 132 7\n", "17156 hustburg 131 8\n", "28907 griffiths 129 9\n", "18416 bfi 127 3\n", "24703 ference 125 7\n", "31331 lura 122 4\n", "29503 cannada 121 7\n", "19487 ntp 113 3\n", "3918 allran 113 6\n", "2744 cennessee 112 9\n", "17989 rayford 106 7\n", "5040 parkins 104 7\n", "28625 'of 98 3\n", "2688 bodwell 97 7\n", "9029 morphew 96 7\n", "1811 mis 95 3\n", "16723 sabbathschool 93 13\n", "22797 ioo 92 3\n", "14794 ence 90 4\n", "19497 sofp 88 4\n", "15219 deliv'd 86 7\n", "11173 berdon 85 6\n", "16527 ewald 84 5\n", "25736 deliv 84 5\n", "19324 millar 84 6\n", "7666 hirst 78 5\n", "25611 whitford 78 8\n", "30371 pre 77 3\n", "28301 charlsey 76 8\n", "29340 ridder 76 6\n", "5655 wor 76 3\n", "23472 elhany 72 6\n", "34715 memb 70 4\n", "17905 minnis 70 6\n", "2890 romines 69 7\n", "25891 womack 69 6\n", "7571 'to 69 3\n", "32592 krauss 69 6\n", "21563 reiber 68 6\n", "16221 ment 67 4\n", "25313 jno 67 3\n", "18425 sherer 67 6\n", "18212 parizetta 67 9\n", "18671 perthia 66 7\n", "2099 achenbach 65 9\n", "17520 ber 64 3\n", "18144 ers 64 3\n", "28746 'and 64 4\n", "9210 ellabama 63 8\n", "3500 'in 62 3\n", "33738 tri 61 3\n", "12760 frisby 60 6\n", "24067 stoc 60 4\n", "19041 ypmv 60 4\n", "20909 lettie 59 6\n", "19234 totalsa 59 7\n", "1025 garrigan 59 8\n", "28737 twentyfive 57 10\n", "2461 neill 57 5\n", "14968 cheshier 57 8\n", "15118 sewellton 56 9\n", "19583 lanier 56 6\n", "2544 shasky 56 6\n", "14420 drbr 56 4\n", "20272 leod 56 4\n", "21808 ppv 56 3\n", "32096 sie 55 3\n", "29980 bpi 54 3\n", "4938 sellars 52 7\n", "6275 pendas 51 6\n", "29296 woodall 51 7\n", "12911 elford 51 6\n", "25814 sabbathkeepers 51 14\n", "22553 walbert 50 7\n", "... ... ... ...\n", "28427 gesting 4 7\n", "7778 thp 4 3\n", "7720 'till 4 5\n", "7681 arkebauer 4 9\n", "7677 wou 4 3\n", "28546 'goo 4 4\n", "28563 urday 4 5\n", "28594 'three 4 6\n", "7630 periences 4 9\n", "7603 rti 4 3\n", "28642 'see 4 4\n", "7577 sani 4 4\n", "7536 ednesday 4 8\n", "28763 boox 4 4\n", "28797 ures 4 4\n", "9804 seuenth 4 7\n", "26364 mayde 4 5\n", "12670 wasteless 4 9\n", "11918 tennesssee 4 10\n", "11908 desir 4 5\n", "11877 patzkowski 4 10\n", "11727 dence 4 5\n", "11623 llie 4 4\n", "11512 tablished 4 9\n", "24591 conierence 4 10\n", "24635 axwm 4 4\n", "11423 throughthe 4 10\n", "11392 wdr 4 3\n", "11364 iana 4 4\n", "24706 encour 4 6\n", "11341 pebruary 4 8\n", "11208 wth 4 3\n", "11138 elle 4 4\n", "24854 truthladen 4 10\n", "24855 twa 4 3\n", "11097 contribs 4 8\n", "11914 mura 4 4\n", "24449 o'erflow 4 8\n", "24962 'given 4 6\n", "24435 gartley 4 7\n", "12665 recieved 4 8\n", "24127 sse 4 3\n", "24137 isters 4 6\n", "24143 binks 4 5\n", "24188 us' 4 3\n", "12401 ellabatna 4 9\n", "24200 oin 4 3\n", "12382 reso 4 4\n", "12314 loinstana 4 9\n", "12280 retaries 4 8\n", "24285 gra 4 3\n", "24323 wnorwood 4 8\n", "12226 contro 4 6\n", "12185 vayne 4 5\n", "12169 churche 4 7\n", "12102 oodsmark 4 8\n", "12037 pvenue 4 6\n", "11017 seruant 4 7\n", "24966 ited 4 4\n", "26356 welltrained 4 11\n", "10597 haue 4 4\n", "10330 bof 4 3\n", "10276 mangin 4 6\n", "10242 misssionary 4 11\n", "25922 sath 4 4\n", "25947 'their 4 6\n", "10121 ftw 4 3\n", "26048 soutitern 4 9\n", "10111 denomi 4 6\n", "10056 frf 4 3\n", "26155 alister 4 7\n", "26157 sfoc 4 4\n", "10016 fausset 4 7\n", "26258 nealy 4 5\n", "26269 ized 4 4\n", "26271 oneof 4 5\n", "9922 spearwk 4 7\n", "26306 andit 4 5\n", "25726 hree 4 4\n", "25674 jonesbf 4 7\n", "24968 stn 4 3\n", "10608 asse 4 4\n", "11008 notia 4 5\n", "11001 zeichen 4 7\n", "25084 bef 4 3\n", "10963 psr 4 3\n", "10912 idi 4 3\n", "10910 thereis 4 7\n", "25259 twould 4 6\n", "25284 'experience 4 11\n", "25312 profes 4 6\n", "25340 delied 4 6\n", "25395 tennes 4 6\n", "10754 fordbr 4 6\n", "10752 ect 4 3\n", "25592 shornburg 4 9\n", "10680 essary 4 6\n", "10636 adkisson 4 8\n", "10614 fel 4 3\n", "9447 ceeded 4 6\n", "\n", "[2098 rows x 3 columns]\n" ] } ], "source": [ "title = 'SUW'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T14:10:38.206306", "start_time": "2017-01-04T14:10:37.993401" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for TCOG:\n", " spell_error count word_length\n", "5831 'the 106 4\n", "8140 eze 59 3\n", "2897 mal 55 3\n", "2492 'of 51 3\n", "4803 tbe 45 3\n", "6006 mayta 45 5\n", "6951 scudder 40 7\n", "2630 'and 39 4\n", "8792 'to 38 3\n", "7083 agtte 36 5\n", "6087 missi 35 5\n", "8918 seventhday 33 10\n", "7709 hsi 33 3\n", "8872 hasan 28 5\n", "71 epartment 27 9\n", "7036 cburth 25 6\n", "7958 hunchy 24 6\n", "3343 cburtb 24 6\n", "4140 'in 24 3\n", "2122 tion 22 4\n", "1977 'for 22 4\n", "4553 pre 21 3\n", "4260 'be 18 3\n", "1161 puno 18 4\n", "5237 neesima 17 7\n", "2929 outschools 17 10\n", "6241 cburcb 17 6\n", "9194 metlakahtla 15 11\n", "6846 crehore 14 7\n", "9096 tne 14 3\n", "7032 perces 13 6\n", "9914 nez 13 3\n", "7168 thi 13 3\n", "5301 tay 13 3\n", "3281 occum 12 5\n", "7558 idona 12 5\n", "1768 soulwinning 12 11\n", "2374 goin 12 4\n", "7781 ise 12 3\n", "2289 'he 12 3\n", "8126 jule 12 4\n", "3160 buresala 11 8\n", "5325 alf 11 3\n", "7956 'em 11 3\n", "2682 'all 11 4\n", "10135 legiac 11 6\n", "8869 dilawur 11 7\n", "9001 'was 11 4\n", "1750 gon 11 3\n", "229 him' 11 4\n", "1439 obookiah 10 8\n", "2745 'his 10 4\n", "5913 you' 10 4\n", "2341 johan 10 5\n", "7906 seino 10 5\n", "2619 twentyfive 10 10\n", "9331 aette 10 5\n", "3517 serkey 10 6\n", "3615 'that 10 5\n", "8115 nyasaland 10 9\n", "3939 wantedyoung 10 11\n", "9895 ment 10 4\n", "5258 'work 9 5\n", "9799 solusi 9 6\n", "5520 muramatsu 9 9\n", "9260 phuloo 9 6\n", "3598 them' 9 5\n", "3918 'they 9 5\n", "6033 sangster 9 8\n", "617 'one 9 4\n", "6894 hoa 9 3\n", "8179 finster 9 7\n", "2926 selfdenial 9 10\n", "7601 'as 9 3\n", "7823 'church 9 7\n", "1506 neddie 9 6\n", "2307 'it 9 3\n", "8059 thei 9 4\n", "3040 turvy 8 5\n", "6504 litsi 8 5\n", "7961 cburrb 8 6\n", "3401 hetty 8 5\n", "5289 greatorex 8 9\n", "9736 guianas 8 7\n", "68 havergal 8 8\n", "5418 it' 8 3\n", "2137 mis 8 3\n", "2325 abu 8 3\n", "4076 tidens 8 6\n", "9081 ofthe 8 5\n", "4564 'will 8 5\n", "2473 god' 8 4\n", "1144 tosti 8 5\n", "3870 mehemet 8 7\n", "1275 tiie 7 4\n", "1103 thome 7 5\n", "297 nee 7 3\n", "2514 pietro 7 6\n", "9793 floy 7 4\n", "2688 mit 7 3\n", "... ... ... ...\n", "1411 thechurch 4 9\n", "8200 zwemer 4 6\n", "9414 'second 4 7\n", "9759 oldfashioned 4 12\n", "9788 'an 4 3\n", "1383 grose 4 5\n", "564 cooey 4 5\n", "9149 thosewho 4 8\n", "458 thd 4 3\n", "2254 disfellowshiping 4 16\n", "713 brower 4 6\n", "826 ent 4 3\n", "843 'missionary 4 11\n", "9462 'some 4 5\n", "846 servi 4 5\n", "849 fiske 4 5\n", "9630 'most 4 5\n", "9633 herzog 4 6\n", "8969 brouilette 4 10\n", "8921 pitania 4 7\n", "891 ments 4 5\n", "926 faraoa 4 6\n", "1060 cleland 4 7\n", "9692 'said 4 5\n", "1176 hannington 4 10\n", "9745 notruction 4 10\n", "1181 misiones 4 8\n", "8767 chri 4 4\n", "517 nickie 4 6\n", "9769 'would 4 6\n", "8678 hav 4 3\n", "577 kno 4 3\n", "7373 sionarp 4 7\n", "2274 grythyttehed 4 12\n", "5544 fon 4 3\n", "6369 qur 4 3\n", "2945 aleander 4 8\n", "2953 godward 4 7\n", "6235 spe 4 3\n", "6180 'asked 4 6\n", "6140 hini 4 4\n", "2968 ful 4 3\n", "3045 fiveminute 4 10\n", "3071 ingruction 4 10\n", "3089 chau 4 4\n", "5840 'when 4 5\n", "3124 ood 4 3\n", "3125 bao 4 3\n", "5685 wil 4 3\n", "3170 week' 4 5\n", "6422 farningham 4 10\n", "3183 cial 4 4\n", "3188 malekula 4 8\n", "3223 papeite 4 7\n", "5365 conkey 4 6\n", "3268 vendek 4 6\n", "3303 threeminute 4 11\n", "3607 hla 4 3\n", "4730 fello 4 5\n", "4671 'time 4 5\n", "3752 'should 4 7\n", "4518 orno 4 4\n", "3833 ist 4 3\n", "4032 afterwhile 4 10\n", "4166 do' 4 3\n", "2894 peo 4 3\n", "6435 katagiri 4 8\n", "2278 ole 4 3\n", "4061 sabati 4 6\n", "2329 kading 4 6\n", "7896 tsui 4 4\n", "7840 biddings 4 8\n", "7826 elo 4 3\n", "2363 'our 4 4\n", "7808 'good 4 5\n", "7806 speak' 4 6\n", "2420 imo 4 3\n", "7701 barotseland 4 11\n", "7643 messagefilled 4 13\n", "7636 us' 4 3\n", "7616 mur 4 3\n", "2532 kikuvi 4 6\n", "2539 selfsacrifice 4 13\n", "7347 hurch 4 5\n", "6476 iors 4 4\n", "7184 'had 4 4\n", "7172 tohouse 4 7\n", "2610 sunshiners 4 10\n", "7129 sions 4 5\n", "7125 outschool 4 9\n", "7045 'two 4 4\n", "2770 yekichi 4 7\n", "6979 ence 4 4\n", "2787 guire 4 5\n", "6919 uplook 4 6\n", "6909 'under 4 6\n", "6882 ous 4 3\n", "6704 tae 4 3\n", "6598 hildah 4 6\n", "5105 testi 4 5\n", "\n", "[331 rows x 3 columns]\n" ] } ], "source": [ "title = 'TCOG'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T14:26:19.510093", "start_time": "2017-01-04T14:26:19.281194" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for TMM:\n", " spell_error count word_length\n", "4361 raratonga 43 9\n", "3409 buluwayo 37 8\n", "1260 stauffer 20 8\n", "4101 carthy 20 6\n", "1743 kalaka 20 6\n", "1813 karmatar 20 8\n", "2049 hausaland 19 9\n", "2626 okohira 18 7\n", "6066 hasegawa 18 8\n", "4942 schwantes 17 9\n", "2234 basle 17 5\n", "5005 couva 17 5\n", "299 sul 17 3\n", "5688 sabbathschool 16 13\n", "5871 raiatea 15 7\n", "5101 seventhday 15 10\n", "5750 tongatabu 15 9\n", "3775 ioo 14 3\n", "4909 helsingfors 14 11\n", "1087 zambesi 14 7\n", "1536 parana 13 6\n", "2978 mangaia 13 7\n", "4326 ventists 13 8\n", "4014 shiba 12 5\n", "3880 rosas 12 5\n", "2167 ruatan 12 6\n", "5301 crespo 12 6\n", "72 gosmer 12 6\n", "4877 arrowauks 11 9\n", "1336 mis 11 3\n", "1331 ricans 11 6\n", "2965 hungaria 11 8\n", "2379 okahira 11 7\n", "5798 truxillo 11 8\n", "5355 spreckels 11 9\n", "3666 'the 10 4\n", "308 shakker 10 7\n", "4835 nonebala 10 8\n", "339 juticalpa 10 9\n", "5495 talafo 10 6\n", "2193 caribbees 10 9\n", "3410 brethern 9 8\n", "392 newyork 9 7\n", "4916 cherentes 9 9\n", "272 eromanga 9 8\n", "3433 asuncion 9 8\n", "2132 muleback 9 8\n", "200 tion 9 4\n", "4214 kupavula 9 8\n", "2718 palmquist 9 9\n", "3965 sionary 8 7\n", "1435 doble 8 5\n", "5919 henton 8 6\n", "28 esthonians 8 10\n", "1905 cina 8 4\n", "1858 dolphijn 8 8\n", "2178 bluefields 8 10\n", "1764 kumpel 8 6\n", "6168 seamans 7 7\n", "3384 esthonian 7 9\n", "1846 rican 7 5\n", "3674 tsin 7 4\n", "332 aitutaki 7 8\n", "760 neuva 7 5\n", "3773 iery 7 4\n", "2781 skaguay 7 7\n", "3861 montg 7 5\n", "1214 cakobau 7 7\n", "2631 loth 7 4\n", "1225 pellice 7 7\n", "5267 makatea 7 7\n", "3289 lettonian 7 9\n", "3583 crowther 7 8\n", "5144 fukuin 7 6\n", "1472 bootooba 7 8\n", "1498 chas 7 4\n", "3980 mandioca 7 8\n", "2508 agt 6 3\n", "2861 pre 6 3\n", "5027 eze 6 3\n", "4596 escobar 6 7\n", "1098 parvo 6 5\n", "53 ary 6 3\n", "634 pauliasi 6 8\n", "5493 tal 6 3\n", "5490 jno 6 3\n", "5377 learsy 6 6\n", "894 kwangsi 6 7\n", "4641 torre 6 5\n", "996 multum 6 6\n", "984 tse 6 3\n", "2018 ellery 6 6\n", "5224 helvecia 6 8\n", "4830 peverini 6 8\n", "1355 fel 6 3\n", "5054 goteborg 6 8\n", "1519 tien 6 4\n", "3266 umkupavula 6 10\n", "5235 fte 6 3\n", "5236 naini 5 5\n", "3697 lettonians 5 10\n", "3778 fonds 5 5\n", "5592 weekapril 5 9\n", "5643 olancho 5 7\n", "3852 tions 5 5\n", "5843 sabbathkeepers 5 14\n", "5847 handsworth 5 10\n", "5864 titikavaka 5 10\n", "6109 readingsabbath 5 14\n", "4146 blancher 5 8\n", "3305 weekdecember 5 12\n", "5350 ladrone 5 7\n", "16 marash 5 6\n", "6208 ramabai 5 7\n", "521 stanmore 5 8\n", "1563 helouan 5 7\n", "2157 por 5 3\n", "1460 levuka 5 6\n", "2169 dwyer 5 5\n", "892 balada 5 6\n", "2260 marchisio 5 9\n", "2267 pago 5 4\n", "2333 tung 5 4\n", "2557 moko 5 4\n", "2575 chaux 5 5\n", "2853 owari 5 5\n", "823 adamson 5 7\n", "2892 sundayschool 5 12\n", "2949 makomp 5 6\n", "282 toltecs 5 7\n", "229 caribbee 5 8\n", "2948 weekjuly 5 8\n", "5076 nyanza 4 6\n", "5061 bilaspur 4 8\n", "6196 moana 4 5\n", "6177 levu 4 4\n", "1246 roko 4 4\n", "1632 ostlund 4 7\n", "1456 afric 4 5\n", "241 fulahs 4 6\n", "1383 'to 4 3\n", "1357 maritzburg 4 10\n", "1250 nection 4 7\n", "1710 kalopothakes 4 12\n", "5128 robie 4 5\n", "1056 tral 4 4\n", "1220 temne 4 5\n", "319 hausfreund 4 10\n", "445 bedros 4 6\n", "453 canje 4 5\n", "852 weekjanuary 4 11\n", "844 bethuks 4 7\n", "4988 gth 4 3\n", "5496 weekmay 4 7\n", "478 ver 4 3\n", "790 curityba 4 8\n", "705 indo 4 4\n", "606 philopappos 4 11\n", "540 arrowauk 4 8\n", "536 ass'n 4 5\n", "1753 taquary 4 7\n", "3242 signes 4 6\n", "1834 mal 4 3\n", "2038 stellenbosch 4 12\n", "3898 witte 4 5\n", "2668 mollendo 4 8\n", "2674 sentative 4 9\n", "2712 guanaja 4 7\n", "2827 colvin 4 6\n", "3713 sepe 4 4\n", "3649 nickerie 4 8\n", "3585 barotse 4 7\n", "3116 savu 4 4\n", "3559 pharoah 4 7\n", "3543 kwang 4 5\n", "3123 preceeding 4 10\n", "3155 ilissionary 4 11\n", "3175 verbeck 4 7\n", "3191 galletas 4 8\n", "2364 bardizag 4 8\n", "2353 hepatization 4 12\n", "4019 pontypridd 4 10\n", "2159 cantlie 4 7\n", "2079 vou 4 3\n", "2087 onehalf 4 7\n", "4851 comandi 4 7\n", "3336 thework 4 7\n", "4819 vavau 4 5\n", "4745 selfsupporting 4 14\n", "4563 voz 4 3\n", "4035 weekseptember 4 13\n", "4457 kalmucks 4 8\n", "2205 seventyfive 4 11\n", "2240 weekfebruary 4 12\n", "4288 geraes 4 6\n", "4287 stoever 4 7\n", "4248 chineseman 4 10\n", "2139 sarmiento 4 9\n" ] } ], "source": [ "title = 'TMM'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T14:36:06.417135", "start_time": "2017-01-04T14:36:06.324323" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for WMH:\n", " spell_error count word_length\n", "6161 sabbathschool 170 13\n", "2631 presidenta 75 10\n", "5854 secretarym 61 10\n", "3801 treasurere 61 10\n", "581 numbersin 53 9\n", "5936 numbessin 52 9\n", "1071 horr 39 4\n", "3898 'the 36 4\n", "5002 wyla 32 4\n", "2785 kee 32 3\n", "5499 seventhday 32 10\n", "3796 presidentm 32 10\n", "195 blendon 32 7\n", "479 brower 31 6\n", "5641 harnden 30 7\n", "3945 cleora 27 6\n", "4037 ioo 25 3\n", "2456 sabbathschools 25 14\n", "5774 nunica 23 6\n", "2841 chas 23 4\n", "228 tion 22 4\n", "5964 psa 20 3\n", "5893 'to 20 3\n", "3277 loth 20 4\n", "2148 numbess 18 7\n", "6270 hoffstra 18 8\n", "910 michi 18 5\n", "5207 drury 18 5\n", "1776 'and 17 4\n", "2256 convis 16 6\n", "6634 ment 16 4\n", "4663 ence 16 4\n", "5567 secretarys 15 10\n", "6345 sabbathkeepers 15 14\n", "2724 editov 15 6\n", "3276 diamondale 15 10\n", "1954 mal 15 3\n", "566 treasurerd 15 10\n", "1692 'of 14 3\n", "3248 numbeesin 14 9\n", "1837 ainger 14 6\n", "6399 'field 14 6\n", "4656 vinancial 14 9\n", "5750 numbepsin 13 9\n", "3498 ith 13 3\n", "5975 ctory 13 5\n", "5885 ass'n 12 5\n", "2315 myrta 12 5\n", "5287 bilz 12 4\n", "436 gathereti 12 9\n", "5539 dirlctory 12 9\n", "4275 messer 11 6\n", "2413 'that 11 5\n", "2048 benefitted 11 10\n", "3563 see'y 11 5\n", "1319 'for 11 4\n", "441 hevald 11 6\n", "1180 foy 11 3\n", "2556 harriot 10 7\n", "3616 rgo 10 3\n", "5360 ereth 10 5\n", "2568 ist 10 3\n", "1568 almeda 10 6\n", "212 fvom 9 4\n", "6740 gatereth 9 8\n", "6019 onehalf 9 7\n", "2759 'in 9 3\n", "2671 editop 9 6\n", "1337 sendebud 9 8\n", "3540 gravelle 9 8\n", "1767 twentyfive 9 10\n", "1997 consistant 8 10\n", "485 tgo 8 3\n", "6273 altho 8 5\n", "5410 eze 8 3\n", "5276 ordis 8 5\n", "6066 'be 8 3\n", "2847 thr 8 3\n", "1861 numbevsin 8 9\n", "3936 oth 8 3\n", "6702 soo 8 3\n", "500 ers 8 3\n", "1321 ple 8 3\n", "5465 ference 8 7\n", "1598 mchugh 8 6\n", "1951 palmiter 8 8\n", "5277 allister 8 8\n", "1425 mis 8 3\n", "199 educationa 8 10\n", "3298 garton 7 6\n", "3376 phippeny 7 8\n", "366 vield 7 5\n", "5482 schooi 7 6\n", "6295 mavgavet 7 8\n", "5846 ance 7 4\n", "3426 ilee 7 4\n", "3032 pre 7 3\n", "2216 whi 7 3\n", "2235 hof 7 3\n", "5126 hsi 7 3\n", "... ... ... ...\n", "1051 allthe 6 6\n", "1232 numbewsin 6 9\n", "2491 thro 6 4\n", "352 hausfreund 6 10\n", "3431 veap 6 4\n", "1800 tobe 6 4\n", "1970 selfdenial 6 10\n", "6183 whitford 5 8\n", "5582 swahn 5 5\n", "6100 prpartnunt 5 10\n", "4920 vicepresident 5 13\n", "4989 approbativeness 5 15\n", "984 ilaughey 5 8\n", "5051 vaktare 5 7\n", "2102 educa 5 5\n", "5374 gth 5 3\n", "1325 scandanavian 5 12\n", "2485 arnadon 5 7\n", "4769 sions 5 5\n", "1486 sooncoming 5 10\n", "2455 cudney 5 6\n", "5452 ssued 5 5\n", "1652 matthewson 5 10\n", "5771 iio 5 3\n", "6250 hotstra 5 7\n", "2338 seventyfive 5 11\n", "2842 numbeasin 5 9\n", "3096 medicial 5 8\n", "3499 sel 5 3\n", "6756 tti 5 3\n", "149 rooo 5 4\n", "3895 sundayschool 5 12\n", "6335 natches 5 7\n", "4001 thallie 5 7\n", "3245 numbessln 5 9\n", "591 ments 5 5\n", "550 ent 5 3\n", "3078 raiatea 5 7\n", "4188 ished 5 5\n", "444 kamstra 4 7\n", "6573 haugbev 4 7\n", "5692 bea 4 3\n", "1751 'twixt 4 6\n", "6753 myrtie 4 6\n", "255 isthe 4 5\n", "281 iooo 4 4\n", "339 numbensin 4 9\n", "1554 gatmereth 4 9\n", "6490 alloted 4 7\n", "6292 wil 4 3\n", "1405 accomodate 4 10\n", "6388 ass't 4 5\n", "6257 newago 4 6\n", "5934 'is 4 3\n", "398 stra 4 4\n", "1263 igth 4 4\n", "5953 'this 4 5\n", "892 conven 4 6\n", "1154 nee 4 3\n", "876 medler 4 6\n", "3525 christlicher 4 12\n", "1873 gress 4 5\n", "3287 afew 4 4\n", "2951 pri 4 3\n", "2981 committe 4 8\n", "4321 whereever 4 9\n", "4272 sionary 4 7\n", "3162 wer 4 3\n", "3977 onethird 4 8\n", "3239 greenman 4 8\n", "3306 terest 4 6\n", "4795 editott 4 7\n", "3354 ool 4 3\n", "3797 tennesee 4 8\n", "3424 ung 4 3\n", "3671 'great 4 6\n", "3656 nrws 4 4\n", "3649 ject 4 4\n", "3586 tes 4 3\n", "4794 thi 4 3\n", "2758 'school 4 7\n", "1999 ful 4 3\n", "5198 'them 4 5\n", "5468 watchcare 4 9\n", "2093 'us 4 3\n", "2115 'are 4 4\n", "2293 twentythree 4 11\n", "3501 'work 4 5\n", "5270 ebucattonal 4 11\n", "5229 hevaid 4 6\n", "5193 'at 4 3\n", "4853 edu 4 3\n", "5141 igan 4 4\n", "2558 eachern 4 7\n", "2562 lle 4 3\n", "5117 selfsupporting 4 14\n", "5062 'as 4 3\n", "2680 reapeti 4 7\n", "4932 mrse 4 4\n", "6797 tencent 4 7\n", "\n", "[213 rows x 3 columns]\n" ] } ], "source": [ "title = 'WMH'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2017-01-04T14:54:22.269652", "start_time": "2017-01-04T14:54:21.942976" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for YI:\n", " spell_error count word_length\n", "85050 sabbathschool 607 13\n", "3429 'the 408 4\n", "65800 'em 399 3\n", "65897 eze 316 3\n", "45605 xil 315 3\n", "43104 ver 302 3\n", "49812 sel 254 3\n", "30373 tion 227 4\n", "31970 mal 214 3\n", "44178 'of 211 3\n", "63298 agt 205 3\n", "29354 'to 197 3\n", "98016 'neath 180 6\n", "44266 twentyfive 168 10\n", "19616 'and 168 4\n", "40114 ioo 159 3\n", "20722 pre 152 3\n", "22006 guire 151 5\n", "25382 'he 149 3\n", "2950 'cause 148 6\n", "33873 iden 148 4\n", "11323 'be 138 3\n", "50376 goin 133 4\n", "11590 ass't 131 5\n", "70800 sangster 130 8\n", "72425 s'pose 121 6\n", "69839 milly 120 5\n", "48903 yovt 112 4\n", "15826 peloubet 110 8\n", "96169 ome 110 3\n", "32018 ful 109 3\n", "72003 xiil 109 4\n", "28669 hsi 109 3\n", "44649 ettez 106 5\n", "28039 stuttle 105 7\n", "5881 ment 103 4\n", "5158 lxv 102 3\n", "44118 chas 101 4\n", "2433 'in 100 3\n", "28295 yovti 100 5\n", "18041 lviii 99 5\n", "30283 kibbin 99 6\n", "41011 rosilla 97 7\n", "86980 structor 92 8\n", "83373 'his 91 4\n", "12976 ili 90 3\n", "96112 tle 90 3\n", "24063 sha 90 3\n", "48821 'mid 89 4\n", "20073 'by 87 3\n", "94355 lxiii 87 5\n", "81501 georgie 84 7\n", "22359 hutt 84 4\n", "23894 micr 83 4\n", "43960 it' 82 3\n", "88334 'but 81 4\n", "44704 onehalf 80 7\n", "26674 ers 80 3\n", "54236 'most 78 5\n", "13223 r'y 77 3\n", "53077 susy 77 4\n", "60361 me' 76 3\n", "33151 'round 76 6\n", "42433 'have 74 5\n", "82382 gertie 74 6\n", "55587 howson 74 6\n", "82149 'way 71 4\n", "57145 'that 71 5\n", "72128 seventhday 71 10\n", "14816 kee 70 3\n", "65375 'ye 70 3\n", "70666 sundayschool 69 12\n", "4832 'bout 69 5\n", "79465 conybeare 68 9\n", "38125 marden 68 6\n", "25282 mis 67 3\n", "58752 teddie 67 6\n", "66264 riis 67 4\n", "29182 nyassaland 67 10\n", "54249 'is 67 3\n", "26458 nanking 66 7\n", "3084 neesima 64 7\n", "15645 tions 64 5\n", "91428 sabbathkeepers 64 14\n", "27307 ther 64 4\n", "66474 'was 62 4\n", "31600 ole 62 3\n", "33259 cunliffe 62 8\n", "6227 zambesi 61 7\n", "63809 ets 60 3\n", "66094 soo 60 3\n", "16738 'twill 60 6\n", "40810 'had 60 4\n", "70250 twentyfour 59 10\n", "56162 ple 59 3\n", "60997 ber 59 3\n", "59428 liii 58 4\n", "36197 gon 58 3\n", "13131 'for 58 4\n", "21637 sus 57 3\n", "... ... ... ...\n", "20425 months' 4 7\n", "66786 nrt 4 3\n", "66913 countri 4 7\n", "66798 alie 4 4\n", "66804 appius 4 6\n", "66854 pleag 4 5\n", "20832 whotn 4 5\n", "66879 noss 4 4\n", "66884 sophronia 4 9\n", "20750 ccesarea 4 8\n", "66938 feneberg 4 8\n", "67170 ivas 4 4\n", "20743 whenthe 4 7\n", "20684 light' 4 6\n", "20638 illfated 4 8\n", "20587 'nom 4 4\n", "20500 sanyasi 4 7\n", "20481 sengers 4 7\n", "67168 moung 4 5\n", "65581 ansdell 4 7\n", "65464 wih 4 3\n", "63449 doren 4 5\n", "64307 pressive 4 8\n", "22753 ndt 4 3\n", "64186 missio 4 6\n", "64203 hoppy 4 5\n", "22691 ungratified 4 11\n", "22633 eath 4 4\n", "64287 medhurst 4 8\n", "64293 ninus 4 5\n", "64308 schule 4 6\n", "64072 culti 4 5\n", "22624 auber 4 5\n", "64357 leddy 4 5\n", "22593 inmuotor 4 8\n", "22559 aymar 4 5\n", "22550 ifr 4 3\n", "64522 cambo 4 5\n", "22455 senales 4 7\n", "64077 cuautemoch 4 10\n", "63994 iolani 4 6\n", "64603 abled 4 5\n", "63747 thisis 4 6\n", "63465 wic 4 3\n", "63503 epworthian 4 10\n", "63583 kap 4 3\n", "23027 dolson 4 6\n", "22963 coopersburgh 4 12\n", "63693 blisses 4 7\n", "63702 cellent 4 7\n", "22962 shortland 4 9\n", "63965 bertel 4 6\n", "63807 sawa 4 4\n", "22951 ister 4 5\n", "22950 laon 4 4\n", "63848 hattusil 4 8\n", "22922 edvard 4 6\n", "63900 paulonia 4 8\n", "63944 nauplia 4 7\n", "64577 namur 4 5\n", "64700 lossing 4 7\n", "65442 douly 4 5\n", "21966 vealed 4 6\n", "65091 squier 4 6\n", "22076 adapte 4 6\n", "65121 terial 4 6\n", "22065 gohna 4 5\n", "65201 trom 4 4\n", "22021 ock 4 3\n", "65218 nemorosa 4 8\n", "65256 leutze 4 6\n", "22078 gurdy 4 5\n", "65265 titterington 4 12\n", "21885 chriit 4 6\n", "21875 rajputs 4 7\n", "65359 orks 4 4\n", "65371 printingoffice 4 14\n", "21866 edny 4 4\n", "65417 spiker 4 6\n", "65079 jis 4 3\n", "65031 think' 4 6\n", "64745 'during 4 7\n", "64881 constrainem 4 11\n", "22405 vio 4 3\n", "64764 playin 4 6\n", "64769 fire' 4 5\n", "64784 walburga 4 8\n", "64806 representa 4 10\n", "64858 tidende 4 7\n", "64878 seveneighths 4 12\n", "22331 lispings 4 8\n", "22147 leadbetters 4 11\n", "64907 gnd 4 3\n", "64939 leontes 4 7\n", "64941 worke 4 5\n", "64942 ooks 4 4\n", "64981 letow 4 5\n", "64987 'cried 4 6\n", "22233 eof 4 3\n", "4 constraiheth 4 12\n", "\n", "[6563 rows x 3 columns]\n" ] } ], "source": [ "title = 'YI'\n", "print(\"Summary for {}:\".format(title))\n", "df = results_to_df(title)\n", "results = query_df(df, 3, 2, 'count')\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'commit_hash': '5c9c918',\n", " 'commit_source': 'installation',\n", " 'default_encoding': 'UTF-8',\n", " 'ipython_path': '/Users/jeriwieringa/miniconda3/envs/dissertation2/lib/python3.5/site-packages/IPython',\n", " 'ipython_version': '5.1.0',\n", " 'os_name': 'posix',\n", " 'platform': 'Darwin-16.1.0-x86_64-i386-64bit',\n", " 'sys_executable': '/Users/jeriwieringa/miniconda3/envs/dissertation2/bin/python',\n", " 'sys_platform': 'darwin',\n", " 'sys_version': '3.5.2 |Continuum Analytics, Inc.| (default, Jul 2 2016, '\n", " '17:52:12) \\n'\n", " '[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]'}\n", "anaconda-client==1.5.5\n", "appnope==0.1.0\n", "argh==0.26.1\n", "blinker==1.4\n", "bokeh==0.12.3\n", "boto==2.43.0\n", "bz2file==0.98\n", "chest==0.2.3\n", "cloudpickle==0.2.1\n", "clyent==1.2.2\n", "dask==0.12.0\n", "datashader==0.4.0\n", "datashape==0.5.2\n", "decorator==4.0.10\n", "docutils==0.12\n", "doit==0.29.0\n", "gensim==0.12.4\n", "Ghost.py==0.2.3\n", "ghp-import2==1.0.1\n", "gspread==0.4.1\n", "HeapDict==1.0.0\n", "httplib2==0.9.2\n", "husl==4.0.3\n", "ipykernel==4.5.2\n", "ipython==5.1.0\n", "ipython-genutils==0.1.0\n", "ipywidgets==5.2.2\n", "Jinja2==2.8\n", "jsonschema==2.5.1\n", "jupyter==1.0.0\n", "jupyter-client==4.4.0\n", "jupyter-console==5.0.0\n", "jupyter-core==4.2.1\n", "llvmlite==0.14.0\n", "locket==0.2.0\n", "Logbook==1.0.0\n", "lxml==3.5.0\n", "MacFSEvents==0.7\n", "Mako==1.0.4\n", "Markdown==2.6.7\n", "MarkupSafe==0.23\n", "mistune==0.7.3\n", "multipledispatch==0.4.9\n", "natsort==4.0.4\n", "nb-anacondacloud==1.2.0\n", "nb-conda==2.0.0\n", "nb-conda-kernels==2.0.0\n", "nb-config-manager==0.1.3\n", "nbbrowserpdf==0.2.1\n", "nbconvert==4.2.0\n", "nbformat==4.2.0\n", "nbpresent==3.0.2\n", "networkx==1.11\n", "Nikola==7.7.7\n", "nltk==3.2.1\n", "notebook==4.2.3\n", "numba==0.29.0\n", "numpy==1.11.2\n", "oauth2client==4.0.0\n", "odo==0.5.0\n", "pandas==0.19.1\n", "partd==0.3.6\n", "path.py==0.0.0\n", "pathtools==0.1.2\n", "pexpect==4.0.1\n", "pickleshare==0.7.4\n", "Pillow==3.4.2\n", "prompt-toolkit==1.0.9\n", "ptyprocess==0.5.1\n", "pyasn1==0.1.9\n", "pyasn1-modules==0.0.8\n", "pycrypto==2.6.1\n", "Pygments==2.1.3\n", "PyPDF2==1.25.1\n", "PyRSS2Gen==1.1\n", "python-dateutil==2.6.0\n", "pytz==2016.10\n", "PyYAML==3.12\n", "pyzmq==16.0.2\n", "qtconsole==4.2.1\n", "requests==2.12.3\n", "rsa==3.4.2\n", "scipy==0.18.1\n", "simplegeneric==0.8.1\n", "six==1.10.0\n", "smart-open==1.3.5\n", "terminado==0.6\n", "textblob==0.11.1\n", "toolz==0.8.1\n", "tornado==4.4.2\n", "traitlets==4.3.1\n", "Unidecode==0.4.19\n", "watchdog==0.8.3\n", "wcwidth==0.1.7\n", "webassets==0.11.1\n", "widgetsnbextension==1.2.6\n", "ws4py==0.3.4\n", "xarray==0.8.2\n", "Yapsy==1.11.223\n" ] } ], "source": [ "# %load shared_elements/system_info.py\n", "import IPython\n", "print (IPython.sys_info())\n", "!pip freeze" ] } ], "metadata": { "anaconda-cloud": { "attach-environment": true, "summary": "Overview of frequent \"errors\" across all titles." }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" }, "toc": { "nav_menu": { "height": "12px", "width": "252px" }, "navigate_menu": true, "number_sections": true, "sideBar": false, "threshold": "3", "toc_cell": true, "toc_section_display": "block", "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 }