inspect-frequent-spelling-errors-round4
Table of Contents¶
In [1]:
import pandas as pd
pd.options.display.max_rows = 200
import os
In [2]:
# Directory whose files results_to_df() lists and reads.
# NOTE(review): this is an absolute, machine-specific path -- point it at your
# own copy of the spelling-statistics data before re-running the notebook.
dir_ = '/Users/jeriwieringa/Dissertation/drafts/data/spelling-statistics/round4/'
In [3]:
# Title codes; the summary cells below use these same codes one at a time.
titles = [
    "ADV", "AmSn", "ARAI", "CE", "CUV", "EDU",
    "GCB", "GH", "GOH", "GS", "HM", "HR",
    "IR", "LB", "LH", "LibM", "LUH", "NMN",
    "PHJ", "PTAR", "PUR", "RH", "Sligo", "SOL",
    "ST", "SUW", "TCOG", "TMM", "WMH", "YI",
]
In [4]:
def results_to_df(title, directory=None):
    """Load the spelling-error counts for one title into a DataFrame.

    Searches ``directory`` for the first file (in sorted name order) whose
    name ends with ``"<title>.txt"``, reads it with ``pandas.read_csv``, and
    adds a ``word_length`` column holding the character length of each
    ``spell_error`` value.

    Parameters
    ----------
    title : str
        Title code used as the filename suffix, e.g. ``'ADV'``.
    directory : str, optional
        Folder to search. Defaults to the notebook-level ``dir_``.

    Returns
    -------
    pandas.DataFrame
        The file's contents plus the added ``word_length`` column.

    Raises
    ------
    FileNotFoundError
        If no file in ``directory`` ends with ``"<title>.txt"``. (Previously
        the function silently returned ``None`` in that case.)
    """
    if directory is None:
        directory = dir_

    suffix = "{}.txt".format(title)

    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # reproducible if more than one file happens to match the suffix.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(suffix):
            # os.path.join works whether or not `directory` ends with a separator.
            df = pd.read_csv(os.path.join(directory, filename))
            df['word_length'] = df['spell_error'].str.len()
            return df

    raise FileNotFoundError(
        "No file ending in '{}' found in {}".format(suffix, directory)
    )
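Because the goal here is to identify words that should be added to the spell-check list, I am dropping infrequent and very short words. The cells below keep only words with a count greater than 3 and a length greater than 2 characters, which is stricter than dropping only words with a count of "1" and single-letter words.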
In [5]:
def query_df(df, count_value, length_value, sort_by):
    """Filter a spelling-error table and sort it in descending order.

    Keeps rows whose ``count`` is greater than ``count_value`` and whose
    ``word_length`` is greater than ``length_value``, then orders the
    surviving rows by the ``sort_by`` column, largest first.
    """
    condition = 'count > {} & word_length > {}'.format(count_value, length_value)
    kept_rows = df.query(condition)
    return kept_rows.sort_values(sort_by, ascending=False)
In [6]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'ADV'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for ADV:
spell_error count word_length
6805 tion 807 4
345 dren 329 4
8406 chil 326 4
4459 educa 323 5
13967 ment 304 4
8620 n't 234 3
5858 ers 208 3
3554 tions 202 5
8709 edu 175 3
10179 pre 174 3
4965 ence 160 4
6314 ple 135 3
10154 mis 135 3
10621 tional 128 6
9562 tian 126 4
853 ith 125 3
9528 ful 112 3
10503 ments 98 5
10823 ent 96 3
7684 ber 96 3
13643 peo 93 3
7142 ofthe 90 5
13878 prin 87 4
10864 ture 84 4
5326 ucation 81 7
7257 struction 74 9
4823 chas 70 4
12444 lege 68 4
15542 ance 68 4
4999 sionary 68 7
1990 ents 67 4
13409 tem 65 3
13980 agt 64 3
3586 ciples 64 6
1020 ary 63 3
921 ble 63 3
12270 ual 58 3
9463 ure 55 3
1797 ference 54 7
8404 bers 52 4
1527 ject 52 4
2843 sys 51 3
2729 instruc 50 7
11096 experi 50 6
196 tle 49 3
4705 coun 49 4
3448 princi 47 6
8834 'll 46 3
7217 knowl 45 5
1841 dustrial 45 8
15997 ning 43 4
2687 accom 43 5
12501 ical 42 4
15325 eral 42 4
6481 prac 42 4
7975 ples 41 4
4601 ters 41 4
166 har 41 3
10144 sloyd 40 5
6903 perience 40 8
9696 cational 40 8
16693 lan 40 3
16430 ork 39 3
11955 oppor 39 5
3870 suc 39 3
149 tained 39 6
420 mer 39 3
13245 lished 39 6
3502 tis 39 3
7184 impor 38 5
10655 hile 38 4
8931 institu 38 7
4095 dif 38 3
939 sions 35 5
16133 tance 35 5
1323 ucational 35 9
781 neces 34 5
3302 estab 34 5
14439 anoka 34 5
1859 ceived 34 6
10554 tjt 33 3
7960 tbe 33 3
11823 ver 33 3
6768 arith 33 5
15626 tunity 32 6
12094 prepara 32 7
6255 sible 31 5
15437 partment 31 8
1290 wil 31 3
6455 dred 31 4
11231 pils 31 4
17002 tary 31 4
16588 proph 31 5
2160 ered 31 4
7200 direc 31 5
2411 dence 30 5
842 jects 30 5
9964 ous 30 3
14996 tlie 30 4
16270 tive 29 4
... ... ... ...
14571 ington 4 6
14564 'of 4 3
8981 quence 4 6
9008 ofour 4 5
9012 riences 4 7
9050 gradu 4 5
9051 investi 4 7
9077 ’ou 4 3
9098 kankakee 4 8
14437 cident 4 6
9223 nally 4 5
9235 father’s 4 8
14417 txi 4 3
9291 sibilities 4 10
1607 ucators 4 7
14382 strated 4 7
9347 mented 4 6
9436 vis 4 3
14305 lub 4 3
14595 secretaryof 4 11
14611 prindle 4 7
2083 sul 4 3
4667 windham 4 7
14884 ganize 4 6
14868 tainment 4 8
14862 geni 4 4
1412 perma 4 5
8630 arner 4 5
8656 departm 4 7
8671 expi 4 4
4858 slialt 4 6
14785 erence 4 6
8736 citv 4 4
4826 dicate 4 6
14775 wrhat 4 5
1446 tbeir 4 5
4814 ously 4 5
14748 baby’s 4 6
4786 expedted 4 8
14704 tists 4 5
14692 mained 4 6
14691 astrong 4 7
4715 school' 4 7
1460 pers 4 4
14665 sota 4 4
2989 diredt 4 6
4511 jxrir 4 5
14287 kellar 4 6
4510 uncon 4 5
4488 dia 4 3
13727 jno 4 3
1873 cun 4 3
4303 thejr 4 5
13721 servation 4 9
13709 und 4 3
1888 tobin 4 5
9890 tral 4 4
9913 farmington 4 10
4288 gravsville 4 10
1936 foi 4 3
4258 eord 4 4
10035 gbaw 4 4
4211 cesses 4 6
10115 guages 4 6
13584 tenance 4 7
1947 satisfac 4 8
10174 conclu 4 6
4119 oti 4 3
4066 freshies 4 8
1982 dic 4 3
10243 buluwayo 4 8
10284 christain 4 9
10309 'to 4 3
4319 centsayear 4 10
13763 theless 4 7
4338 cuse 4 4
14139 pelled 4 6
4444 clared 4 6
4438 ioi 4 3
1663 jhe 4 3
9591 byr 4 3
4431 blos 4 4
14243 gowdy 4 5
9606 excep 4 5
14224 ves 4 3
9626 expla 4 5
1698 atid 4 4
1770 hol 4 3
13791 fadts 4 5
9660 tials 4 5
9677 thk 4 3
9712 atson 4 5
1775 imi 4 3
13995 clusively 4 9
9723 trons 4 5
9725 amination 4 9
13964 tir 4 3
9747 mbd 4 3
13808 chinery 4 7
11996 mieh 4 4
[1490 rows x 3 columns]
In [7]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'AmSn'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for AmSn:
spell_error count word_length
22881 n't 2138 3
20525 'the 431 4
5986 tion 317 4
12689 seventhday 258 10
30463 indorsed 250 8
43697 'of 232 3
30788 satolli 230 7
14935 employes 208 8
57591 munn 206 4
7727 'll 200 3
35250 religio 195 7
27685 ment 194 4
47657 kee 187 3
1332 cmsar 185 5
21690 indorse 171 7
46929 ringgold 167 8
55394 'to 153 3
6341 breckinridge 146 12
18033 allister 138 8
45304 pre 136 3
51360 bateham 129 7
43047 csar 119 4
21218 schaff 119 6
27616 aleck 112 5
12493 socalled 108 8
47956 erican 107 6
28305 milly 103 5
48878 sentin 103 6
32546 capps 96 5
58932 'is 95 3
33977 tions 94 5
13591 ican 94 4
31801 palmeter 94 8
58 neander 92 7
40884 'that 91 5
35412 're 86 3
28440 eze 84 3
40442 'and 83 4
3341 'in 83 3
46342 've 83 3
59823 sundaylaw 81 9
9881 epworth 81 7
42692 messrs 81 6
114 stundists 81 9
56143 edmunds 80 7
37278 cereola 79 7
19514 chas 79 4
23977 haskins 77 7
21357 thi 76 3
52560 ofthe 75 5
32748 lld 74 3
28194 freethought 67 11
12991 coxey 64 5
12455 connell 63 7
27340 avenola 62 7
2300 endeavorer 59 10
45125 attaches 58 8
9170 ments 58 5
15882 rican 57 5
44605 intrusted 57 9
48111 tional 56 6
37458 anierican 56 9
9722 'not 56 4
54911 paeifie 56 7
6032 candidus 56 8
14519 fifield 56 7
5567 ple 55 3
8285 geikie 54 6
15599 indorsing 54 9
44738 tregelles 54 9
17668 ernment 54 7
16292 employe 53 7
5693 dred 53 4
33027 ity 53 3
729 depew 52 5
47687 krug 51 4
48086 obion 51 5
39056 aivierican 51 10
48366 assoeiation 51 11
55214 englewood 50 9
34162 inthe 50 5
42041 litt 50 4
55772 aro 50 3
27069 cuyler 50 6
57746 wellknown 50 9
55693 sabbaththe 50 10
40575 mallett 48 7
39495 leiper 48 6
23741 tian 48 4
32400 cathedra 47 8
30232 opposers 47 8
30015 medo 46 4
40613 'be 46 3
40369 ent 45 3
52072 kai 45 3
34852 sundayclosing 45 13
29829 stuttle 44 7
18206 forit 44 5
39807 judefind 44 8
19273 keane 44 5
... ... ... ...
13666 ublished 4 8
37236 banishments 4 11
46641 'human 4 6
48707 epi 4 3
14494 irs 4 3
15234 spiritand 4 9
15179 'render 4 7
23889 shbnah 4 6
15134 'earth 4 6
46068 'worship 4 8
37357 beauti 4 6
15033 libertythat 4 11
37346 ganization 4 10
14972 principlea 4 10
14910 mur 4 3
23931 cas 4 3
46207 craham 4 6
23970 oneman 4 6
14847 kingd 4 5
24027 rality 4 6
14782 'general 4 8
14776 mens 4 4
14732 pocus 4 5
46344 beand 4 5
14730 intermeddler 4 12
14726 legislati 4 9
46426 ballentine 4 10
14645 bouvier 4 7
37258 holydays 4 8
14612 bickerings 4 10
24192 tures 4 5
46597 koenig 4 6
46601 lene 4 4
14549 tious 4 5
47457 selfstyled 4 10
36877 godlikeness 4 11
13655 errone 4 6
47529 eccle 4 5
48097 bestto 4 6
36459 gert 4 4
48116 wie 4 3
12564 fiftyfirst 4 10
48197 thf 4 3
36458 answera 4 7
48246 twentyseven 4 11
12553 peoplenot 4 9
36445 sanctities 4 10
48347 catholie 4 8
12418 delambre 4 8
48367 faiththe 4 8
36426 acific 4 6
48407 intyre 4 6
12413 'much 4 5
24739 powe 4 4
36347 statemanship 4 12
12279 eign 4 4
12275 ormed 4 5
12171 amv 4 3
11993 discernable 4 11
48516 dror 4 4
11916 olneyville 4 10
24809 itdividual 4 10
11892 'ye 4 3
48617 ited 4 4
11881 purpo 4 5
36305 innes 4 5
11797 sonship 4 7
24716 cisions 4 7
12814 yosemit 4 7
12931 consciencethis 4 14
36683 theonly 4 7
13602 ather 4 5
13600 democratism 4 11
13566 sawbath 4 7
24478 frse 4 4
47647 protectories 4 12
36826 caulay 4 6
47690 lishing 4 7
24667 charleton 4 9
13351 publishkng 4 10
47734 fica 4 4
24701 oom 4 3
47746 rer 4 3
47762 hol 4 3
36660 cus 4 3
13061 nrs 4 3
47810 fastwill 4 8
36652 appre 4 5
36617 snd 4 3
47878 tured 4 5
13300 hackmen 4 7
36609 suger 4 5
36533 thig 4 4
47931 engler 4 6
13166 philomath 4 9
13071 sabbatizing 4 11
47968 oal 4 3
47976 king' 4 5
36522 thisis 4 6
30490 moen 4 4
[3381 rows x 3 columns]
In [8]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'ARAI'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for ARAI:
spell_error count word_length
162 rockyhill 9 9
467 stowell 8 7
455 cheo 7 4
226 k'o 7 3
476 sha 6 3
341 parana 6 6
453 friedenstal 6 11
494 nyanza 5 6
295 mch 5 3
119 chitonga 5 8
409 nyassa 5 6
411 solusi 4 6
20 gnedjen 4 7
393 kavirondo 4 9
202 vuasu 4 5
160 majita 4 6
123 rentfro 4 7
350 somabula 4 8
In [9]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'CE'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for CE:
spell_error count word_length
8780 n't 727 3
195 'll 162 3
7207 manumental 133 10
1408 kibbin 56 6
7387 adelphian 52 9
236 've 52 3
6995 tion 45 4
2986 millis 44 6
4171 tis 43 3
5404 're 42 3
213 tle 37 3
1158 tne 26 3
6648 ginn 26 4
958 ful 26 3
4925 nurses' 24 7
5580 claxton 24 7
396 dren 24 4
4485 murry 24 5
6502 delpha 23 6
1571 pre 23 3
1989 lusio 22 5
1692 dioxid 22 6
4415 wyclif 22 6
8195 myrta 22 5
8289 revell 21 6
4874 lomb 20 4
4965 flexner 20 7
5217 sheyenne 20 8
4227 maplewood 20 9
5570 taquary 20 7
1760 tunesassa 20 9
8572 chil 20 4
7377 'ry 20 3
5160 goldsberry 20 10
502 lippincott 19 10
6160 'the 19 4
6867 ers 19 3
970 preceptresses 19 13
2741 bausch 18 6
52 lul 18 3
623 hildebran 18 9
5396 ment 17 4
7889 ber 17 3
739 mvp 16 3
6996 mvo 16 3
828 plainview 16 9
3222 laurelwood 16 10
6503 imo 15 3
3694 tions 15 5
7005 eufola 14 6
6621 anb 14 3
346 haapai 14 6
7209 teacherage 13 10
204 cready 13 6
8179 sus 13 3
1548 mis 13 3
5999 seventhday 13 10
5674 syllabi 13 7
5308 prin 13 4
482 mer 13 3
4547 rowell 13 6
1709 'to 12 3
2267 ture 12 4
341 sloyd 11 5
5992 'of 11 3
318 ight 11 4
3910 kernelocorn 11 11
4847 exousia 11 7
5075 halfyear 11 8
76 adventista 11 10
6484 it' 10 3
848 vis 10 3
7016 eldredge 10 8
8178 jarnboas 10 8
8381 eze 10 3
6246 thos 10 4
3410 bez 10 3
7484 ioo 10 3
6147 seventhand 10 10
3676 tio 10 3
2304 colegio 10 7
3787 waikato 10 7
1018 sions 10 5
8126 ovalau 10 6
2607 lornedale 10 9
2211 sul 10 3
1768 latshaw 10 7
7900 ver 10 3
5448 dont 10 4
4893 jes 10 3
490 iiii 10 4
4566 hillcrest 9 9
4778 ther 9 4
3154 divi 9 4
5936 churchschool 9 12
4524 sirable 9 7
3436 nyhyttan 9 8
5764 ade 9 3
1486 buresala 9 8
1248 welltrained 9 11
... ... ... ...
465 pursual 5 7
5856 sixtyfive 5 9
4865 tbe 5 3
4873 coun 4 4
8660 fortyfive 4 9
1111 brenke 4 6
4474 sangster 4 8
7938 dishwashing 4 11
2092 ents 4 4
8588 fernwood 4 8
8637 ral 4 3
5133 tra 4 3
7816 sayce 4 5
1070 sirup 4 5
1959 priori 4 6
7752 freeset 4 7
4460 baro 4 4
4418 goodloe 4 7
639 memoriam 4 8
5202 sionary 4 7
2320 flow'rs 4 7
7598 beauti 4 6
8700 farreaching 4 11
8570 bers 4 4
1053 burmans 4 7
1560 burdett 4 7
1912 wheatless 4 9
1473 mee 4 3
8481 dinsmore 4 8
992 tian 4 4
8342 tubere 4 6
1613 mmmmmm 4 6
1448 fitchburg 4 9
1661 ecole 4 5
1676 voyce 4 5
1679 loth 4 4
945 ith 4 3
4752 untechnical 4 11
7515 duqoin 4 6
4922 preeeptresses 4 13
1197 allround 4 8
4931 academie 4 8
4848 teachers' 4 9
56 wel 4 3
1866 unpedagogical 4 13
5001 'neath 4 6
8147 eighthgrade 4 11
5298 'em 4 3
2372 wirt 4 4
7457 ove 4 3
5702 dunamis 4 7
3082 serampur 4 8
5724 connell 4 7
3084 crowell 4 7
3108 wiggin 4 6
3166 gillott 4 7
6565 ucation 4 7
5787 bab 4 3
3269 excellences 4 11
3280 postum 4 6
3301 milner 4 6
6444 boundarylines 4 13
4070 farland 4 7
3320 wellregulated 4 13
3398 ordinating 4 10
3476 hilprecht 4 9
6221 robie 4 5
3521 nally 4 5
5843 harlen 4 6
3586 oth 4 3
3636 aik 4 3
4051 timehonored 4 11
5874 patsey 4 6
6662 prac 4 4
6664 ulty 4 4
6714 homiletical 4 11
4137 afe 4 3
7456 sidewise 4 8
5964 tetzlaff 4 8
5323 'it 4 3
7354 flagg 4 5
7323 das 4 3
2490 tuitions 4 8
4170 openair 4 7
2674 sabbathschool 4 13
5450 literatures 4 11
2800 'that 4 5
7071 ies 4 3
3068 twelvegrade 4 11
7039 owne 4 4
2821 trilliums 4 9
2888 mit 4 3
2952 cli 4 3
2978 proteid 4 7
6912 splain 4 6
6909 'twould 4 7
5583 godfearing 4 10
6868 lation 4 6
4843 fehling 4 7
1058 parentteacher 4 13
[341 rows x 3 columns]
In [10]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'CUV'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for CUV:
spell_error count word_length
13919 ppf 2144 3
25398 'the 510 4
16367 brownlee 459 8
20052 chas 446 4
10106 sabbathschool 362 13
20788 luzerne 361 7
44554 'of 332 3
13057 seventhday 324 10
46923 reichenbach 312 11
2780 elphatrick 307 10
15194 morgantown 240 10
44576 buttermore 232 10
41310 'and 229 4
56418 'to 216 3
20403 leesburg 212 8
17184 bfl 208 3
40409 barto 206 5
26122 columbiana 204 10
19271 hicksville 199 10
48767 gearhart 199 8
57941 paden 198 5
37636 oertley 187 7
35831 syphers 182 7
20825 yingling 182 8
48291 phila 173 5
33894 dowell 173 6
44209 tolliver 170 8
49251 dowling 170 7
55177 conneaut 169 8
4230 westmoreland 168 12
11915 charloe 166 7
61891 broughton 159 9
49726 pengelly 157 8
34476 meigs 155 5
43721 dunkinson 150 9
37481 tion 148 4
12967 corry 147 5
24454 apsley 147 6
32656 silber 145 6
44853 barnesville 142 11
19894 lehigh 142 6
25821 massillon 141 9
24914 pre 138 3
7090 gerhart 138 7
55700 brownell 137 8
44763 smithsburg 136 10
62599 wanteda 134 7
3051 midkiff 133 7
42654 stroudsburg 131 11
33542 kohr 126 4
4561 tioga 126 5
39973 harford 125 7
3421 'in 124 3
41492 'be 123 3
62057 zimmerly 116 8
9630 richland 115 8
21950 thi 115 3
8395 maloney 111 7
36874 eusey 109 5
20306 mingo 108 5
112 searles 108 7
53621 mahoning 106 8
30013 cabell 103 6
19229 pemberville 103 11
53562 ofthe 102 5
11076 bookmen 101 7
25036 muskingum 101 9
57794 braxton 100 7
20358 isitor 99 6
11455 carbondale 98 10
29889 greenspring 97 11
49597 cobr 97 4
51886 gordonsville 96 12
44508 wytheville 96 10
29347 marysville 95 10
14228 pickaway 95 8
40000 paulding 95 8
54282 meadville 94 9
41740 'that 93 5
36965 ashtabula 93 9
24323 sayre 92 5
28052 carthy 92 6
42365 hubbell 92 7
36383 heaton 91 6
22290 bentz 90 5
55583 wellsboro 90 9
47723 vanzant 90 7
9937 bassler 89 7
32527 fairhill 88 8
55232 mis 88 3
7192 blest 88 5
16898 rager 85 5
53012 garmo 85 5
6472 cuyahoga 84 8
62491 miscl 84 5
45618 honesdale 83 9
14254 twentyfive 83 10
29827 eachern 83 7
23868 conwell 82 7
3833 monongalia 82 10
... ... ... ...
28591 gestions 4 8
28579 'members 4 8
28535 osed 4 4
28497 rii 4 3
28496 recanvassing 4 12
32431 'near 4 5
32497 missionory 4 10
38127 ris 4 3
35948 'sin 4 4
36597 edito 4 5
36445 ular 4 4
36366 chlo 4 4
36283 ttt 4 3
36082 religio 4 7
36081 lyconing 4 8
36057 firstday 4 8
35992 'boys 4 5
35962 mohler 4 6
35927 cti 4 3
35159 againit 4 7
35857 'cents 4 6
35843 springlield 4 11
35752 affort 4 6
35664 'carry 4 6
35501 emmons 4 6
35480 'connection 4 11
35426 murry 4 5
35258 prayermeeting 4 13
35232 elvaton 4 7
36642 fbr 4 3
36899 tihe 4 4
36925 conw 4 4
37015 youwill 4 7
37965 misel 4 5
37963 gli 4 3
37959 'part 4 5
37900 sil 4 3
37871 lord' 4 5
37836 'took 4 5
37793 birt 4 4
37750 bordentown 4 10
37611 weat 4 4
37499 appre 4 5
37469 uernon 4 6
37436 excutive 4 8
37411 mookerjie 4 9
37388 reiehenbach 4 11
37380 peoplein 4 8
37287 'place 4 6
37191 agents' 4 7
37174 kil 4 3
37027 alio 4 4
35224 nol 4 3
35131 ruthenians 4 10
32689 oon 4 3
33282 ommittee 4 8
33747 othet 4 5
33712 appr 4 4
33692 summitt 4 7
33601 landmuckfrom 4 12
33474 bre 4 3
33463 gouldsboro 4 10
33431 worldthe 4 8
33349 hord 4 4
33290 confe 4 5
33246 'showed 4 7
35035 'alone 4 6
33219 'important 4 10
33192 winn 4 4
33180 agusta 4 6
33035 someof 4 6
33028 trict 4 5
33015 lura 4 4
33014 whytsell 4 8
32958 timeand 4 7
32794 wenty 4 5
33763 cohm 4 4
33809 allene 4 6
33858 dre 4 3
33898 ior 4 3
35032 sickler 4 7
35003 depositaries 4 12
34993 thefollowing 4 12
34911 theth 4 5
34511 humphries 4 9
34441 whichwe 4 7
34435 camerata 4 8
34412 judiasm 4 7
34407 'up 4 3
34393 kly 4 3
34330 gation 4 6
34292 ourwork 4 7
34281 clementon 4 9
34271 loveof 4 6
34238 exe 4 3
34182 repot 4 5
34125 rigby 4 5
34115 geade 4 5
34035 diegel 4 6
62810 structed 4 8
[3794 rows x 3 columns]
In [11]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'EDU'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for EDU:
spell_error count word_length
315 sloyd 29 5
2166 bamberger 13 9
2471 tion 12 4
1920 salomon 11 7
364 dep't 11 5
1125 pre 10 3
2519 abrahamson 9 10
959 'the 8 4
924 pub'g 8 5
1772 anb 7 3
2111 cator 7 5
790 ment 7 4
2419 publicschool 6 12
2925 educa 6 5
1783 thr 6 3
1850 naas 6 4
2676 majestatsbeleidigung 5 20
2760 perlen 5 6
2275 mit 5 3
2815 'of 5 3
15 morrill 5 7
1786 brownell 5 8
1592 frederikshavn 5 13
1469 education' 5 10
1330 edu 5 3
106 tiie 4 4
2569 educato 4 7
166 ture 4 4
258 tional 4 6
2810 vergil 4 6
2798 dingley 4 7
441 whatley 4 7
582 tre 4 3
1038 'and 4 4
2523 don'ts 4 6
2057 micr 4 4
2483 chas 4 4
1436 cygnaeus 4 8
2317 dhi 4 3
2228 lan 4 3
2992 ent 4 3
In [12]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'GCB'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for GCB:
spell_error count word_length
24302 tion 679 4
40181 gcs 436 3
38395 ence 346 4
36370 'the 338 4
42459 ference 289 7
41860 ment 240 4
37146 'of 232 3
31501 ple 186 3
24655 sabbathschool 179 13
26374 'to 166 3
24315 ers 161 3
24659 tions 149 5
42400 eral 146 4
29039 basle 130 5
13869 'be 130 3
40618 chas 127 4
22087 mittee 121 6
15788 seventhday 111 10
35805 ulletin 109 7
8647 'and 109 4
25443 'in 104 3
31065 pre 103 3
21205 sionary 100 7
17212 mis 100 3
6908 amens 85 5
7977 ent 76 3
43210 ren 75 3
20925 ile 72 3
38708 ences 72 5
27563 tional 72 6
2573 'that 71 5
35810 agt 71 3
37530 weiherweg 71 9
18885 ments 67 5
24306 ber 67 3
38577 ary 62 3
17636 lieve 60 5
3381 sabbathkeepers 60 14
41334 peo 60 3
27475 ture 58 4
23152 partment 56 8
33824 'for 56 4
15849 eign 56 4
24241 ful 55 3
2804 'by 54 3
33586 ferences 54 8
2135 dred 53 4
14159 sions 52 5
3133 bers 52 4
2614 inthe 52 5
19585 ized 50 4
17721 'we 50 3
2072 tle 50 3
4368 thi 49 3
13997 akersgaden 48 10
3617 canv 47 4
18810 ters 47 4
21227 ical 45 4
8457 prin 44 4
33728 'is 44 3
21993 sabbathschools 44 14
5220 'have 43 5
30226 ciples 42 6
2261 tem 42 3
4355 'but 41 4
37731 taranaki 41 8
35793 ning 41 4
28477 cutchen 41 7
9115 campmeetings 40 12
6985 'work 40 5
36171 ern 40 3
24909 brunson 40 7
21153 dren 40 4
33487 ity 39 3
10190 tian 39 4
26399 correo 38 6
39355 tive 38 4
40619 sented 38 6
12562 bourke 38 6
22971 'been 38 5
26460 ofthe 37 5
37881 raratonga 36 9
3719 'as 36 3
36429 ioo 36 3
43482 clure 35 5
29900 ican 35 4
3345 sible 35 5
18834 cial 35 4
16266 shiba 35 5
1811 hildebran 35 9
17146 fifield 35 7
20134 rethe 35 5
24293 conthe 35 6
31618 dailybulletin 34 13
40716 tothe 34 5
3901 kee 34 3
2164 erty 33 4
12247 mal 33 3
40543 fora 33 4
33817 ceived 32 6
... ... ... ...
11914 overing 4 7
11797 nueva 4 5
11772 thework 4 7
14372 overthe 4 7
14467 bahler 4 6
31838 conpeople 4 9
17047 apand 4 5
17423 herethe 4 7
17267 thc 4 3
17236 tempation 4 9
17202 'territory 4 10
17194 peoa 4 4
17157 asuncion 4 8
32609 ihave 4 5
17070 standthe 4 8
17066 sto 4 3
32672 sinlessness 4 11
17496 nominationsr 4 12
17040 to' 4 3
16977 terly 4 5
16962 'new 4 4
16945 stantial 4 8
16828 prieser 4 7
16793 harthe 4 6
32838 thisthe 4 7
16776 departthe 4 9
32848 gle 4 3
32309 inour 4 5
32275 retheir 4 7
32898 'thousand 4 9
18198 gerona 4 6
18668 burmah 4 6
18646 himselfthe 4 10
31878 'sent 4 5
18357 peoof 4 5
31983 iences 4 6
18352 bogota 4 6
18306 kjellman 4 8
32043 asthe 4 5
32058 veloped 4 7
32069 hinderance 4 10
17603 mesto 4 5
18096 zations 4 7
17976 sprohge 4 7
17943 'year 4 5
32167 mising 4 6
17941 vith 4 4
17758 misand 4 6
17610 saleof 4 6
32239 beis 4 4
32252 kirkle 4 6
16762 dantly 4 6
32903 fested 4 6
14478 conever 4 7
15368 pra 4 3
15768 'daily 4 6
33409 keiskama 4 8
33457 tuxen 4 5
15737 swiggart 4 8
15553 'training 4 9
33495 connec 4 6
15521 sonship 4 7
15485 vartija 4 7
15453 heavenlies 4 10
15219 preciation 4 10
15819 elffers 4 7
15186 tralasian 4 9
33644 neander 4 7
33676 erning 4 6
15084 conmake 4 7
14831 diningroom 4 10
14772 reour 4 5
33786 'man 4 4
33796 tinually 4 8
14533 geheimnis 4 9
15796 maxon 4 5
33332 papanui 4 7
32906 laplandish 4 10
16497 'doing 4 6
16663 cisely 4 6
16658 farrer 4 6
32981 foland 4 6
32983 ured 4 4
16625 gideonites 4 10
16596 ticed 4 5
16513 pointment 4 9
33032 rael 4 4
33074 ishing 4 6
16452 sota 4 4
15903 ''the 4 5
16368 imthat 4 6
16247 godand 4 6
33158 lletin 4 6
16244 manity 4 6
16200 wherethe 4 8
33243 merly 4 5
16004 'business 4 9
15960 michigani 4 9
33299 wildgrube 4 9
22045 'mission 4 8
[2376 rows x 3 columns]
In [13]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'GH'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for GH:
spell_error count word_length
26162 smouse 177 6
26702 'the 153 4
24283 schramm 113 7
268 thot 107 4
16496 'of 104 3
18531 jno 99 3
22762 chas 92 4
20048 lintonia 82 8
3106 tion 75 4
5571 'to 75 3
11071 altho 72 5
10755 'and 72 4
452 calmar 66 6
24046 brot 65 4
3395 maynor 62 6
10348 strother 55 8
4752 gos 54 3
16306 eze 51 3
11342 'em 50 3
21187 mis 46 3
11245 pre 46 3
20415 'in 45 3
5388 thi 44 3
24252 ment 44 4
5612 spartanburg 41 11
7875 wilsonia 41 8
20836 gemon 40 5
13581 corsicana 40 9
22388 wagor 39 5
17544 thos 39 4
7583 newbern 38 7
21881 ohe 37 3
5992 brethern 37 8
6405 ocala 37 5
18610 ospe 37 4
16684 vagh 36 4
16728 preceeding 36 10
13719 orangeburg 36 10
3243 'that 34 5
26195 dont 34 4
7749 ospel 34 5
4935 oclock 32 6
3120 ers 32 3
11858 pel 31 3
14738 'for 31 4
16603 abney 31 5
3298 inthe 30 5
5138 tir 30 3
3127 ments 29 5
17880 ioo 29 3
3111 ber 29 3
14898 ood 28 3
20104 ence 28 4
8354 'are 27 4
20998 seventhday 27 10
14607 'is 27 3
11519 mal 26 3
3021 ful 26 3
6459 ent 26 3
3522 tions 26 5
5669 ofthe 26 5
11284 thots 25 5
8640 ference 25 7
15692 palo 25 4
3989 palatka 25 7
16954 thes 24 4
18373 selfdenial 24 10
17584 ern 24 3
17549 ple 23 3
576 simons 23 6
25136 whetsel 23 7
11163 blest 22 5
4770 'not 22 4
26044 sionary 22 7
1679 ver 21 3
7448 kno 21 3
21027 sabbathschool 21 13
25955 gorda 21 5
26330 cleburne 21 8
17323 'be 21 3
19146 austell 21 7
3905 tothe 21 5
20894 sel 21 3
21883 'it 20 3
14089 erald 20 5
6952 ves 20 3
10003 'have 20 5
570 loth 20 4
13777 mer 20 3
21831 'we 19 3
7794 ren 19 3
6711 olvin 19 5
4441 psa 19 3
1954 brack 19 5
13871 devalls 19 7
5396 punta 19 5
3589 kittie 18 6
4180 whi 18 3
11799 ville 18 5
7170 'this 18 5
... ... ... ...
11222 tery 4 4
13451 plete 4 5
8771 loomis 4 6
16644 stangood 4 8
21355 'such 4 5
8656 neces 4 5
20113 ands 4 4
5090 'hundred 4 8
5112 hayti 4 5
20015 simonds 4 7
19972 saken 4 5
5169 tay 4 3
5178 pigott 4 6
5181 depew 4 5
5291 tice 4 4
5327 authur 4 6
5397 mrand 4 5
5579 pla 4 3
19719 hinchcliff 4 10
5646 busines 4 7
5651 cuyler 4 6
19618 vith 4 4
5762 eralo 4 5
19428 the' 4 4
19395 woodall 4 7
19354 chrichlow 4 9
19281 iff 4 3
20185 vicks 4 5
5067 'come 4 5
4956 aaa 4 3
20638 prehaps 4 7
3560 nesmith 4 7
3662 gress 4 5
21125 pia 4 3
20875 knowlege 4 8
4200 wyandottes 4 10
20820 'himself 4 8
20748 ditions 4 7
4335 froin 4 5
20658 purty 4 5
4344 shouldbe 4 8
4949 hyman 4 5
20552 gossage 4 7
4400 kirkwood 4 8
4427 workin 4 6
20503 figtree 4 7
4497 worthen 4 7
20452 of'the 4 6
4555 vitamines 4 9
20357 'doing 4 6
4929 aving 4 5
6237 mony 4 4
6475 knowed 4 6
19022 wiseman 4 7
17146 beilby 4 6
7569 'said 4 5
17450 aniong 4 6
17382 maren 4 5
7837 aubigne 4 7
17369 elzirah 4 7
8030 cbe 4 3
8228 ories 4 5
17168 iiiii 4 5
17155 kerns 4 5
8245 twentyone 4 9
17543 bas 4 3
8251 gertie 4 6
8305 espie 4 5
8396 'could 4 6
16955 ata 4 3
16880 georgie 4 7
8562 cormick 4 7
16721 dif 4 3
8622 ath 4 3
8643 sie 4 3
17510 ial 4 3
7320 ered 4 4
6483 couraging 4 9
6846 gospet 4 6
18836 ierald 4 6
6594 dearmon 4 7
18720 aweary 4 6
18712 hewas 4 5
18632 nutt 4 4
6708 willbe 4 6
18396 rishel 4 6
18391 gosp 4 4
6844 ragan 4 5
18187 nian 4 4
7298 krag 4 4
7050 iord 4 4
7069 arenow 4 6
18017 pilkington 4 10
7070 sisson 4 6
7119 ottr 4 4
7127 ventists 4 8
7133 willacoochee 4 12
17737 hedin 4 5
7241 ough 4 4
13173 cism 4 4
[1025 rows x 3 columns]
In [14]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'GOH'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for GOH:
spell_error count word_length
1063 nuttose 51 7
1553 bromose 24 7
2597 abbie 20 5
1316 nuttolene 19 9
71 lauretta 18 8
796 protose 14 7
336 lenna 13 5
1320 mackey 12 6
2532 gruels 10 6
2106 chas 10 4
1191 gos 10 3
2288 tion 10 4
1702 drs 10 3
1527 pel 10 3
865 'the 9 4
661 'in 8 3
1719 princi 8 6
2623 ansh 7 4
327 bouchard 7 8
942 tarium 7 6
1581 mynheer 7 7
1027 croutons 7 8
2073 proteids 7 8
1390 evelene 6 7
1860 'to 6 3
28 dqq 6 3
1043 sel 6 3
972 fredrickshavn 6 13
949 'and 6 4
2433 eze 6 3
2450 onehalf 6 7
424 comfortables 6 12
2453 maltol 6 6
281 jir 6 3
2015 strychnin 5 9
2236 fora 5 4
1958 sitz 5 4
1807 fik 5 3
1720 institut 5 8
2251 sani 5 4
1365 selfdenial 5 10
1714 fft 5 3
1213 heiman 5 6
597 flich 5 5
1189 warne 5 5
2773 thi 5 3
55 dulness 5 7
1956 nux 4 3
2002 allready 4 8
2744 healthdestroying 4 16
2717 schillembeck 4 12
307 lightplant 4 10
348 health' 4 7
394 fatand 4 6
413 lindstrom 4 9
484 vomica 4 6
2415 excrementitious 4 15
2385 seventhday 4 10
798 albumins 4 8
1644 mal 4 3
1033 pre 4 3
1085 'for 4 4
1088 bromo 4 5
1240 ood 4 3
2009 rlich 4 5
985 teachout 4 8
In [15]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'GS'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for GS:
spell_error count word_length
6257 'the 181 4
4423 'of 124 3
12013 aro 111 3
4173 eze 75 3
7908 'to 64 3
15169 'and 63 4
12885 ile 55 3
15862 pre 50 3
13040 ots 47 3
13239 tion 45 4
9558 elds 44 4
9186 timethe 40 7
1689 'is 32 3
4605 'that 32 5
10018 'in 31 3
2806 mal 31 3
15354 ment 30 4
14207 'be 28 3
3879 gospxi 26 6
8048 ofthe 24 5
7658 thi 23 3
6951 ets 22 3
13061 cer 22 3
8082 ehe 21 3
8466 mosheim 21 7
1207 heylyn 21 6
6778 gos 21 3
10868 seventhday 20 10
12373 iow 20 3
11713 'are 19 4
13826 sabbaton 19 8
8764 'he 18 3
7610 haruest 17 7
13056 ble 17 3
7465 'not 17 4
7408 'with 17 5
12985 wharey 16 6
11184 sel 16 3
5987 'his 16 4
5348 glynn 16 5
14511 'it 16 3
16242 pxi 16 3
6577 'as 16 3
539 blest 16 5
15536 ise 16 3
1862 'for 15 4
2425 ver 15 3
18041 'which 15 6
12378 goapxl 15 6
13090 schaff 15 6
14016 'have 14 5
6442 fon 14 3
13163 mor 14 3
18555 goapx 14 5
12539 'will 14 5
5037 'by 13 3
4684 inthe 13 5
10903 sabbathschool 13 13
14177 'all 13 4
4572 vor 13 3
14174 shabbath 13 8
1505 northfield 13 10
2221 goapxi 12 6
572 sigkix 12 6
5985 whi 12 3
6550 berthier 12 8
8564 abrahamic 12 9
15909 'at 12 3
14237 kno 12 3
14637 medo 12 4
10595 thermo 11 6
7296 ple 11 3
6348 ved 11 3
4607 gosp 11 4
12797 gesenius 11 8
1272 ity 11 3
6215 sho 11 3
13910 thd 11 3
1196 'our 11 4
4858 dowling 10 7
11254 murdock 10 7
16947 thr 10 3
13932 chri 10 4
18596 firstday 10 8
10753 wor 10 3
10166 'from 10 5
14405 gop 10 3
7085 vox 10 3
7657 eemperance 10 10
8595 thein 10 5
14178 'this 10 5
5159 shust 10 5
11885 olshausen 10 9
1909 sundaykeeping 10 13
6711 bateham 10 7
9383 neander 10 7
4584 sigklx 9 6
7002 'they 9 5
11782 'upon 9 5
2309 overcomers 9 10
... ... ... ...
6044 haue 4 4
6094 swedena 4 7
6139 igk 4 3
2946 perfeet 4 7
2516 religio 4 7
6713 mina 4 4
1034 rumseller 4 9
201 popo 4 4
228 ving 4 4
348 wisco 4 5
395 'more 4 5
437 sabbathday 4 10
548 thq 4 3
708 eecl 4 4
779 onio 4 4
787 'many 4 5
811 corea 4 5
864 sigkl 4 5
883 'seventh 4 8
1024 'no 4 3
1173 themsel 4 7
2493 'etc 4 4
1217 goapi 4 5
1337 phocas 4 6
1558 royalton 4 8
1600 ght 4 3
1649 ture 4 4
1690 'change 4 7
1786 ople 4 4
1819 ged 4 3
1915 laurvig 4 7
1952 heruli 4 6
2058 jeddo 4 5
2157 translat 4 8
2377 'first 4 6
6253 peopie 4 6
6785 nant 4 4
13364 ingulfed 4 8
11668 gilfillan 4 9
10576 sio 4 3
10590 dungan 4 6
10723 hershe 4 6
10772 'country 4 8
10886 'earth 4 6
11080 atalissa 4 8
11105 vers 4 4
11118 leitchfield 4 11
11203 urrection 4 9
11409 ohe 4 3
11417 ohl 4 3
11499 sabbathbreaking 4 15
11563 ehristian 4 9
12009 yehovah 4 7
10424 'last 4 5
12030 wledge 4 6
12170 'been 4 5
12501 o'f 4 3
12532 elie 4 4
12653 ofhis 4 5
12690 pointments 4 10
12867 doetrine 4 8
12899 ove 4 3
12986 peaceableness 4 13
13029 againat 4 7
13116 kuriakos 4 8
13187 'gospel 4 7
13237 'does 4 5
10574 giv 4 3
10293 oeo 4 3
6860 decretalia 4 10
8797 tentmeetings 4 12
6871 tirosh 4 6
7011 olean 4 5
7257 tution 4 6
7360 tay 4 3
7502 morrice 4 7
7564 hinderance 4 10
7896 catherines 4 10
7980 blo 4 3
8068 ligion 4 6
8306 harrisonville 4 13
8307 ohuroh 4 6
8545 ars 4 3
8610 the' 4 4
8828 thitt 4 5
10253 pgr 4 3
9014 'true 4 5
9038 'us 4 3
9245 'cent 4 5
9417 gowen 4 5
9452 th'e 4 4
9500 hiin 4 4
9567 oro 4 3
9691 giustianni 4 10
9720 gustafson 4 9
9843 longimanus 4 10
9879 anabaino 4 8
10057 alr 4 3
10104 nearl 4 5
18794 'mid 4 4
[488 rows x 3 columns]
In [18]:
# Load this title's error table, keep rows with count > 3 and word_length > 2,
# and print them sorted by count (largest first).
title = 'HM'
print("Summary for {}:".format(title))

df = results_to_df(title)
results = query_df(
    df,
    count_value=3,
    length_value=2,
    sort_by='count',
)
print(results)
Summary for HM:
spell_error count word_length
5175 gen'l 102 5
6604 durand 97 6
5407 rep't 92 5
435 miscel 79 6
8413 am't 76 4
6180 avenola 76 7
2387 mis 76 3
4402 l't 64 3
3674 cumb 62 4
7453 imlay 57 5
1400 canof 56 5
6032 cassopolis 55 10
630 raiatea 51 7
438 seventhday 49 10
5005 intyre 43 6
2404 aro 43 3
8589 dist's 41 6
1036 schoolcraft 41 11
5697 agt 39 3
866 scand 38 5
2824 wheatena 37 8
6495 vassers 36 7
7911 lehigh 36 6
5797 hayti 35 5
6953 revassers 35 9
6657 'the 34 4
2820 sabbathschool 33 13
8887 deliv 33 5
6758 raratonga 32 9
6635 sabbathkeepers 30 14
3991 bogota 29 6
8095 sendebud 28 8
6412 deliv'd 28 7
4370 susp'n 28 6
4955 tena 28 4
4296 chas 27 4
5404 bordoville 27 10
8910 tion 27 4
9153 riverton 26 8
7520 grandville 26 10
176 mundy 26 5
9898 watrousville 26 12
9029 greenleaf 26 9
1816 farmington 26 10
7604 pierson 25 7
2842 eddyville 25 9
1948 elkhorn 25 7
2334 vaktare 25 7
7776 grinnell 25 8
1444 mor 25 3
4091 pierrepont 25 10
8135 centerville 25 11
8748 afton 25 5
1676 richford 24 8
4005 danvers 24 7
9479 smithland 24 9
2167 coldwater 24 9
3939 charlemont 24 10
4732 morrice 24 7
8875 springside 24 10
3597 fbr 24 3
4929 castana 24 7
2027 lakeview 24 8
6059 alaiedon 24 8
8317 gowen 23 5
3021 ruthven 23 7
5274 stauffer 23 8
3874 vilas 23 5
1079 kitts 23 5
4602 scottville 23 10
4927 waukon 23 6
5624 pre 23 3
2927 elmwood 22 7
6962 saranac 22 7
4382 lmtd 22 4
921 sunbury 22 7
1522 sandyville 22 10
1067 wamego 22 6
4289 ceresco 22 7
1711 vergennes 22 9
7289 middlebury 21 10
1526 evangeliets 21 11
4191 blendon 21 7
1321 sextonville 21 11
4593 webberville 21 11
8494 sinclairville 21 13
3164 jeddo 21 5
3642 vermontville 21 12
1588 edinboro 20 8
1624 parkville 20 9
5738 twentyfive 20 10
5216 eze 20 3
5207 sedalia 20 7
953 childstown 20 10
1194 satolli 20 7
9918 grangeville 20 11
4059 almira 20 6
9590 brookings 20 9
8807 'of 20 3
4284 ladonia 20 7
... ... ... ...
5889 spanishspeaking 4 15
2295 cept 4 4
5958 pmpmpm 4 6
1689 bloomville 4 10
5995 avoca 4 5
2355 nanson 4 6
6167 alpharetta 4 10
6179 ong 4 3
6186 follo 4 5
1496 ili 4 3
6363 'there 4 6
1735 misha 4 5
4250 thein 4 5
6436 stremann 4 8
1402 ithe 4 4
6502 intrust 4 7
6507 nig't 4 5
6514 thirtyfive 4 10
6533 walkerton 4 9
6564 nyassa 4 6
1376 wallowa 4 7
6613 shawmut 4 7
5837 ure 4 3
2278 sanningens 4 10
8534 iss 4 3
5296 thirtythree 4 11
2147 dixo 4 4
2122 apeth 4 5
4943 eldred 4 6
2159 frederikshavn 4 13
5061 ifi 4 3
5080 fide 4 4
4832 traylor 4 7
5170 goldsberry 4 10
1920 'any 4 4
1915 winti 4 5
1857 twentynine 4 10
4552 p'fie 4 5
5362 farnum 4 6
5384 papetoai 4 8
14 tri 4 3
1790 capps 4 5
5473 freemont 4 8
2252 calebs 4 6
5595 brn 4 3
5608 berthoud 4 8
5626 reis 4 4
4572 grenfell 4 8
1346 guadaloupe 4 10
1340 sions 4 5
6711 andthe 4 6
662 hansa 4 5
3805 acra 4 4
7913 hea 4 3
3792 gorman 4 6
7943 ''the 4 5
7954 tierra 4 6
693 amyot 4 5
7990 canv 4 4
8021 kibira 4 6
8052 ansgarius 4 9
8057 caro 4 4
3679 britian 4 7
6737 sbbath 4 6
8185 l'i 4 3
647 br'ght 4 6
3665 visser 4 6
8325 crowther 4 8
8359 kroners 4 7
2752 kelsea 4 6
8442 ble 4 3
8470 godgiven 4 8
8480 presque 4 7
8506 nickerson 4 9
7862 sharpsburg 4 10
7791 priate 4 6
797 pottstown 4 9
849 juras 4 5
1288 taftsville 4 10
6877 seffner 4 7
6885 gome 4 4
6902 'so 4 3
4096 hollandville 4 12
1107 taopi 4 5
2514 'in 4 3
7257 helvetians 4 10
7303 inthe 4 5
3999 metropolitans 4 13
7400 peckham 4 7
1043 clure 4 5
1024 cassopolie 4 10
4913 richville 4 9
7581 wacek 4 5
984 fleshmeats 4 10
2636 allister 4 8
7620 espirito 4 8
937 itinerating 4 11
7703 bliven 4 6
3880 medora 4 6
6686 nowlin 4 6
[670 rows x 3 columns]
In [19]:
# HR: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'HR'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for HR:
spell_error count word_length
28466 tion 950 4
20855 sel 633 3
72492 cafe 595 4
15620 sitz 460 4
50304 ment 445 4
31479 pre 423 3
61348 proteid 417 7
45282 hydrozone 266 9
2316 tions 265 5
76552 glycozone 250 9
26613 kumyss 247 6
32473 agt 220 3
55482 chas 217 4
79511 marchand 215 8
37440 priessnitz 207 10
40862 sirup 180 5
12623 tremens 180 7
30726 hypopepsia 177 10
61891 'em 176 3
11648 tri 175 3
90127 ance 169 4
81227 ble 157 3
72423 keeley 157 6
31201 ous 155 3
67026 ments 155 5
15859 ful 154 3
56875 'the 154 4
62831 tem 152 3
6277 trall 147 5
86637 deimel 146 6
73573 cornaro 138 7
6184 ers 133 3
69701 ence 132 4
69993 ent 128 3
88946 microscopists 126 13
43989 ity 125 3
71050 clure 125 5
78955 ecole 120 5
62432 kedzie 120 6
16325 wuz 120 3
5731 onehalf 119 7
27297 ridpath 119 7
11176 hygeio 118 6
31228 ili 116 3
86271 hydriatic 116 9
30615 ple 114 3
61584 vick 112 4
79835 socalled 108 8
26309 fehr 108 4
58111 gruels 107 6
81217 ture 106 4
16890 infantum 106 8
7069 paso 106 4
85605 ure 105 3
86141 electropoise 104 12
83812 pim 104 3
53225 dren 97 4
81478 ical 97 4
27280 tle 96 3
91992 basle 96 5
68797 ber 96 3
19271 meltose 95 7
45251 derangements 94 12
4916 mal 94 3
86489 munn 92 4
50416 twentyfive 91 10
91345 centrale 91 8
76742 ealth 91 5
31962 pharmacal 90 9
41540 schoolcraft 89 11
79481 drexel 89 6
64041 dextrinized 87 11
56573 ceo 86 3
82386 soo 86 3
23840 strychnia 85 9
83553 caffein 84 7
36429 crandon 84 7
4888 morbus 84 6
26457 corpore 82 7
91308 bacco 82 5
41557 enemata 81 7
9922 institut 81 8
34059 parral 80 6
81467 eral 79 4
34434 alabastine 79 10
91533 bloodvessels 78 12
39792 pawlow 77 6
58759 ioo 76 3
18831 chautauquan 76 11
55601 mis 76 3
73873 accom 75 5
35276 twentyfour 75 10
54967 ood 74 3
12182 colman 74 6
11664 sanitaire 74 9
3136 farnum 73 6
67838 boylston 73 8
13676 ani 73 3
83099 murdock 72 7
41584 condit 71 6
... ... ... ...
70435 ified 4 5
70472 wellboiled 4 10
70492 threeor 4 7
12689 appara 4 6
33935 terly 4 5
4682 emorest 4 7
71584 sendfreea 4 9
4722 breederswe 4 10
71472 cotosuet 4 8
71322 bacheler 4 8
34238 shafer 4 6
34281 sensical 4 8
4738 moand 4 5
4755 up' 4 3
34288 here' 4 5
71239 tipulary 4 8
4804 moqui 4 5
71213 eyesa 4 5
71122 hwth 4 4
34295 sacri 4 5
12779 glycorone 4 9
4826 axler 4 5
34445 villemin 4 8
34491 youare 4 6
70890 zemzem 4 6
70843 grizel 4 6
34600 ential 4 6
34616 tinues 4 6
34797 divorcecourts 4 13
34814 tiiis 4 5
70671 mieh 4 4
34947 evrard 4 6
35141 nute 4 4
35903 faris 4 5
36035 meateater 4 9
69392 vaipipg 4 7
12251 giessen 4 7
5375 manwoman 4 8
12227 seg 4 3
37045 egtensive 4 9
12226 elc 4 3
37198 britian 4 7
68119 trils 4 5
37231 apprecia 4 8
37279 waddington 4 10
68097 flagg 4 5
68033 arrearages 4 10
68032 shiverings 4 10
68016 kisi 4 4
37313 iixa 4 4
37362 eie 4 3
67970 workin 4 6
67953 brushings 4 9
12177 rassed 4 6
37456 oeen 4 4
67878 lachrymation 4 12
5497 advertbements 4 13
37531 therapeuptic 4 12
67812 necessaryand 4 12
37622 bastie 4 6
12146 aire 4 4
37656 exis 4 4
67685 eat' 4 4
12068 alexins 4 7
37826 inspec 4 6
67602 ampmpm 4 6
37003 distemperate 4 12
68567 drouths 4 7
12463 rawnsley 4 8
5366 'same 4 5
36136 formad 4 6
36138 oir 4 3
69363 brophy 4 6
36154 yellowfever 4 11
69325 shal 4 4
36164 ceeding 4 7
5214 laundried 4 9
69210 mps 4 3
36307 ljtaith 4 7
69183 ved 4 3
69174 snanitiatarriium 4 16
12392 icycle 4 6
12321 razzle 4 6
68979 lthough 4 7
5308 heatmaking 4 10
5315 sweetcakes 4 10
68920 schnirer 4 8
36673 reina 4 5
68880 crowell 4 7
68812 inbe 4 4
12287 maake 4 5
36849 crt 4 3
68705 amd 4 3
36871 divinny 4 7
36891 neison 4 6
68671 eted 4 4
36963 lallemand 4 9
36964 perihelionists 4 14
36965 itif 4 4
33550 swinyard 4 8
[6691 rows x 3 columns]
In [6]:
# IR: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'IR'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for IR:
spell_error count word_length
2109 tion 359 4
23146 mahan 315 5
8358 ence 177 4
5308 presidentw 175 10
22751 walkerton 167 9
19537 tions 144 5
6118 unionville 143 10
20371 rocklane 140 8
18307 ference 139 7
3 chas 134 4
14221 ment 131 4
10384 frankton 125 8
1881 inwood 123 6
3615 adiana 121 6
5761 secretaryw 119 10
15796 seventhday 112 10
21673 ber 109 3
7323 medaryville 90 11
416 ple 88 3
17156 geporter 86 8
21982 horlacher 84 9
14207 suptc 82 5
2119 ers 81 3
4000 boze 79 4
22545 ren 79 3
7738 dilworth 77 8
6947 brookston 75 9
18172 indpls 75 6
12118 committeew 72 10
1050 sionary 72 7
14809 minnick 67 7
21790 ent 67 3
131 sunman 65 6
4164 ary 62 3
19087 pepple 61 6
20962 treasurerw 58 10
12859 cuaig 57 5
11833 secretariesa 56 12
18610 treasurera 56 10
132 nuding 56 6
12629 treasurert 55 10
3364 mis 53 3
14125 burkhart 52 8
9700 missionaryr 52 11
4098 ance 52 4
2708 ville 51 5
13909 ments 51 5
4904 beath 51 5
1401 ters 50 4
14087 adelia 50 6
16039 hodapp 49 6
14115 haskins 47 7
14114 pre 45 3
4547 ful 45 3
6901 busz 42 4
116 eral 41 4
5978 dianapolis 39 10
18445 higbee 39 6
14974 rium 39 4
11370 lugenbeal 39 9
18844 mittee 39 6
2401 wirt 36 4
16179 metzker 36 7
6588 possman 36 7
8389 bers 36 4
23220 altho 35 5
8679 indi 35 4
22151 athen 35 5
7184 britton 35 7
21422 hussey 34 6
19175 apolis 34 6
15883 mellinger 34 9
21458 ceived 34 6
10926 wanteda 33 7
6774 'the 33 4
12103 crary 33 5
8081 dren 33 4
9091 cleland 32 7
11763 mal 32 3
7058 ation 31 5
7829 gabriella 31 9
3280 kenney 31 6
14136 libertya 31 8
21880 sions 31 5
19097 larkin 31 6
4819 medicaldr 31 9
16868 korn 30 4
6834 cleotis 30 7
17604 terest 30 6
294 huntingburg 30 11
17233 carahoof 29 8
12197 tarium 29 6
3033 portant 29 7
16948 ture 29 4
6472 secretaryj 28 10
19311 geperter 28 8
79 ington 28 6
19053 ning 28 4
10887 peo 28 3
4225 thos 28 4
... ... ... ...
16000 interthe 4 8
15907 accomodated 4 11
15847 cunig 4 5
15694 neese 4 5
15635 couragingly 4 11
15545 nected 4 6
9278 knowl 4 5
15540 lauffer 4 7
15533 het 4 3
15508 immedi 4 6
15431 whittaker 4 9
15348 elt 4 3
15321 imand 4 5
15248 wer 4 3
14866 tieing 4 6
14750 prepar 4 6
14654 sanitar 4 7
14558 jority 4 6
16384 haye 4 4
16438 occa 4 4
16622 lecting 4 7
16645 sehool 4 6
18137 employes 4 8
17971 lts 4 3
17967 exof 4 4
17945 newed 4 5
17923 ization 4 7
17911 uhe 4 3
17883 conwas 4 6
17820 guage 4 5
17711 strating 4 8
17661 secretarya 4 10
17613 fairlaud 4 8
17609 lms 4 3
17443 preceeded 4 9
17421 ntsh 4 4
17258 sanitari 4 8
17103 remem 4 5
17072 ethelyn 4 7
17021 stantial 4 8
16992 perty 4 5
16968 nancial 4 7
16937 misof 4 5
16767 wakarusa 4 8
16693 cerenola 4 8
14449 lizzfe 4 6
14378 cerely 4 6
14350 memthe 4 6
11961 sug 4 3
11831 clawson 4 7
11736 eeeeee 4 6
11699 deis 4 4
11408 'it 4 3
11179 rewith 4 6
11050 faiththe 4 8
11034 timonies 4 8
10983 indianapous 4 11
10852 laand 4 5
10784 beto 4 4
10686 gansport 4 8
10439 estly 4 5
10277 bewill 4 6
10219 jbuhalts 4 8
10208 cli 4 3
10156 sponded 4 7
10135 durgan 4 6
9837 dustrial 4 8
9769 komo 4 4
9762 iola 4 4
9476 templeton 4 9
9441 'if 4 3
9279 bufialts 4 8
11871 wolflake 4 8
12029 malony 4 6
14343 enty 4 4
12061 shambaugh 4 9
14272 exthat 4 6
14249 walburn 4 7
14190 sto 4 3
14121 'po 4 3
14019 adian 4 5
13765 rcher 4 5
13599 haps 4 4
13479 margeret 4 8
13116 marton 4 6
13099 forand 4 6
12999 liever 4 6
12994 ohmer 4 5
12993 warrick 4 7
12809 cott 4 4
12806 beand 4 5
12780 peoof 4 5
12628 presidenti 4 10
12486 culation 4 8
12464 prepara 4 7
12458 michaelville 4 12
12331 condi 4 5
12143 sisted 4 6
12141 ruary 4 5
23335 cers 4 4
[1053 rows x 3 columns]
In [7]:
# LB: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'LB'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for LB:
spell_error count word_length
10799 mackey 296 6
14763 hsi 247 3
23192 halsted 165 7
7895 ile 142 3
13506 vitamines 110 9
7689 lundell 97 7
23332 kershaw 95 7
21299 auley 92 5
14910 pearsons 91 8
24764 harner 90 6
15591 stapp 84 5
3514 'the 84 4
483 pavlson 81 7
17017 chas 79 4
16922 soulwinning 76 11
2498 'to 72 3
23901 crittenton 71 10
22240 twentyfive 69 10
7922 chicagotrains 68 13
10192 courtland 55 9
11960 dannemora 55 9
3304 bilhorn 53 7
8226 burghart 52 8
861 pawlow 51 6
19634 thekla 49 6
17951 gazeteer 49 8
1116 papercovered 48 12
14525 ridpath 45 7
17780 whisler 44 7
23230 tion 43 4
21419 'of 43 3
19572 jno 42 3
23195 zada 42 4
21072 'and 41 4
12052 saloonkeeper 41 12
6181 minutesfifty 41 12
21951 jeffers 40 7
15662 'we 40 3
10797 laundryin 39 9
12256 cyclopmdic 39 10
1130 stillwater 39 10
22870 edholm 38 6
1473 colortype 38 9
4732 desplaines 38 10
5889 'phone 38 6
17360 kohlsaat 36 8
21261 eze 35 3
20293 psa 35 3
1314 pre 34 3
19859 salle 34 5
17907 agt 34 3
6237 kedler 34 6
10954 sevenjeweled 33 12
24169 ment 33 4
9666 vories 32 6
16941 holaday 32 7
4106 egal 31 4
21445 employe 31 7
15138 luyster 31 7
19204 mal 31 3
9024 cann 31 4
20228 stantly 30 7
13740 cyclopedic 30 10
17421 tkt 30 3
3382 sinsick 29 7
12504 'in 29 3
5758 kniskern 29 8
1513 lbinsbale 29 9
2407 oldfashioned 29 12
12134 thos 29 4
4958 leavitt 28 7
20704 waltham 28 7
10277 ranney 28 6
7495 tyrer 28 5
1668 printype 27 8
1029 cyclopaedic 27 11
5503 zoerb 26 5
21218 cyclopxdic 26 10
18339 gipsy 26 5
4542 anb 26 3
22593 anamosa 25 7
10926 hurd 25 4
4520 tiie 25 4
8112 themnot 25 7
24190 potosi 25 6
20938 burleson 24 8
16436 onehalf 24 7
16214 medo 24 4
12131 ufford 24 6
5712 ballington 24 10
14083 selfsupporting 24 14
2491 employes 23 8
20708 'neath 23 6
3881 mis 23 3
8730 rawlinson 23 9
906 ili 22 3
9282 ments 22 5
530 cassimeres 22 10
11650 appli 22 5
3818 cbicago 21 7
... ... ... ...
20581 methat 4 6
20569 tio 4 3
20552 fausset 4 7
15071 iiii 4 4
20471 supervisionof 4 13
20435 hitt 4 4
4682 tlie 4 4
15075 gpta 4 4
11000 gowlie 4 6
4759 thou'lt 4 7
4790 follo 4 5
10991 batonga 4 7
4843 cornmunity 4 10
20378 foodless 4 8
4894 creegan 4 7
10955 lation 4 6
15145 uring 4 5
20301 daybuthave 4 10
4927 'whosoever 4 10
4965 carscallen 4 10
20269 editori 4 7
20611 tae 4 3
14918 shoop 4 5
11107 easurements 4 11
20997 ered 4 4
21131 llo 4 3
21119 usward 4 6
4152 nating 4 6
11154 deathdealing 4 12
21103 itself' 4 7
4158 brodder 4 7
4232 him' 4 4
4243 ral 4 3
4290 'on 4 3
4306 rro 4 3
14769 simson 4 6
4502 ister 4 5
4331 georgeson 4 9
4348 raws 4 4
9397 naturedly 4 9
14801 eskridge 4 8
20853 subwe 4 5
14901 bosphorus 4 9
20816 brompton 4 8
20802 teresting 4 9
4447 poisonful 4 9
20768 pharoah 4 7
4994 nickles 4 7
15168 wand'ring 4 9
5050 muscatine 4 9
19426 helzer 4 6
19653 sdale 4 5
10607 lusx 4 4
19633 foss 4 4
5715 bulow 4 5
5725 samkoff 4 7
5740 worldthe 4 8
5845 lifea 4 5
10554 haing 4 5
19502 christian' 4 10
19454 ilissionary 4 11
19219 enger 4 5
19724 peoplepeople 4 12
19206 'since 4 6
6006 mee 4 3
15510 rurses 4 6
10529 sandow 4 6
15564 twills 4 6
10490 'when 4 5
15601 mahan 4 5
6143 luvster 4 7
6211 talcott 4 7
6235 rhe 4 3
19703 ught 4 4
5680 'him 4 4
10952 oddsize 4 7
19934 appre 4 5
10818 cowee 4 5
15217 rawlins 4 7
5103 lauck 4 5
5170 leseme 4 6
5219 sor 4 3
20055 warrenville 4 11
15224 smerdis 4 7
20002 hostetter 4 9
10714 olivers 4 7
5241 keinhoff 4 8
19920 ough 4 4
19743 harrigan 4 8
5462 vix 4 3
5481 burford 4 7
19854 tli 4 3
10622 ese 4 3
15295 soul' 4 5
5568 ement 4 5
10618 hebard 4 6
10610 besetments 4 10
19763 companyso 4 9
5666 valdosta 4 8
12789 conversationala 4 15
[1352 rows x 3 columns]
In [8]:
# LH: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'LH'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for LH:
spell_error count word_length
3136 cornforth 267 9
15571 tri 120 3
25076 tion 119 4
7320 nauheim 91 7
4410 antituberculosis 87 16
1492 pre 83 3
8341 'ad 71 3
18562 vitamine 64 8
17815 onehalf 63 7
24188 socalled 62 8
8615 ment 61 4
14628 quinin 61 6
9133 lllll 59 5
18254 ioo 54 3
17120 roseburg 53 8
19743 kee 52 3
21068 friedmann 52 9
25310 osler 51 5
5812 karmatar 50 8
22332 sanatoria 50 9
16894 bulkley 45 7
15295 drugless 45 8
7572 antityphoid 45 11
18465 chas 45 4
14610 nebulizers 44 10
697 imprenta 42 8
13688 peruna 41 6
7691 westfield 41 9
7241 sitz 40 4
1263 madronas 40 8
14286 unvaccinated 40 12
19067 cromie 40 6
24603 bannerman 40 9
23321 'the 40 4
24044 picric 39 6
26551 gulick 39 6
17163 verdad 39 6
26769 upto 38 4
14193 frictionary 37 11
12218 iiiii 37 5
4451 achard 37 6
15392 welltrained 37 11
22530 nozaleda 37 8
7479 bellair 37 7
22633 bournville 36 10
11781 kinau 36 5
1010 ili 35 3
20645 hindhede 35 8
13161 mal 34 3
21293 lorand 33 6
15525 herter 33 6
2016 goldberger 32 10
13784 ful 32 3
25355 peroxid 32 7
4251 mis 32 3
1903 pellagrins 32 10
22429 purin 32 5
12659 openair 31 7
17721 keech 31 5
5275 welch's 31 7
16821 mahon 30 5
2990 ellamont 30 8
5844 wellknown 29 9
12134 collum 29 6
16274 iiii 29 4
4614 musselman 29 9
4492 cornaro 29 7
9263 ptomain 29 7
2310 ini 28 3
16644 nal 28 3
1857 llllll 27 6
20777 electriclight 27 13
15724 ridpath 27 7
6800 voit 27 4
698 twentyfour 27 10
11617 ith 27 3
22831 'and 27 4
10559 thermo 26 6
1055 ent 26 3
22525 canners 26 7
21639 omprising 26 9
23200 'of 26 3
23846 twentythird 25 11
20865 nonmeat 25 7
18859 guilbert 25 8
13211 doran 25 5
16953 salvarsan 25 9
24054 twentyfive 25 10
967 pawlow 24 6
20870 sha 24 3
12354 whalebones 24 10
16518 deathrate 24 9
6683 rosenau 24 7
16183 rane 24 4
14826 misbranded 24 10
1327 moneyorder 24 10
19321 woodhead 23 8
15124 iet 23 3
4145 healt 23 5
19292 ealth 23 5
... ... ... ...
19268 lifr 4 4
19282 recanned 4 8
19318 nyassaland 4 10
19399 timehonored 4 11
19535 paso 4 4
19546 carbo 4 5
16064 gipsy 4 5
16022 stines 4 6
10586 hydrtherapy 4 11
12259 harken 4 6
11570 iiiiiiiiiiiiiiii 4 16
11751 koren 4 5
11755 pharmacopceia 4 13
11802 ress 4 4
11812 thera 4 5
11854 icebag 4 6
11950 coldmitten 4 10
12043 tremely 4 7
12207 stracts 4 7
12396 anc 4 3
15995 mak 4 3
12399 ihe 4 3
12573 heatand 4 7
12597 inhalatorium 4 12
12710 antiputrefactive 4 16
12751 cokord 4 6
12904 woodalcohol 4 11
12906 piki 4 4
12916 almostautomatically 4 19
12970 cffl 4 4
11514 igi 4 3
11506 hono 4 4
11431 northend 4 8
11425 ivr 4 3
10613 opment 4 6
10727 kathrina 4 8
10734 bodyand 4 7
10747 litform 4 7
10771 wun 4 3
10810 ctsayear 4 8
10935 rhin 4 4
10961 ftf 4 3
10979 illissionary 4 12
11057 fernet 4 6
11101 appa 4 4
11103 seidlitz 4 8
11110 ductory 4 7
11189 soyer 4 5
11196 gerontic 4 8
11278 darnall 4 7
11288 ninetyseven 4 11
11293 oot 4 3
11338 wageearners 4 11
12979 payson 4 6
12984 helsingfors 4 11
12998 ille 4 4
14535 lackawanna 4 10
14620 gooa 4 4
14711 nificant 4 8
14717 fli 4 3
14727 safetypins 4 10
14796 ife 4 3
14855 mor 4 3
14904 veiller 4 7
15249 selfsupporting 4 14
15304 lene 4 4
15429 ofdoor 4 6
15441 homekeeper 4 10
15465 mei 4 3
15502 ake 4 3
15610 bons 4 4
15759 lnd 4 3
15782 toif 4 4
15846 samado 4 6
15877 combatting 4 10
15994 erly 4 4
14560 fre 4 3
14487 mment 4 5
13108 conserver 4 9
14408 tial 4 4
13175 cepted 4 6
13189 gorst 4 5
13207 clubb 4 5
13228 flueless 4 8
13242 iealth 4 6
13250 salud 4 5
13274 rettger 4 7
13276 schlickeysen 4 12
13282 ook 4 3
13377 sulting 4 7
13764 bowsfield 4 9
13877 portunity 4 9
13976 ficient 4 7
13982 huchard 4 7
14050 trom 4 4
14112 stockard 4 8
14323 electriclighted 4 15
14377 tingfang 4 8
14401 cgdking 4 7
27242 warbasse 4 8
[1606 rows x 3 columns]
In [9]:
# LibM: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'LibM'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for LibM:
spell_error count word_length
412 gallivan 61 8
3949 religio 48 7
1554 miraglia 45 8
6330 tion 43 4
8822 cxsar 40 5
7506 neander 38 7
5181 charta 37 6
2382 ment 32 4
6487 chas 30 4
257 seventhday 29 10
1233 mutchler 28 8
1977 pre 25 3
581 heyburn 23 7
2949 connell 21 7
3971 'the 20 4
7138 haverhill 19 9
766 brevities 19 9
3577 eze 18 3
138 siegel 17 6
7015 interchurch 17 11
3021 cockran 16 7
3185 parte 15 5
4197 'of 15 3
5169 socalled 15 8
2819 sunclay 14 7
169 robb 14 4
9122 gaw 14 3
5880 connorton 14 9
6172 bastile 13 7
5787 bonzano 13 7
7458 fairmount 13 9
5397 claxton 13 7
4754 roseburg 13 8
7545 krieger 13 7
362 mmm 13 3
3340 churchand 13 9
6331 andstate 12 8
8323 hamurabi 12 8
6203 ioo 12 3
3145 smoot 12 5
8617 bannerman 12 9
5308 medo 12 4
8720 ligious 12 7
3723 tions 11 5
2853 gaynor 11 6
5424 rooker 11 6
8016 sundaylaw 11 9
1670 vagh 11 4
695 ernment 11 7
6784 kerens 10 6
4983 libertyloving 10 13
5802 hanly 10 5
3149 lllll 10 5
6424 brien 10 5
2227 prin 10 4
845 gantenbein 10 10
7444 borah 10 5
421 elsnath 10 7
1221 ber 10 3
7523 clinedinst 10 10
3063 mayhew 10 6
7659 twentyfive 10 10
8726 ashby 10 5
8134 cathedra 10 8
1677 cxxxiv 9 6
9031 religi 9 6
8935 ringgold 9 8
377 farreaching 9 11
5688 filiated 9 8
4039 ellamont 9 8
2852 ridpath 9 7
2883 upsall 9 6
6417 frisons 9 7
884 twentyfour 9 10
5579 libert 9 6
2468 ile 9 3
2536 honorius 9 8
3386 tithingman 9 10
5346 diaz 8 4
2851 nozaleda 8 8
5507 ligion 8 6
2677 faneuil 8 7
2958 dagonya 8 7
3749 cmsar 8 5
4812 verdad 8 6
3461 minton 8 6
3876 bartholdt 8 9
3635 woolman 8 7
8300 lil 8 3
554 bourke 8 6
3547 sundayclosing 8 13
8022 ttf 8 3
7984 temporalities 8 13
7779 ili 8 3
4211 stitution 8 9
4251 erty 8 4
4295 laurin 8 6
6643 tiie 8 4
3100 burleson 7 8
4462 ity 7 3
... ... ... ...
8165 alister 5 7
5055 thro 5 4
621 millan 5 6
591 pia 5 3
484 ministerium 4 11
1365 ite 4 3
9180 firstand 4 8
1344 saboth 4 6
8625 stanchly 4 8
199 iie 4 3
204 tkg 4 3
8752 pers 4 4
7186 erance 4 6
9161 pereira 4 7
7157 cutchen 4 7
7348 millington 4 10
7321 querque 4 7
7972 mittee 4 6
511 troduced 4 8
8433 cwsar 4 5
7973 carbo 4 5
751 aked 4 4
723 tlf 4 3
7722 canalejas 4 9
1014 wetmore 4 7
7568 kihrrtu 4 7
541 chainless 4 9
1021 shi 4 3
7357 lello 4 5
8463 brownson 4 8
1034 allister 4 8
1102 francesco 4 9
539 labor' 4 6
1224 ketcham 4 7
8565 hosius 4 6
1503 ayear 4 5
3750 prima 4 5
6334 grosscup 4 8
1518 'for 4 4
4449 crozer 4 6
5052 nct 4 3
3075 ofi 4 3
4902 tkr 4 3
3088 duced 4 5
4771 nem 4 3
3130 duval 4 5
4753 ciple 4 5
4740 dred 4 4
4695 attleboro 4 9
4663 legisla 4 7
3263 servance 4 8
4655 sulzer 4 6
4590 tle 4 3
4342 iti 4 3
1567 botsford 4 8
4275 creedal 4 7
4243 pulsory 4 7
4222 scriptions 4 10
3432 bluelaws 4 8
3498 christison 4 10
4145 selfevident 4 11
4081 atheneum 4 8
4066 henshaw 4 7
3632 mee 4 3
4036 casar 4 5
3968 gasless 4 7
3961 aweteranian 4 11
3660 iated 4 5
5066 sannella 4 8
5103 selfsacrifice 4 13
5121 ferred 4 6
5146 exer 4 4
6893 tlie 4 4
6852 lation 4 6
6776 impor 4 5
6747 liberi 4 6
6536 imm 4 3
6496 usconstitution 4 14
1674 kai 4 3
6421 benziger 4 8
1676 twentyfirst 4 11
6372 mehmed 4 6
1687 iow 4 3
1791 torchlights 4 11
1887 reichstag 4 9
6094 tive 4 4
6017 tant 4 4
1999 temere 4 6
2028 fide 4 4
2317 sweetser 4 8
2458 murietta 4 8
2519 llllll 4 6
5453 eral 4 4
2630 lished 4 6
2783 ereign 4 6
5314 porta 4 5
2841 dowling 4 7
2913 steffens 4 8
3069 libertas 4 8
120 sov 4 3
[311 rows x 3 columns]
In [10]:
# LUH: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'LUH'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for LUH:
spell_error count word_length
7270 vagh 663 4
19685 ords 471 4
11408 drury 455 5
18121 chas 443 4
10254 suda 353 4
19610 shelbyville 284 11
26642 herrin 275 6
6736 conaughey 271 9
22351 kimberlin 266 9
10191 plake 241 5
18855 wanteda 235 7
22677 kingman 228 7
12336 tri 222 3
10285 mitzelfelt 217 10
5701 rothbury 199 8
14019 devereaux 195 9
23016 coldwater 195 9
4784 urbandale 194 9
20007 tillie 193 6
21806 englewood 182 9
9643 dimondale 176 9
1097 seventhday 175 10
17131 ruh 170 3
16589 mahan 166 5
25912 pengelly 166 8
1093 greenbush 162 9
12595 mattoon 161 7
6175 kittleson 152 9
15862 clellan 151 7
18366 kinderhook 151 10
15963 tatton 150 6
12688 gowen 149 5
6649 palmiter 148 8
5509 herrington 145 10
23118 clintonville 145 12
17297 rapson 144 6
8822 bluford 143 7
22359 unionville 141 10
25533 clenathan 141 9
22579 colton 139 6
5893 horr 138 4
23068 alaiedon 137 8
5209 elmwood 137 7
22867 'the 137 4
3834 emerick 137 7
4752 scand 133 5
9284 trufant 131 7
17153 palo 131 4
23333 underhill 131 9
14917 bloomville 127 10
8513 sabbathschool 126 13
18774 inwood 126 6
4989 watrousville 122 12
10260 sunman 122 6
25352 crandon 119 7
21893 soo 117 3
17430 scholz 113 6
4447 addis 110 5
6805 bello 108 5
2592 hintz 108 5
26501 halderson 108 9
7108 cleora 104 6
13058 bernitt 104 7
20301 lundquist 103 9
4083 mis 103 3
10595 rideout 102 7
22354 eachern 102 7
12931 thos 101 4
766 brethern 99 8
8640 coppock 97 7
11870 mina 96 4
18581 garber 92 6
11365 possman 90 7
17193 bissett 89 7
20526 ludington 89 9
20506 guire 88 5
19949 pontoosuc 87 9
20140 fortville 87 9
15519 zeba 84 4
375 churchschool 83 12
13171 leetsville 83 10
5753 evitts 80 6
22734 'of 80 3
17184 truf 80 4
7891 erald 78 5
20413 rocklane 77 8
3719 junct 77 5
14302 barryton 75 8
15158 remsen 74 6
26513 wegtworth 73 9
22022 elkton 73 6
26405 lausten 72 7
9717 twombly 70 7
20403 maplegrove 70 10
24034 orde 69 4
5534 hardt 69 5
9367 banty 68 5
23662 twentyfive 67 10
127 dighton 66 7
900 crail 66 5
... ... ... ...
22769 mancelona 4 9
22772 dar 4 3
22807 belville 4 8
2338 erickle 4 7
9214 tbe 4 3
2605 jes 4 3
2870 churchmembership 4 16
2636 madson 4 6
2850 fourty 4 6
13325 nathu 4 5
9074 rohr 4 4
13304 cuaig 4 5
22325 ofdoors 4 7
9087 prickitt 4 8
9109 convis 4 6
13208 ortonville 4 10
2744 gladto 4 6
2723 secretarytreasurer 4 18
22390 delp 4 4
22400 publicatior 4 11
2680 whittmore 4 9
9139 cakainion 4 9
2648 het 4 3
8657 literture 4 9
21412 delc 4 4
21401 tithepaying 4 11
14804 chism 4 5
20295 mov 4 3
20297 mitzelfeldt 4 11
4273 nfr 4 3
14718 bently 4 6
8036 nobleville 4 10
8063 biederwolf 4 10
8073 nieetings 4 9
14666 hasbeen 4 7
8075 'od 4 3
20427 caipiras 4 8
4135 urbina 4 6
14606 goblesville 4 11
14603 rone 4 4
3977 waddell 4 7
8092 schoolcraft 4 11
7990 tennesee 4 8
20222 hedwig 4 6
20530 heartsearching 4 14
14879 loami 4 5
4508 konechny 4 8
4502 walkerto 4 8
19891 kirkham 4 7
4475 parshall 4 8
4458 greid 4 5
19942 tunnell 4 7
14930 onal 4 4
7811 everet 4 6
20008 interlineations 4 15
20013 augtst 4 6
20055 elkart 4 6
4353 thetime 4 7
4339 liij 4 4
4336 ering 4 5
20198 ppe 4 3
3911 srawberry 4 9
20584 helzer 4 6
8590 toour 4 5
3475 vella 4 5
21084 schuh 4 5
8405 schoenfeld 4 10
14176 arlie 4 5
21112 posession 4 9
21114 uptegrove 4 9
21116 herkimer 4 8
3413 nem 4 3
21175 otho 4 4
3381 valdamar 4 8
14090 'other 4 6
21331 ddress 4 6
21334 pso 4 3
3360 haughev 4 7
8582 rahr 4 4
21384 berd 4 4
14184 ardenne 4 7
14198 'church 4 7
14527 lorr 4 4
20979 oggs 4 4
14443 colson 4 6
3853 repitched 4 9
8210 finnell 4 7
3733 irresistable 4 12
3687 fahrion 4 7
20792 amberg 4 6
3641 apolis 4 6
14391 wideawake 4 9
3619 year' 4 5
20908 granoila 4 8
14377 hom 4 3
8258 oeakainion 4 10
3545 baraga 4 6
20973 arrangments 4 11
8314 sparren 4 7
5076 life' 4 5
[2232 rows x 3 columns]
In [11]:
# NMN: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'NMN'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for NMN:
spell_error count word_length
2957 aro 89 3
9302 leetsville 28 10
5842 willaman 26 8
176 dighton 22 7
3643 evart 21 5
2724 soo 20 3
6990 clellan 19 7
4149 myrta 18 5
4662 altho 18 5
7265 manistee 15 8
8423 beeler 15 6
5608 havo 14 4
523 sho 12 3
5262 tae 12 3
1493 armilda 12 7
1123 thoy 12 4
7469 tne 12 3
1710 vincinity 11 9
735 lich 11 4
8967 thos 11 4
726 ludington 11 9
6487 aee 11 3
2395 ich 9 3
4644 ence 9 4
8887 blesser 9 7
8851 lcrd 9 4
7784 ichigan 9 7
1109 confe 9 5
6809 wcrk 9 4
7731 sabath 8 6
1871 thoir 8 5
2744 sabbathschool 8 13
2064 pre 8 3
8343 irs 8 3
8731 ork 8 3
6430 nee 8 3
8466 ent 8 3
4616 ith 7 3
7838 anc 7 3
2461 recomend 7 8
2197 ehe 7 3
8606 'he 7 3
5589 eople 7 5
6022 rth 7 3
4167 ood 6 3
3932 fcr 6 3
3827 pooplo 6 6
5757 ths 6 3
5865 lichigan 6 8
5989 stedman 6 7
6566 manistique 6 10
3589 bracebridge 6 11
9607 djork 6 5
8696 baurain 6 7
8804 scottvillo 6 10
9311 sprague 6 7
5365 thr 5 3
922 sablath 5 7
8898 yoar 5 4
1688 sdhool 5 6
8933 nany 5 4
9285 hee 5 3
7580 laketon 5 7
1351 ilichigan 5 9
5789 sabbatheschool 5 14
1024 lan 5 3
5887 oportunity 5 10
9316 ime 5 3
1835 ars 5 3
6232 ele 5 3
6309 onference 5 9
6319 helmer 5 6
732 shoulu 5 6
669 millan 5 6
6438 nal 5 3
6486 ers 5 3
543 brothor 5 7
362 tht 5 3
6793 nester 5 6
6848 vory 5 4
5267 aed 5 3
6942 manton 5 6
5172 liesick 5 7
2276 chas 5 4
3191 ther 5 4
3119 tnat 5 4
8720 eichigan 5 8
2606 ake 5 3
8009 assionary 5 9
7986 ust 5 3
4324 woula 5 5
4346 ler 5 3
4405 eas 5 3
2603 helvig 5 6
5143 ment 5 4
4803 triplett 5 8
4840 schcol 5 6
4890 linos 5 5
2068 dingman 5 7
2038 nen 5 3
7477 lilah 4 5
7166 ick 4 3
8689 socioty 4 7
9508 feom 4 4
8560 hav 4 3
9345 eeople 4 6
7445 sehool 4 6
8289 haee 4 4
8771 theie 4 5
8250 t'e 4 3
9170 a'e 4 3
7910 wil 4 3
7842 esick 4 5
8796 ycu 4 3
8621 otc 4 3
86 shee 4 4
6862 sistor 4 6
3255 vith 4 4
2891 toskey 4 6
2664 somo 4 4
2622 tio 4 3
2582 tay 4 3
2553 ile 4 3
2543 'or 4 3
2047 whon 4 4
1762 lnrd 4 4
1527 fetoskey 4 8
1497 liko 4 4
1451 shoula 4 6
1420 ple 4 3
1367 ren 4 3
1313 potoskoy 4 8
828 aith 4 4
355 ang 4 3
264 seventhday 4 10
2938 alth 4 4
3334 bain 4 4
6697 achigan 4 7
3402 preyer 4 6
6627 tir 4 3
6412 ance 4 4
5775 kenney 4 6
5732 euickly 4 7
5479 ond 4 3
5392 ussionary 4 9
5383 yoe 4 3
5269 eee 4 3
4116 'the 4 4
4093 goffar 4 6
4055 mber 4 4
4001 onavay 4 6
3862 tions 4 5
3807 timo 4 4
3785 oraway 4 6
3756 ame 4 3
3474 thet 4 4
5129 peoele 4 6
In [12]:
# PHJ: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'PHJ'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for PHJ:
spell_error count word_length
697 sel 255 3
19978 ournal 129 6
278 societyl 80 8
16362 munn 73 4
4346 allerton 58 8
15963 misses' 56 7
17450 tion 54 4
3863 urnal 51 5
17316 teviperance 50 11
2849 'em 47 3
16736 cloe 47 4
11718 fahr 46 4
10923 sitz 45 4
2198 pre 45 3
14750 'the 39 4
20103 functual 38 8
18851 societya 37 8
6270 onehalf 36 7
8280 societyj 35 8
1905 jourxal 34 7
12956 ment 34 4
13211 cigaret 34 7
7143 vilas 34 5
12156 weiherweg 33 9
7734 monthlydevoted 32 14
2492 firstclass 30 10
15761 societys 30 8
10331 dyo 29 3
16018 thermo 29 6
8312 actina 29 6
10109 preventivesimple 28 16
6886 ance 27 4
19427 rowell 27 6
188 robb 26 4
18781 jenness 26 7
238 ful 26 3
7267 chas 26 4
19965 thos 25 4
19151 societym 25 8
13022 fehr 24 4
18703 societyc 24 8
6167 recipespost 24 11
1149 dio 24 3
9535 powes 24 5
6870 ralston 23 7
5665 cigarets 23 8
7948 japana 23 6
2510 stinson 22 7
246 nelia 22 5
9473 abbie 22 5
11872 rodolph 22 7
20550 washingall 21 10
13242 soo 21 3
8431 bahler 21 6
4511 akersgaden 21 10
6945 ioo 21 3
16664 wyman 21 5
14456 tions 20 5
8468 fasteningwith 20 13
10230 sah 20 3
5982 adjustably 20 10
10508 limbstroubles 20 13
10824 rocka 20 5
2062 drumm 19 5
17540 easton 19 6
5734 jou 18 3
15970 vill 18 4
10286 hechtman 18 8
856 lld 18 3
14263 carolinan 18 9
271 vith 18 4
4102 gauzes 18 6
12091 clure 17 5
13660 abouts 17 6
2898 sansome 17 7
19258 ventillation 17 12
18032 ish 17 3
9839 callyour 17 8
840 hutchings 17 9
13956 aimes 17 5
15755 bloodvessels 17 12
1379 depa 17 4
10670 nuttygrains 17 11
1099 dore 17 4
4846 dodds 16 5
3544 osed 16 4
3087 diseasea 16 8
16434 cambie 16 6
17175 illy 16 4
18367 ole 16 3
12535 pennellsuydam 16 13
2797 rnal 16 4
6012 rorer 16 5
8009 halfmorocco 16 11
20650 demorest 16 8
9656 rey 16 3
6311 englandn 16 8
19383 acific 16 6
13936 nux 16 3
8757 agt 16 3
... ... ... ...
11398 muchas 4 6
12290 spongings 4 9
11491 diretory 4 8
11495 dere 4 4
11642 correa 4 6
11731 ertal 4 5
11850 cise 4 4
11861 ite 4 3
12671 thinkin 4 7
10774 oue 4 3
10741 tht 4 3
13189 tink 4 4
10122 sleepingrooms 4 13
13308 recamier 4 8
13307 sicians 4 7
13281 goodbut 4 7
13234 keeley 4 6
10298 pres't 4 6
10335 wante 4 5
10685 doin 4 4
10380 logue 4 5
10526 masse 4 5
10578 murdock 4 7
10620 broster 4 7
10661 cata 4 4
12801 ijouseleld 4 10
8618 physiciani 4 10
8606 oliveoil 4 8
8521 ari 4 3
14463 creelc 4 6
5889 quired 4 6
16016 repre 4 5
15972 alabamad 4 8
5979 englands 4 8
6013 tts 4 3
6172 medi 4 4
6201 turbinated 4 10
6230 eunson 4 6
6310 ket 4 3
15816 twentytwo 4 9
15801 keeler 4 6
15773 labarriere 4 10
6460 rth 4 3
6493 niemeyer 4 8
6778 perience 4 8
16069 trir 4 4
5757 talofa 4 6
5730 tarlets 4 7
5236 m'clure 4 7
5081 irv 4 3
5084 kirkham 4 7
5088 shust 4 5
5221 cious 4 5
16429 xit 4 3
16368 masseed 4 7
5341 kneipp 4 6
5677 breethe 4 7
5423 indi 4 4
16252 'an 4 3
5553 stockines 4 9
5617 zoth 4 4
5638 childrenwill 4 12
11981 fralthfully 4 11
6910 gwine 4 5
15626 bress 4 5
15419 vrooman 4 7
14611 toa 4 3
14839 dess 4 4
8202 ial 4 3
14720 cta 4 3
8270 tobe 4 4
14626 theonlysewingmachine 4 20
8319 milfred 4 7
8343 eatty 4 5
8180 doan 4 4
8384 'sw 4 3
8464 neurine 4 7
8484 ost 4 3
14536 fcr 4 3
14504 pintsch 4 7
14468 sentinelone 4 11
14845 esculapius 4 10
8117 quartettes 4 10
15399 'make 4 5
7915 havergal 4 8
15388 spect 4 5
15375 tti 4 3
7552 gauses 4 6
7677 dillingham 4 10
15335 altho 4 5
15253 groshen 4 7
15171 konut 4 5
8072 cial 4 4
15065 'if 4 3
15014 mful 4 4
14967 sanitarims 4 10
8020 tne 4 3
14944 wery 4 4
14936 illustratedjust 4 15
16 foo 4 3
[852 rows x 3 columns]
In [6]:
# PTAR: report spell-check misses seen more than 3 times and longer than
# 2 characters, ordered from most to least frequent.
title = 'PTAR'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for PTAR:
spell_error count word_length
71 ver 78 3
2050 'the 49 4
3524 ment 46 4
2269 holies 39 6
3671 tion 34 4
5131 'of 23 3
2944 storrs 23 6
5342 eze 17 3
2894 ments 17 5
5260 'to 13 3
933 ful 12 3
2946 ninevah 12 7
2558 thi 12 3
5761 tuary 12 5
419 nant 11 4
2572 pre 11 3
5570 tions 11 5
557 ture 10 4
2341 ble 10 3
2983 gon 9 3
1623 hagion 9 6
563 ofthe 9 5
5716 'was 9 4
4063 'that 9 5
105 ple 9 3
2352 mal 9 3
5126 vers 8 4
4718 'and 8 4
4141 dence 8 5
5472 waymark 7 7
1580 'is 7 3
378 rusalem 7 7
5404 topsham 7 7
303 jno 6 3
2198 ernacle 6 7
652 ile 6 3
270 lxxviii 6 7
4830 ond 6 3
2398 lviii 6 5
815 ceive 6 5
1385 macknight 6 9
3433 'his 6 4
3002 quities 6 7
464 ved 6 3
3661 inthe 6 5
1930 tience 6 6
5435 sus 6 3
4039 jeru 5 4
3680 tbe 5 3
4114 wil 5 3
4862 ery 5 3
3444 lieve 5 5
3287 cond 5 4
3746 numberer 5 8
3373 ance 5 4
3523 binius 5 6
3980 ish 5 3
3790 fassett 5 7
3556 provi 5 5
3440 withthe 5 7
5640 pickands 5 8
476 lished 5 6
446 xlv 5 3
937 'their 5 6
5528 chronologers 5 12
5186 eis 5 3
1877 swer 5 4
4905 hovah 4 5
4581 medo 4 4
5713 lxv 4 3
5648 tes 4 3
4558 daythe 4 6
5498 'in 4 3
5485 peo 4 3
5466 pired 4 5
4591 mation 4 6
5398 brn 4 3
5378 sation 4 6
4725 'from 4 5
5373 enq 4 3
4763 exthe 4 5
4776 lieved 4 6
252 'were 4 5
4779 fect 4 4
4288 jerico 4 6
5235 vation 4 6
5251 dren 4 4
537 tures 4 5
4197 pinney 4 6
1302 mit 4 3
2072 ged 4 3
1998 tant 4 4
1975 theni 4 5
1807 vir 4 3
1644 circleville 4 11
1430 cii 4 3
1233 ral 4 3
2374 ther 4 4
1149 newmoon 4 7
1095 millenium 4 9
744 sary 4 4
662 itt 4 3
615 nology 4 6
593 mandment 4 8
2161 thefulfillment 4 14
2441 ent 4 3
4026 worlda 4 6
3369 'or 4 3
3589 'by 4 3
332 millerism 4 9
3518 'but 4 4
3497 cxxxii 4 6
543 refered 4 7
3418 hea 4 3
3293 'be 4 3
2536 'you 4 4
103 tified 4 6
2994 shimper 4 7
379 rael 4 4
2831 thatthe 4 7
2640 yond 4 4
442 truththe 4 8
3214 sabbathday 4 10
In [13]:
# PUR: report spell-check misses longer than 2 characters, ordered from most
# to least frequent. The count threshold here is 0, so tokens that occur only
# once are kept in the listing.
# NOTE(review): the other title summaries in this notebook filter with a
# count threshold of 3 -- confirm that 0 is intentional for PUR.
title = 'PUR'
print("Summary for {}:".format(title))
df = results_to_df(title)
results = query_df(df, count_value=0, length_value=2, sort_by='count')
print(results)
Summary for PUR:
spell_error count word_length
33982 tion 627 4
180 elhany 490 6
46410 seventhday 448 10
53016 ords 407 4
11275 ence 380 4
6552 ment 308 4
48811 chas 304 4
39826 sabbathschool 297 13
31687 pherson 287 7
45912 ference 281 7
3862 'the 273 4
27230 verah 260 5
39528 secretarym 236 10
40940 ers 231 3
13349 ber 222 3
9927 pepperwood 222 10
48419 ple 218 3
39220 sda 209 3
24261 twentyfifth 167 11
11802 sionary 163 7
28419 mis 161 3
48884 'of 155 3
6448 secretaryj 154 10
23279 pre 152 3
17531 tions 152 5
32643 agentf 150 6
22085 treasurerb 149 10
34739 committeee 147 10
52274 kinleyville 146 11
35218 'to 133 3
28351 humbert 128 7
51715 presidente 126 10
52 ary 126 3
22821 phcenix 125 7
15008 eral 124 4
7235 ent 122 3
5063 pac 121 3
22015 'and 120 4
37332 pasqual 119 7
35741 ful 116 3
33879 kibbin 115 6
45053 'union 115 6
11697 cific 108 5
30462 californianevada 105 16
43888 hebard 103 6
34920 ance 103 4
26007 edendale 101 8
24694 fornia 100 6
32992 rulison 97 7
49155 ern 94 3
22475 bers 92 4
38531 edu 89 3
22126 guire 89 5
41044 ments 86 5
46758 sabbathkeepers 85 14
30029 belvail 83 7
48966 twentyfive 80 10
1549 dren 80 4
40644 ble 79 3
35192 peo 78 3
28546 ture 74 4
51930 committeej 71 10
2443 paign 71 5
6527 ters 70 4
48943 tressa 70 6
30607 mayers 70 6
44518 ceived 69 6
17289 helligso 69 8
25107 nia 68 3
32574 fice 68 4
33866 lege 68 4
22682 secretaryw 66 10
29813 pencilgrams 66 11
27957 presidentj 65 10
2702 'in 65 3
38441 sions 65 5
30328 terest 64 6
41957 ning 64 4
4972 kenzie 61 6
6053 spriggs 60 7
43637 churchschool 60 12
43073 desmarets 59 9
53525 snideman 58 8
42711 nis 58 3
37847 ery 57 3
7652 tional 57 6
21645 findley 57 7
1228 sabbathschools 56 14
39009 wanteda 56 7
5502 inthe 55 5
26236 onehalf 55 7
4947 tarium 54 6
20712 ordrs 54 5
43747 ventist 53 7
22750 nellis 53 6
29221 althaus 53 7
38934 ren 52 3
53519 ottie 52 5
44458 ioo 52 3
12950 tive 51 4
... ... ... ...
19490 arwill 1 6
19491 ocality 1 7
19492 aeefrinvaued 1 12
19413 resociation 1 11
19412 filllah 1 7
19411 tearstained 1 11
19341 illhers 1 7
19332 ketc'aum 1 8
19333 convenone 1 9
19334 everythingwith 1 14
19335 iereby 1 6
19336 twentyexpenditure 1 17
19337 andkoss 1 7
19338 'tpposing 1 9
19339 calimissions 1 12
19340 seventhnight 1 12
19342 liabit 1 6
19328 patriif 1 7
19344 trueas 1 6
19348 framily 1 7
19349 iffeathing 1 10
19350 vey 1 3
19351 broththen 1 9
19352 whichhave 1 9
19353 knowlacquainted 1 15
19354 encouragconsideration 1 21
19355 eirpense 1 8
19330 ovotilt 1 7
19327 agpapers 1 8
19357 comcountry 1 10
19313 eservices 1 9
19301 stiperintendent 1 15
19302 likelyto 1 8
19303 septemsending 1 13
19304 unaca 1 5
19305 thalt 1 5
19307 bringhere 1 9
19308 mewith 1 6
19309 primiwill 1 9
19310 libetty 1 7
19315 campattend 1 10
19325 hopeduty 1 8
19316 faceto 1 6
19317 useof 1 5
19318 saniof 1 6
19319 dothese 1 7
19320 sanitaretable 1 13
19321 volare 1 6
19322 saniwho 1 7
19323 resaveci 1 8
19324 rade 1 4
19356 eyegate 1 7
19358 suii 1 4
19410 ioft 1 4
19397 chao 1 4
19387 anticipa 1 8
19388 extenunless 1 11
19389 sacrido 1 7
19390 carmichaela 1 11
19392 mimor 1 5
19393 bedtions 1 8
19394 mosiac 1 6
19395 nrany 1 5
19396 departgestion 1 13
19398 gairo 1 5
19384 messaore 1 8
19399 everycommunity 1 14
19400 watanga 1 7
19401 preseries 1 9
19402 baccalaurette 1 13
19403 faumi 1 5
19405 douthe 1 6
19406 prille 1 6
19408 peogood 1 7
19409 nebber 1 6
19386 unreliarect 1 11
19383 awaywhen 1 8
19359 sepaworld 1 9
19371 treastirer 1 10
19360 meantheir 1 9
19361 aplowed 1 7
19362 missionnoon 1 11
19363 womengave 1 9
19364 genwe 1 5
19366 believersalso 1 13
19367 sewith 1 6
19368 durenter 1 8
19369 serness 1 7
19372 thenceto 1 8
19382 unfavoris 1 9
19373 e'cientlida 1 11
19374 uct 1 3
19375 thingsin 1 8
19376 matanavat 1 9
19377 oeta 1 4
19378 vaiue 1 5
19379 sabforth 1 8
19380 preslege 1 8
19381 laorn 1 5
54011 wagonmaker 1 10
[53691 rows x 3 columns]
In [15]:
# Frequent spelling errors for the 'RH' title, most frequent first.
title = 'RH'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 10 times
# (a higher count threshold than the one used for most other titles).
results = query_df(df, count_value=10, length_value=2, sort_by='count')
print(results)
Summary for RH:
spell_error count word_length
228574 tion 5691 4
528644 'the 5093 4
64390 brn 3962 3
359021 ment 2885 4
385865 pre 2872 3
128283 seventhday 2847 10
601139 chas 2837 4
78614 'of 2796 3
309121 ets 2249 3
291436 eze 2209 3
579891 mal 2047 3
52843 'to 2030 3
332463 ahd 1988 3
209212 'and 1824 4
674347 sabbathschool 1771 13
642599 aro 1622 3
28185 tions 1538 5
88323 sel 1511 3
194290 'be 1475 3
366775 ence 1386 4
360182 ent 1320 3
29268 thi 1237 3
501423 ers 1234 3
4367 'in 1202 3
414567 ments 1155 5
599375 ver 1075 3
532312 tbe 1042 3
448553 ple 1040 3
65398 ble 1026 3
107355 ofthe 998 5
278367 sabbathkeepers 978 14
579962 ful 960 3
342546 sabba 959 5
217399 'by 943 3
450318 'that 900 5
45873 'he 850 3
282692 ber 775 3
391606 thos 754 4
422447 ference 737 7
9245 ance 733 4
248739 jno 732 3
96380 'is 730 3
598240 'have 728 5
505083 overcomer 721 9
78756 twentyfive 713 10
394166 mis 710 3
372411 tem 701 3
211974 ith 690 3
142738 ity 686 3
56726 ole 678 3
327877 tle 656 3
23743 'for 655 4
597100 xxiiil 655 6
571960 ther 644 4
672889 ren 639 3
576377 inthe 623 5
111480 'his 605 4
200579 bas 600 3
115450 bno 597 3
660508 xviil 590 5
142972 nee 587 3
275006 dobney 580 6
580577 xxivl 564 5
504388 sabbaton 557 8
529044 ous 548 3
546784 eral 541 4
79050 ern 540 3
638354 tidende 534 7
634126 xxiil 533 5
641418 whitford 529 8
361997 eview 528 5
216059 tian 528 4
245632 ioo 522 3
478536 xviiil 517 6
221592 agt 515 3
10287 ots 506 3
416438 firstday 505 8
594672 'but 503 4
356069 anb 503 3
348815 'has 503 4
45888 ture 503 4
536842 whi 494 3
466292 soo 493 3
324209 frisbie 491 7
682045 ceived 491 6
378126 medo 487 4
143450 peo 477 3
350974 dren 472 4
225416 'as 469 3
436843 tiie 466 4
301953 ise 458 3
632528 micr 458 4
16554 ject 457 4
532950 ters 456 4
381391 ure 449 3
593726 'been 448 5
128332 'we 443 3
49350 fon 441 3
201984 susp 438 4
65221 irs 434 3
... ... ... ...
192758 irm 11 3
192094 'happiness 11 10
595697 shumate 11 7
191245 itj 11 3
190774 'obey 11 5
596416 anumber 11 7
591710 rbirat 11 6
189820 monze 11 5
188954 debted 11 6
188767 upbn 11 4
596772 othat 11 5
188473 fufilled 11 8
186644 helieveth 11 9
193176 isees 11 5
594720 hoppie 11 6
193515 'officers 11 9
193808 bossert 11 7
594401 genf 11 4
194468 bam 11 3
194638 'district 11 9
195996 shouid 11 6
593485 vli 11 3
592592 puld 11 4
592456 newsom 11 6
196856 whoe 11 4
196913 gium 11 4
592364 esis 11 4
196922 increa 11 6
197124 ceptible 11 8
591847 e't 11 3
212304 nner 11 4
584729 nrk 11 3
213062 laof 11 4
225016 cutchan 11 7
226668 catastrophies 11 13
226624 diator 11 6
578695 autho 11 5
578704 tock 11 4
578972 stocker 11 7
225025 ttinto 11 6
225001 nicolaitans 11 11
213283 'ie 11 3
579297 beif 11 4
579409 dolorosa 11 8
224808 lawit 11 5
224395 'ni 11 3
224354 knowle 11 6
224245 'season 11 7
226887 lty 11 3
227156 vrt 11 3
578105 dli 11 3
228853 tiuth 11 5
229332 wilkie 11 6
576917 morni 11 5
230434 complishing 11 11
230443 'sign 11 5
576232 'difficult 11 10
231900 subjeot 11 7
232005 iaskell 11 7
232114 'spoken 11 7
232282 'conferences 11 12
232350 ponding 11 7
232429 eddyism 11 7
233242 terness 11 7
233305 'ir 11 3
223911 dehim 11 5
579797 virbrook 11 8
223697 'answer 11 7
217387 'possible 11 9
213510 tvittv 11 6
584305 'contains 11 9
584289 brom 11 4
584100 akt 11 3
213591 schbol 11 6
583743 carriacou 11 9
214442 publishi 11 8
214752 freeand 11 7
583459 polanders 11 9
215020 inary 11 5
215573 thousanddollar 11 14
583066 posure 11 6
582949 reatly 11 6
216962 tlds 11 4
218312 gions 11 5
579884 goor 11 4
218392 eartb 11 5
219767 coinmenced 11 10
220141 wito 11 4
220988 wara 11 4
221517 seim 11 4
221552 haller 11 6
581358 retu 11 4
221976 aftr 11 4
581267 'land 11 5
222401 beilhart 11 8
581094 characterthe 11 12
580738 whici 11 5
222731 pampangan 11 9
222872 nill 11 4
13 sabbatit 11 8
[14693 rows x 3 columns]
In [16]:
# Frequent spelling errors for the 'Sligo' title, most frequent first.
title = 'Sligo'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for Sligo:
spell_error count word_length
249 sligon 36 6
1214 schwab 30 6
1300 mattingly 22 9
2318 kuppenheimer 20 12
1058 kamoda 15 6
1938 herzog 14 6
1488 lippart 14 7
586 styleplus 14 9
1388 dietel 14 6
2321 geibel 13 6
1156 rebok 13 5
2293 kimonas 12 7
49 flather 12 7
1849 chesnutt 11 8
1371 ahrens 11 6
1335 friedlander 11 11
1226 greiner 11 7
519 ailes 11 5
1973 quartette 10 9
2392 cardia's 10 8
1090 lenoa 10 5
2403 furnishers 10 10
725 slgonian 9 8
2402 woolgar 9 7
1477 grosner 9 7
2197 iverson 8 7
562 ott 8 3
151 gradye 8 6
2118 kollege 8 7
909 herbst 8 6
2233 minola 8 6
103 blackistone 8 11
1122 kupjian 8 7
712 hallowe'en 8 10
2294 chas 8 4
2316 newmyer 8 7
1129 zink 7 4
2115 battleford 7 10
74 schilberg 7 9
1968 estep 7 5
615 yoshihiro 7 9
558 klothes 7 7
1325 clapp 7 5
1430 tvedt 6 5
1448 voorhis 6 7
1485 nevius 6 6
302 botsford 6 8
1693 deitel 6 6
2098 boquets 6 7
1698 feldman 6 7
1889 jeffries 6 8
2000 dulany 6 6
1278 labrot 6 6
2347 brines 6 6
2394 hirsh's 6 7
1326 rozier 6 6
247 ryneal 6 6
788 muth 6 4
496 sevrens 6 7
1073 monsen 6 6
948 woodwardand 6 11
935 iden 6 4
1130 coyl 6 4
1016 duval 6 5
1243 harkins 6 7
94 preferwhether 5 13
1853 loasby 5 6
1662 carnig 5 6
1770 beamesderfer 5 12
667 gerhart 5 7
2351 greutman 5 8
1726 mercereau 5 9
2254 transtrom 5 9
757 ingeborg 5 8
1020 colea 5 5
1700 barto 5 5
765 treible 5 7
1529 nanking 5 7
592 dyoll 5 5
533 ablewhen 5 8
2001 llylel 5 6
1166 windon 5 6
512 wyche 5 5
1420 prohis 5 6
1357 resseguie 5 9
414 clemen 5 6
53 classmen 5 8
141 callier 5 7
710 washingtondc 4 12
2119 frankin 4 7
1255 maybelle 4 8
692 pleasants 4 9
98 liij 4 4
2353 kimber 4 6
2364 tunesassa 4 9
662 kaelin 4 6
650 yelland 4 7
622 paperyou 4 8
934 dimmock 4 7
1927 pre 4 3
943 willman 4 7
1887 tattbg 4 6
316 siagonian 4 9
1164 eulah 4 5
1568 accessable 4 10
1571 latrobes 4 8
1623 maye 4 4
1742 virbrook 4 8
1760 kewley 4 6
1890 mallatt 4 7
2050 workcleaning 4 12
468 munsch 4 6
1958 mattison 4 8
950 sangster 4 8
108 glickman 4 8
1989 ite 4 3
1996 idetta 4 6
2026 feely 4 5
564 ooletwah 4 8
In [17]:
# Frequent spelling errors for the 'SOL' title, most frequent first.
title = 'SOL'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for SOL:
spell_error count word_length
7320 bsl 79 3
2290 agl 51 3
6457 mutchler 44 8
6427 sabbatteans 38 11
3200 loth 38 4
262 tion 37 4
4502 'the 33 4
4139 farmakis 29 8
7683 ment 28 4
3089 saloonmen 27 9
3896 sundayclosing 26 13
4660 ioo 25 3
5003 wctu 24 4
1777 combinationthe 24 14
4242 kishineff 23 9
5299 faul 22 4
7843 schurman 20 8
717 selfgovernment 20 14
6981 rampolla 20 8
6735 sundayno 19 8
957 ourduty 19 7
6102 allister 19 8
6371 seventhday 19 10
1600 saloonkeepers 19 13
7365 theliquor 18 9
6441 socalled 17 8
180 platt 16 5
5179 'of 16 3
4775 tions 15 5
3487 pre 15 3
6932 saloonkeeper 15 12
1808 chas 14 4
2287 sundayenforcement 14 17
6541 thos 14 4
3115 birney 14 6
6835 'to 13 3
1835 tien 13 4
2934 temperanceand 13 13
1524 muskoka 12 7
4292 milman 12 6
7044 churchand 12 9
5162 guidi 12 5
4509 tsin 12 4
1593 grocerymen 11 10
5727 satolli 11 7
1617 ricans 11 6
270 sundaylaw 11 9
5720 birnie 11 6
1411 hine 10 4
3165 'with 10 5
1350 parte 10 5
7435 gohier 10 6
622 mala 10 4
5723 lawabiding 9 10
2205 godgiven 9 8
7552 postoffices 9 11
2536 employes 9 8
713 jailor 9 6
2307 munn 9 4
5603 farreaching 9 11
2014 'and 9 4
2006 twentyfive 9 10
1684 vires 9 5
4370 freethought 9 11
2087 brien 8 5
5168 thwing 8 6
3234 montns 8 6
1531 humbert 8 7
7028 tian 8 4
7015 ance 8 4
3122 cossa 8 5
6286 philipps 8 8
401 epist 8 5
5804 rican 8 5
3911 pendergast 8 10
2757 'that 8 5
3375 erty 8 4
4274 secularities 8 12
6888 'is 8 3
1040 ernment 8 7
4376 ljudge 7 6
3320 octabo 7 6
3210 octa'bo 7 7
7847 cormenin 7 8
7120 sparhawk 7 8
279 bergfeldt 7 9
7024 legislationa 7 12
6771 greenburg 7 9
1098 boutwell 7 8
7417 broussa 7 7
7630 weyler 7 6
3737 tke 7 3
5174 charta 7 6
7445 trevier 7 7
7439 beckler 7 7
5150 enactmentment 6 13
5105 seuleuz 6 7
3596 brownists 6 9
5237 martinelli 6 10
3878 hillis 6 6
... ... ... ...
2223 polver 5 6
4203 americanists 5 12
1679 anagni 5 6
484 dechristianizing 5 16
4287 ther 5 4
3374 coun 5 4
4489 jaycox 5 6
4740 rin 5 3
2371 reconcentration 5 15
4546 priebe 5 6
2677 smyth 5 5
2556 benchmen 5 8
4602 chaingang 5 9
243 sabbathbreaking 5 15
215 tothe 5 5
1935 vannutelli 4 10
6654 indefeasable 4 12
7023 cohn 4 4
1682 goldwin 4 7
1742 'blue 4 5
1884 gebennus 4 8
2217 combinaion 4 10
1993 teris 4 5
6792 itis 4 4
6720 riis 4 4
2279 pecci 4 5
2251 buehler 4 7
2228 christion 4 9
1003 turlupins 4 9
1605 lowrie 4 6
501 eell 4 4
7851 oxman 4 5
7795 corario 4 7
7791 peoplethe 4 9
7753 ized 4 4
7733 issueii 4 7
288 proudfit 4 8
289 thingseither 4 12
7644 ters 4 4
363 ual 4 3
505 christain 4 9
1277 willi 4 5
590 shopman 4 7
769 papacythe 4 9
774 appli 4 5
852 fortynine 4 9
977 kensil 4 6
2450 implysa 4 7
1036 illne 4 5
1080 tional 4 6
1130 yalova 4 6
2384 segal 4 5
4766 ncopy 4 5
2527 protestante 4 11
5180 tll 4 3
4108 morrissey 4 9
4111 crescy 4 6
5588 ridpath 4 7
5443 crimmins 4 8
5416 sabath 4 6
5370 ence 4 4
5331 connectedly 4 11
5295 franke 4 6
4272 thibet 4 6
4508 'when 4 5
3732 julydecember 4 12
4532 firstday 4 8
4539 oth 4 3
5128 vali 4 4
5119 relig 4 5
4553 gottlieb 4 8
4557 hoppe 4 5
4690 violi 4 5
4854 mccorkle 4 8
4699 sir' 4 4
3767 greenstein 4 10
3675 por 4 3
6519 soo 4 3
6110 demagogism 4 10
2694 fora 4 4
6395 haye 4 4
2911 christthe 4 9
6364 'person 4 7
6346 ovr 4 3
3024 meeser 4 6
6222 sabbathkeeping 4 14
6140 mallalieu 4 9
3169 apos 4 4
3182 ipany 4 5
3522 ymeaornths 4 10
6069 ious 4 4
3199 pettingill 4 10
6010 sixmonths 4 9
5926 prive 4 5
5921 syar 4 4
3358 oneseventh 4 10
5799 hirsch 4 6
3428 gobernment 4 10
3473 tth 4 3
42 selfevident 4 11
[258 rows x 3 columns]
In [18]:
# Frequent spelling errors for the 'ST' title, most frequent first.
title = 'ST'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for ST:
spell_error count word_length
143577 tion 2185 4
113227 'the 1527 4
183046 eze 1301 3
188020 altho 1275 5
6577 ment 1184 4
23300 pre 791 3
194004 ets 791 3
49006 'of 782 3
3487 sel 778 3
17579 tions 614 5
145305 mal 565 3
199614 'to 543 3
72990 aro 534 3
131547 'and 531 4
41155 ments 497 5
158770 chas 451 4
11333 ence 431 4
80289 seventhday 392 10
145362 ful 370 3
95919 ers 358 3
5804 ance 354 4
212384 fbr 340 3
62335 ple 340 3
40743 ble 325 3
31478 stuttle 316 7
101645 ity 313 3
63492 'that 303 5
204350 sabbathschool 296 13
2720 'in 287 3
121226 thoroly 266 7
135817 tian 263 4
182936 'em 262 3
7277 ent 259 3
124363 geikie 253 6
130927 igns 250 4
64628 synagog 240 7
176478 thruout 239 7
60157 'is 237 3
139992 gigno 234 5
67187 ofthe 233 5
28654 tht 226 3
97836 sabbaton 224 8
191291 ber 213 3
137965 cigaret 204 7
218177 clure 199 5
113457 ous 199 3
203045 thi 197 3
1568 dren 196 4
20492 ure 194 3
42426 gilfillan 193 9
107206 tle 192 3
28458 mis 191 3
141254 allister 189 8
122747 employes 188 8
122164 'be 178 3
183217 lld 178 3
115487 tbe 175 3
143065 inthe 175 5
120831 neander 166 7
186303 ther 163 4
65198 arv 163 3
105796 cruden 157 6
14795 'for 157 4
111276 mandments 152 9
49084 twentyfive 149 10
42696 moneyorders 149 11
120282 gign 147 4
98269 overcomer 147 9
4852 thoro 147 5
51282 ioo 146 3
76415 robb 145 4
157667 ver 142 3
209182 ceived 142 6
172962 cigarets 142 8
28556 'he 140 3
90237 'neath 139 6
124562 eral 139 4
115900 ters 139 4
115619 socalled 138 8
110014 nal 138 3
92821 sionary 137 7
133260 ith 137 3
111835 tem 136 3
17679 'not 136 4
108430 ise 135 3
18098 ght 135 3
141626 'as 134 3
144909 'it 132 3
203017 sions 130 5
162787 thos 130 4
89982 peo 129 3
49140 ures 129 4
22520 'by 127 3
104369 tite 125 4
72036 ished 124 5
54255 ary 123 3
133856 sus 123 3
157613 sigjts 123 6
59052 eousness 123 8
193729 mony 123 4
... ... ... ...
84442 serviee 4 7
167637 serrant 4 7
166845 sorrowless 4 10
85824 'red 4 4
85838 somo 4 4
166393 merse 4 5
166074 calledto 4 8
86829 nothwith 4 8
86825 messager 4 8
166150 amples 4 6
166161 noother 4 7
86763 barra 4 5
166207 orach 4 5
166263 meeti 4 5
86701 wricox 4 6
86672 errys 4 5
86640 earlie 4 6
86600 'coo 4 4
166371 godemark 4 8
166399 denarii 4 7
85965 thecharacter 4 12
86541 itselfas 4 8
86536 excus 4 5
86434 thegood 4 7
86424 'turn 4 5
86380 clesar 4 6
86337 secute 4 6
166569 thehighest 4 10
86140 zangwill 4 8
86087 studen 4 6
86047 derous 4 6
86033 kolhapur 4 8
166725 'command 4 8
166733 bleness 4 7
167650 alzog 4 5
167675 conditiona 4 10
82560 owu 4 3
84404 onment 4 6
168478 rtonement 4 9
168551 ishe 4 4
83111 jscellaneous 4 12
168593 oursel 4 6
168611 tijles 4 6
168649 saintsa 4 7
83077 acrs 4 4
83064 nino 4 4
168705 brabourne 4 9
168721 cribed 4 6
168729 aiwa 4 4
83052 linde 4 5
83016 'please 4 7
83011 llu 4 3
168852 turtullian 4 10
82972 amsdorf 4 7
168854 imbe 4 4
168881 'indeed 4 7
82899 'communications 4 15
82880 thgm 4 4
82842 riously 4 7
82796 sunto 4 5
169091 sethe 4 5
169093 imself 4 6
82651 sau 4 3
169148 beasee 4 6
169205 'husband 4 8
82581 cfesar 4 6
82569 weilheimer 4 10
168453 oty 4 3
83160 grimage 4 7
168395 fbllow 4 6
168068 esculapius 4 10
84374 nually 4 6
84274 sunclay 4 7
167732 ipr 4 3
167757 greatand 4 8
84241 orby 4 4
84223 vati 4 4
84068 wrrn 4 4
84064 izi 4 3
84043 pecul 4 5
83925 jenin 4 5
83717 whieli 4 6
168014 by' 4 3
83685 protestingly 4 12
168101 unimpeached 4 11
168389 worldof 4 7
83613 efir 4 4
168119 hagios 4 6
168150 entree 4 6
168190 hothe 4 5
83507 destinyof 4 9
83463 pastthe 4 7
83409 midyat 4 6
168270 testhe 4 6
168271 sery 4 4
83407 vergeze 4 7
83403 moffitt 4 7
168315 yosemitevalley 4 14
83291 salvaand 4 8
109488 tomer 4 5
[13500 rows x 3 columns]
In [19]:
# Frequent spelling errors for the 'SUW' title, most frequent first.
title = 'SUW'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for SUW:
spell_error count word_length
33462 bfl 912 3
2585 agts 838 4
30115 chas 433 4
33942 ords 415 4
10650 bracy 289 5
15563 vagh 282 4
26188 wks 264 3
14101 billups 241 7
17041 chastain 238 8
22054 lennan 233 6
33542 seventhday 219 10
5501 peevy 210 5
18471 schroader 205 9
28371 reichenbach 205 11
33473 ppe 203 3
21102 chenault 193 8
22854 colrey 191 6
4971 'the 191 4
1757 tew 175 3
17521 sof 172 3
29186 ppp 170 3
19603 allman 164 6
34911 thos 157 4
9021 tion 149 4
19270 manous 147 6
28960 ern 138 3
16411 winkler 132 7
17156 hustburg 131 8
28907 griffiths 129 9
18416 bfi 127 3
24703 ference 125 7
31331 lura 122 4
29503 cannada 121 7
19487 ntp 113 3
3918 allran 113 6
2744 cennessee 112 9
17989 rayford 106 7
5040 parkins 104 7
28625 'of 98 3
2688 bodwell 97 7
9029 morphew 96 7
1811 mis 95 3
16723 sabbathschool 93 13
22797 ioo 92 3
14794 ence 90 4
19497 sofp 88 4
15219 deliv'd 86 7
11173 berdon 85 6
16527 ewald 84 5
25736 deliv 84 5
19324 millar 84 6
7666 hirst 78 5
25611 whitford 78 8
30371 pre 77 3
28301 charlsey 76 8
29340 ridder 76 6
5655 wor 76 3
23472 elhany 72 6
34715 memb 70 4
17905 minnis 70 6
2890 romines 69 7
25891 womack 69 6
7571 'to 69 3
32592 krauss 69 6
21563 reiber 68 6
16221 ment 67 4
25313 jno 67 3
18425 sherer 67 6
18212 parizetta 67 9
18671 perthia 66 7
2099 achenbach 65 9
17520 ber 64 3
18144 ers 64 3
28746 'and 64 4
9210 ellabama 63 8
3500 'in 62 3
33738 tri 61 3
12760 frisby 60 6
24067 stoc 60 4
19041 ypmv 60 4
20909 lettie 59 6
19234 totalsa 59 7
1025 garrigan 59 8
28737 twentyfive 57 10
2461 neill 57 5
14968 cheshier 57 8
15118 sewellton 56 9
19583 lanier 56 6
2544 shasky 56 6
14420 drbr 56 4
20272 leod 56 4
21808 ppv 56 3
32096 sie 55 3
29980 bpi 54 3
4938 sellars 52 7
6275 pendas 51 6
29296 woodall 51 7
12911 elford 51 6
25814 sabbathkeepers 51 14
22553 walbert 50 7
... ... ... ...
28427 gesting 4 7
7778 thp 4 3
7720 'till 4 5
7681 arkebauer 4 9
7677 wou 4 3
28546 'goo 4 4
28563 urday 4 5
28594 'three 4 6
7630 periences 4 9
7603 rti 4 3
28642 'see 4 4
7577 sani 4 4
7536 ednesday 4 8
28763 boox 4 4
28797 ures 4 4
9804 seuenth 4 7
26364 mayde 4 5
12670 wasteless 4 9
11918 tennesssee 4 10
11908 desir 4 5
11877 patzkowski 4 10
11727 dence 4 5
11623 llie 4 4
11512 tablished 4 9
24591 conierence 4 10
24635 axwm 4 4
11423 throughthe 4 10
11392 wdr 4 3
11364 iana 4 4
24706 encour 4 6
11341 pebruary 4 8
11208 wth 4 3
11138 elle 4 4
24854 truthladen 4 10
24855 twa 4 3
11097 contribs 4 8
11914 mura 4 4
24449 o'erflow 4 8
24962 'given 4 6
24435 gartley 4 7
12665 recieved 4 8
24127 sse 4 3
24137 isters 4 6
24143 binks 4 5
24188 us' 4 3
12401 ellabatna 4 9
24200 oin 4 3
12382 reso 4 4
12314 loinstana 4 9
12280 retaries 4 8
24285 gra 4 3
24323 wnorwood 4 8
12226 contro 4 6
12185 vayne 4 5
12169 churche 4 7
12102 oodsmark 4 8
12037 pvenue 4 6
11017 seruant 4 7
24966 ited 4 4
26356 welltrained 4 11
10597 haue 4 4
10330 bof 4 3
10276 mangin 4 6
10242 misssionary 4 11
25922 sath 4 4
25947 'their 4 6
10121 ftw 4 3
26048 soutitern 4 9
10111 denomi 4 6
10056 frf 4 3
26155 alister 4 7
26157 sfoc 4 4
10016 fausset 4 7
26258 nealy 4 5
26269 ized 4 4
26271 oneof 4 5
9922 spearwk 4 7
26306 andit 4 5
25726 hree 4 4
25674 jonesbf 4 7
24968 stn 4 3
10608 asse 4 4
11008 notia 4 5
11001 zeichen 4 7
25084 bef 4 3
10963 psr 4 3
10912 idi 4 3
10910 thereis 4 7
25259 twould 4 6
25284 'experience 4 11
25312 profes 4 6
25340 delied 4 6
25395 tennes 4 6
10754 fordbr 4 6
10752 ect 4 3
25592 shornburg 4 9
10680 essary 4 6
10636 adkisson 4 8
10614 fel 4 3
9447 ceeded 4 6
[2098 rows x 3 columns]
In [20]:
# Frequent spelling errors for the 'TCOG' title, most frequent first.
title = 'TCOG'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for TCOG:
spell_error count word_length
5831 'the 106 4
8140 eze 59 3
2897 mal 55 3
2492 'of 51 3
4803 tbe 45 3
6006 mayta 45 5
6951 scudder 40 7
2630 'and 39 4
8792 'to 38 3
7083 agtte 36 5
6087 missi 35 5
8918 seventhday 33 10
7709 hsi 33 3
8872 hasan 28 5
71 epartment 27 9
7036 cburth 25 6
7958 hunchy 24 6
3343 cburtb 24 6
4140 'in 24 3
2122 tion 22 4
1977 'for 22 4
4553 pre 21 3
4260 'be 18 3
1161 puno 18 4
5237 neesima 17 7
2929 outschools 17 10
6241 cburcb 17 6
9194 metlakahtla 15 11
6846 crehore 14 7
9096 tne 14 3
7032 perces 13 6
9914 nez 13 3
7168 thi 13 3
5301 tay 13 3
3281 occum 12 5
7558 idona 12 5
1768 soulwinning 12 11
2374 goin 12 4
7781 ise 12 3
2289 'he 12 3
8126 jule 12 4
3160 buresala 11 8
5325 alf 11 3
7956 'em 11 3
2682 'all 11 4
10135 legiac 11 6
8869 dilawur 11 7
9001 'was 11 4
1750 gon 11 3
229 him' 11 4
1439 obookiah 10 8
2745 'his 10 4
5913 you' 10 4
2341 johan 10 5
7906 seino 10 5
2619 twentyfive 10 10
9331 aette 10 5
3517 serkey 10 6
3615 'that 10 5
8115 nyasaland 10 9
3939 wantedyoung 10 11
9895 ment 10 4
5258 'work 9 5
9799 solusi 9 6
5520 muramatsu 9 9
9260 phuloo 9 6
3598 them' 9 5
3918 'they 9 5
6033 sangster 9 8
617 'one 9 4
6894 hoa 9 3
8179 finster 9 7
2926 selfdenial 9 10
7601 'as 9 3
7823 'church 9 7
1506 neddie 9 6
2307 'it 9 3
8059 thei 9 4
3040 turvy 8 5
6504 litsi 8 5
7961 cburrb 8 6
3401 hetty 8 5
5289 greatorex 8 9
9736 guianas 8 7
68 havergal 8 8
5418 it' 8 3
2137 mis 8 3
2325 abu 8 3
4076 tidens 8 6
9081 ofthe 8 5
4564 'will 8 5
2473 god' 8 4
1144 tosti 8 5
3870 mehemet 8 7
1275 tiie 7 4
1103 thome 7 5
297 nee 7 3
2514 pietro 7 6
9793 floy 7 4
2688 mit 7 3
... ... ... ...
1411 thechurch 4 9
8200 zwemer 4 6
9414 'second 4 7
9759 oldfashioned 4 12
9788 'an 4 3
1383 grose 4 5
564 cooey 4 5
9149 thosewho 4 8
458 thd 4 3
2254 disfellowshiping 4 16
713 brower 4 6
826 ent 4 3
843 'missionary 4 11
9462 'some 4 5
846 servi 4 5
849 fiske 4 5
9630 'most 4 5
9633 herzog 4 6
8969 brouilette 4 10
8921 pitania 4 7
891 ments 4 5
926 faraoa 4 6
1060 cleland 4 7
9692 'said 4 5
1176 hannington 4 10
9745 notruction 4 10
1181 misiones 4 8
8767 chri 4 4
517 nickie 4 6
9769 'would 4 6
8678 hav 4 3
577 kno 4 3
7373 sionarp 4 7
2274 grythyttehed 4 12
5544 fon 4 3
6369 qur 4 3
2945 aleander 4 8
2953 godward 4 7
6235 spe 4 3
6180 'asked 4 6
6140 hini 4 4
2968 ful 4 3
3045 fiveminute 4 10
3071 ingruction 4 10
3089 chau 4 4
5840 'when 4 5
3124 ood 4 3
3125 bao 4 3
5685 wil 4 3
3170 week' 4 5
6422 farningham 4 10
3183 cial 4 4
3188 malekula 4 8
3223 papeite 4 7
5365 conkey 4 6
3268 vendek 4 6
3303 threeminute 4 11
3607 hla 4 3
4730 fello 4 5
4671 'time 4 5
3752 'should 4 7
4518 orno 4 4
3833 ist 4 3
4032 afterwhile 4 10
4166 do' 4 3
2894 peo 4 3
6435 katagiri 4 8
2278 ole 4 3
4061 sabati 4 6
2329 kading 4 6
7896 tsui 4 4
7840 biddings 4 8
7826 elo 4 3
2363 'our 4 4
7808 'good 4 5
7806 speak' 4 6
2420 imo 4 3
7701 barotseland 4 11
7643 messagefilled 4 13
7636 us' 4 3
7616 mur 4 3
2532 kikuvi 4 6
2539 selfsacrifice 4 13
7347 hurch 4 5
6476 iors 4 4
7184 'had 4 4
7172 tohouse 4 7
2610 sunshiners 4 10
7129 sions 4 5
7125 outschool 4 9
7045 'two 4 4
2770 yekichi 4 7
6979 ence 4 4
2787 guire 4 5
6919 uplook 4 6
6909 'under 4 6
6882 ous 4 3
6704 tae 4 3
6598 hildah 4 6
5105 testi 4 5
[331 rows x 3 columns]
In [21]:
# Frequent spelling errors for the 'TMM' title, most frequent first.
title = 'TMM'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for TMM:
spell_error count word_length
4361 raratonga 43 9
3409 buluwayo 37 8
1260 stauffer 20 8
4101 carthy 20 6
1743 kalaka 20 6
1813 karmatar 20 8
2049 hausaland 19 9
2626 okohira 18 7
6066 hasegawa 18 8
4942 schwantes 17 9
2234 basle 17 5
5005 couva 17 5
299 sul 17 3
5688 sabbathschool 16 13
5871 raiatea 15 7
5101 seventhday 15 10
5750 tongatabu 15 9
3775 ioo 14 3
4909 helsingfors 14 11
1087 zambesi 14 7
1536 parana 13 6
2978 mangaia 13 7
4326 ventists 13 8
4014 shiba 12 5
3880 rosas 12 5
2167 ruatan 12 6
5301 crespo 12 6
72 gosmer 12 6
4877 arrowauks 11 9
1336 mis 11 3
1331 ricans 11 6
2965 hungaria 11 8
2379 okahira 11 7
5798 truxillo 11 8
5355 spreckels 11 9
3666 'the 10 4
308 shakker 10 7
4835 nonebala 10 8
339 juticalpa 10 9
5495 talafo 10 6
2193 caribbees 10 9
3410 brethern 9 8
392 newyork 9 7
4916 cherentes 9 9
272 eromanga 9 8
3433 asuncion 9 8
2132 muleback 9 8
200 tion 9 4
4214 kupavula 9 8
2718 palmquist 9 9
3965 sionary 8 7
1435 doble 8 5
5919 henton 8 6
28 esthonians 8 10
1905 cina 8 4
1858 dolphijn 8 8
2178 bluefields 8 10
1764 kumpel 8 6
6168 seamans 7 7
3384 esthonian 7 9
1846 rican 7 5
3674 tsin 7 4
332 aitutaki 7 8
760 neuva 7 5
3773 iery 7 4
2781 skaguay 7 7
3861 montg 7 5
1214 cakobau 7 7
2631 loth 7 4
1225 pellice 7 7
5267 makatea 7 7
3289 lettonian 7 9
3583 crowther 7 8
5144 fukuin 7 6
1472 bootooba 7 8
1498 chas 7 4
3980 mandioca 7 8
2508 agt 6 3
2861 pre 6 3
5027 eze 6 3
4596 escobar 6 7
1098 parvo 6 5
53 ary 6 3
634 pauliasi 6 8
5493 tal 6 3
5490 jno 6 3
5377 learsy 6 6
894 kwangsi 6 7
4641 torre 6 5
996 multum 6 6
984 tse 6 3
2018 ellery 6 6
5224 helvecia 6 8
4830 peverini 6 8
1355 fel 6 3
5054 goteborg 6 8
1519 tien 6 4
3266 umkupavula 6 10
5235 fte 6 3
5236 naini 5 5
3697 lettonians 5 10
3778 fonds 5 5
5592 weekapril 5 9
5643 olancho 5 7
3852 tions 5 5
5843 sabbathkeepers 5 14
5847 handsworth 5 10
5864 titikavaka 5 10
6109 readingsabbath 5 14
4146 blancher 5 8
3305 weekdecember 5 12
5350 ladrone 5 7
16 marash 5 6
6208 ramabai 5 7
521 stanmore 5 8
1563 helouan 5 7
2157 por 5 3
1460 levuka 5 6
2169 dwyer 5 5
892 balada 5 6
2260 marchisio 5 9
2267 pago 5 4
2333 tung 5 4
2557 moko 5 4
2575 chaux 5 5
2853 owari 5 5
823 adamson 5 7
2892 sundayschool 5 12
2949 makomp 5 6
282 toltecs 5 7
229 caribbee 5 8
2948 weekjuly 5 8
5076 nyanza 4 6
5061 bilaspur 4 8
6196 moana 4 5
6177 levu 4 4
1246 roko 4 4
1632 ostlund 4 7
1456 afric 4 5
241 fulahs 4 6
1383 'to 4 3
1357 maritzburg 4 10
1250 nection 4 7
1710 kalopothakes 4 12
5128 robie 4 5
1056 tral 4 4
1220 temne 4 5
319 hausfreund 4 10
445 bedros 4 6
453 canje 4 5
852 weekjanuary 4 11
844 bethuks 4 7
4988 gth 4 3
5496 weekmay 4 7
478 ver 4 3
790 curityba 4 8
705 indo 4 4
606 philopappos 4 11
540 arrowauk 4 8
536 ass'n 4 5
1753 taquary 4 7
3242 signes 4 6
1834 mal 4 3
2038 stellenbosch 4 12
3898 witte 4 5
2668 mollendo 4 8
2674 sentative 4 9
2712 guanaja 4 7
2827 colvin 4 6
3713 sepe 4 4
3649 nickerie 4 8
3585 barotse 4 7
3116 savu 4 4
3559 pharoah 4 7
3543 kwang 4 5
3123 preceeding 4 10
3155 ilissionary 4 11
3175 verbeck 4 7
3191 galletas 4 8
2364 bardizag 4 8
2353 hepatization 4 12
4019 pontypridd 4 10
2159 cantlie 4 7
2079 vou 4 3
2087 onehalf 4 7
4851 comandi 4 7
3336 thework 4 7
4819 vavau 4 5
4745 selfsupporting 4 14
4563 voz 4 3
4035 weekseptember 4 13
4457 kalmucks 4 8
2205 seventyfive 4 11
2240 weekfebruary 4 12
4288 geraes 4 6
4287 stoever 4 7
4248 chineseman 4 10
2139 sarmiento 4 9
In [22]:
# Frequent spelling errors for the 'WMH' title, most frequent first.
title = 'WMH'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for WMH:
spell_error count word_length
6161 sabbathschool 170 13
2631 presidenta 75 10
5854 secretarym 61 10
3801 treasurere 61 10
581 numbersin 53 9
5936 numbessin 52 9
1071 horr 39 4
3898 'the 36 4
5002 wyla 32 4
2785 kee 32 3
5499 seventhday 32 10
3796 presidentm 32 10
195 blendon 32 7
479 brower 31 6
5641 harnden 30 7
3945 cleora 27 6
4037 ioo 25 3
2456 sabbathschools 25 14
5774 nunica 23 6
2841 chas 23 4
228 tion 22 4
5964 psa 20 3
5893 'to 20 3
3277 loth 20 4
2148 numbess 18 7
6270 hoffstra 18 8
910 michi 18 5
5207 drury 18 5
1776 'and 17 4
2256 convis 16 6
6634 ment 16 4
4663 ence 16 4
5567 secretarys 15 10
6345 sabbathkeepers 15 14
2724 editov 15 6
3276 diamondale 15 10
1954 mal 15 3
566 treasurerd 15 10
1692 'of 14 3
3248 numbeesin 14 9
1837 ainger 14 6
6399 'field 14 6
4656 vinancial 14 9
5750 numbepsin 13 9
3498 ith 13 3
5975 ctory 13 5
5885 ass'n 12 5
2315 myrta 12 5
5287 bilz 12 4
436 gathereti 12 9
5539 dirlctory 12 9
4275 messer 11 6
2413 'that 11 5
2048 benefitted 11 10
3563 see'y 11 5
1319 'for 11 4
441 hevald 11 6
1180 foy 11 3
2556 harriot 10 7
3616 rgo 10 3
5360 ereth 10 5
2568 ist 10 3
1568 almeda 10 6
212 fvom 9 4
6740 gatereth 9 8
6019 onehalf 9 7
2759 'in 9 3
2671 editop 9 6
1337 sendebud 9 8
3540 gravelle 9 8
1767 twentyfive 9 10
1997 consistant 8 10
485 tgo 8 3
6273 altho 8 5
5410 eze 8 3
5276 ordis 8 5
6066 'be 8 3
2847 thr 8 3
1861 numbevsin 8 9
3936 oth 8 3
6702 soo 8 3
500 ers 8 3
1321 ple 8 3
5465 ference 8 7
1598 mchugh 8 6
1951 palmiter 8 8
5277 allister 8 8
1425 mis 8 3
199 educationa 8 10
3298 garton 7 6
3376 phippeny 7 8
366 vield 7 5
5482 schooi 7 6
6295 mavgavet 7 8
5846 ance 7 4
3426 ilee 7 4
3032 pre 7 3
2216 whi 7 3
2235 hof 7 3
5126 hsi 7 3
... ... ... ...
1051 allthe 6 6
1232 numbewsin 6 9
2491 thro 6 4
352 hausfreund 6 10
3431 veap 6 4
1800 tobe 6 4
1970 selfdenial 6 10
6183 whitford 5 8
5582 swahn 5 5
6100 prpartnunt 5 10
4920 vicepresident 5 13
4989 approbativeness 5 15
984 ilaughey 5 8
5051 vaktare 5 7
2102 educa 5 5
5374 gth 5 3
1325 scandanavian 5 12
2485 arnadon 5 7
4769 sions 5 5
1486 sooncoming 5 10
2455 cudney 5 6
5452 ssued 5 5
1652 matthewson 5 10
5771 iio 5 3
6250 hotstra 5 7
2338 seventyfive 5 11
2842 numbeasin 5 9
3096 medicial 5 8
3499 sel 5 3
6756 tti 5 3
149 rooo 5 4
3895 sundayschool 5 12
6335 natches 5 7
4001 thallie 5 7
3245 numbessln 5 9
591 ments 5 5
550 ent 5 3
3078 raiatea 5 7
4188 ished 5 5
444 kamstra 4 7
6573 haugbev 4 7
5692 bea 4 3
1751 'twixt 4 6
6753 myrtie 4 6
255 isthe 4 5
281 iooo 4 4
339 numbensin 4 9
1554 gatmereth 4 9
6490 alloted 4 7
6292 wil 4 3
1405 accomodate 4 10
6388 ass't 4 5
6257 newago 4 6
5934 'is 4 3
398 stra 4 4
1263 igth 4 4
5953 'this 4 5
892 conven 4 6
1154 nee 4 3
876 medler 4 6
3525 christlicher 4 12
1873 gress 4 5
3287 afew 4 4
2951 pri 4 3
2981 committe 4 8
4321 whereever 4 9
4272 sionary 4 7
3162 wer 4 3
3977 onethird 4 8
3239 greenman 4 8
3306 terest 4 6
4795 editott 4 7
3354 ool 4 3
3797 tennesee 4 8
3424 ung 4 3
3671 'great 4 6
3656 nrws 4 4
3649 ject 4 4
3586 tes 4 3
4794 thi 4 3
2758 'school 4 7
1999 ful 4 3
5198 'them 4 5
5468 watchcare 4 9
2093 'us 4 3
2115 'are 4 4
2293 twentythree 4 11
3501 'work 4 5
5270 ebucattonal 4 11
5229 hevaid 4 6
5193 'at 4 3
4853 edu 4 3
5141 igan 4 4
2558 eachern 4 7
2562 lle 4 3
5117 selfsupporting 4 14
5062 'as 4 3
2680 reapeti 4 7
4932 mrse 4 4
6797 tencent 4 7
[213 rows x 3 columns]
In [23]:
# Frequent spelling errors for the 'YI' title, most frequent first.
title = 'YI'
print("Summary for {}:".format(title))
df = results_to_df(title)
# Keep errors longer than 2 characters that occur more than 3 times.
results = query_df(df, count_value=3, length_value=2, sort_by='count')
print(results)
Summary for YI:
spell_error count word_length
85050 sabbathschool 607 13
3429 'the 408 4
65800 'em 399 3
65897 eze 316 3
45605 xil 315 3
43104 ver 302 3
49812 sel 254 3
30373 tion 227 4
31970 mal 214 3
44178 'of 211 3
63298 agt 205 3
29354 'to 197 3
98016 'neath 180 6
44266 twentyfive 168 10
19616 'and 168 4
40114 ioo 159 3
20722 pre 152 3
22006 guire 151 5
25382 'he 149 3
2950 'cause 148 6
33873 iden 148 4
11323 'be 138 3
50376 goin 133 4
11590 ass't 131 5
70800 sangster 130 8
72425 s'pose 121 6
69839 milly 120 5
48903 yovt 112 4
15826 peloubet 110 8
96169 ome 110 3
32018 ful 109 3
72003 xiil 109 4
28669 hsi 109 3
44649 ettez 106 5
28039 stuttle 105 7
5881 ment 103 4
5158 lxv 102 3
44118 chas 101 4
2433 'in 100 3
28295 yovti 100 5
18041 lviii 99 5
30283 kibbin 99 6
41011 rosilla 97 7
86980 structor 92 8
83373 'his 91 4
12976 ili 90 3
96112 tle 90 3
24063 sha 90 3
48821 'mid 89 4
20073 'by 87 3
94355 lxiii 87 5
81501 georgie 84 7
22359 hutt 84 4
23894 micr 83 4
43960 it' 82 3
88334 'but 81 4
44704 onehalf 80 7
26674 ers 80 3
54236 'most 78 5
13223 r'y 77 3
53077 susy 77 4
60361 me' 76 3
33151 'round 76 6
42433 'have 74 5
82382 gertie 74 6
55587 howson 74 6
82149 'way 71 4
57145 'that 71 5
72128 seventhday 71 10
14816 kee 70 3
65375 'ye 70 3
70666 sundayschool 69 12
4832 'bout 69 5
79465 conybeare 68 9
38125 marden 68 6
25282 mis 67 3
58752 teddie 67 6
66264 riis 67 4
29182 nyassaland 67 10
54249 'is 67 3
26458 nanking 66 7
3084 neesima 64 7
15645 tions 64 5
91428 sabbathkeepers 64 14
27307 ther 64 4
66474 'was 62 4
31600 ole 62 3
33259 cunliffe 62 8
6227 zambesi 61 7
63809 ets 60 3
66094 soo 60 3
16738 'twill 60 6
40810 'had 60 4
70250 twentyfour 59 10
56162 ple 59 3
60997 ber 59 3
59428 liii 58 4
36197 gon 58 3
13131 'for 58 4
21637 sus 57 3
... ... ... ...
20425 months' 4 7
66786 nrt 4 3
66913 countri 4 7
66798 alie 4 4
66804 appius 4 6
66854 pleag 4 5
20832 whotn 4 5
66879 noss 4 4
66884 sophronia 4 9
20750 ccesarea 4 8
66938 feneberg 4 8
67170 ivas 4 4
20743 whenthe 4 7
20684 light' 4 6
20638 illfated 4 8
20587 'nom 4 4
20500 sanyasi 4 7
20481 sengers 4 7
67168 moung 4 5
65581 ansdell 4 7
65464 wih 4 3
63449 doren 4 5
64307 pressive 4 8
22753 ndt 4 3
64186 missio 4 6
64203 hoppy 4 5
22691 ungratified 4 11
22633 eath 4 4
64287 medhurst 4 8
64293 ninus 4 5
64308 schule 4 6
64072 culti 4 5
22624 auber 4 5
64357 leddy 4 5
22593 inmuotor 4 8
22559 aymar 4 5
22550 ifr 4 3
64522 cambo 4 5
22455 senales 4 7
64077 cuautemoch 4 10
63994 iolani 4 6
64603 abled 4 5
63747 thisis 4 6
63465 wic 4 3
63503 epworthian 4 10
63583 kap 4 3
23027 dolson 4 6
22963 coopersburgh 4 12
63693 blisses 4 7
63702 cellent 4 7
22962 shortland 4 9
63965 bertel 4 6
63807 sawa 4 4
22951 ister 4 5
22950 laon 4 4
63848 hattusil 4 8
22922 edvard 4 6
63900 paulonia 4 8
63944 nauplia 4 7
64577 namur 4 5
64700 lossing 4 7
65442 douly 4 5
21966 vealed 4 6
65091 squier 4 6
22076 adapte 4 6
65121 terial 4 6
22065 gohna 4 5
65201 trom 4 4
22021 ock 4 3
65218 nemorosa 4 8
65256 leutze 4 6
22078 gurdy 4 5
65265 titterington 4 12
21885 chriit 4 6
21875 rajputs 4 7
65359 orks 4 4
65371 printingoffice 4 14
21866 edny 4 4
65417 spiker 4 6
65079 jis 4 3
65031 think' 4 6
64745 'during 4 7
64881 constrainem 4 11
22405 vio 4 3
64764 playin 4 6
64769 fire' 4 5
64784 walburga 4 8
64806 representa 4 10
64858 tidende 4 7
64878 seveneighths 4 12
22331 lispings 4 8
22147 leadbetters 4 11
64907 gnd 4 3
64939 leontes 4 7
64941 worke 4 5
64942 ooks 4 4
64981 letow 4 5
64987 'cried 4 6
22233 eof 4 3
4 constraiheth 4 12
[6563 rows x 3 columns]
In [7]:
# %load shared_elements/system_info.py
# Record the environment this notebook was executed in, for reference.
import IPython
# Print IPython's report of the interpreter, platform and IPython installation.
print (IPython.sys_info())
# IPython shell escape: list the installed packages and their versions.
!pip freeze
{'commit_hash': '5c9c918',
'commit_source': 'installation',
'default_encoding': 'UTF-8',
'ipython_path': '/Users/jeriwieringa/miniconda3/envs/dissertation2/lib/python3.5/site-packages/IPython',
'ipython_version': '5.1.0',
'os_name': 'posix',
'platform': 'Darwin-16.1.0-x86_64-i386-64bit',
'sys_executable': '/Users/jeriwieringa/miniconda3/envs/dissertation2/bin/python',
'sys_platform': 'darwin',
'sys_version': '3.5.2 |Continuum Analytics, Inc.| (default, Jul 2 2016, '
'17:52:12) \n'
'[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]'}
anaconda-client==1.5.5
appnope==0.1.0
argh==0.26.1
blinker==1.4
bokeh==0.12.3
boto==2.43.0
bz2file==0.98
chest==0.2.3
cloudpickle==0.2.1
clyent==1.2.2
dask==0.12.0
datashader==0.4.0
datashape==0.5.2
decorator==4.0.10
docutils==0.12
doit==0.29.0
gensim==0.12.4
Ghost.py==0.2.3
ghp-import2==1.0.1
gspread==0.4.1
HeapDict==1.0.0
httplib2==0.9.2
husl==4.0.3
ipykernel==4.5.2
ipython==5.1.0
ipython-genutils==0.1.0
ipywidgets==5.2.2
Jinja2==2.8
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.4.0
jupyter-console==5.0.0
jupyter-core==4.2.1
llvmlite==0.14.0
locket==0.2.0
Logbook==1.0.0
lxml==3.5.0
MacFSEvents==0.7
Mako==1.0.4
Markdown==2.6.7
MarkupSafe==0.23
mistune==0.7.3
multipledispatch==0.4.9
natsort==4.0.4
nb-anacondacloud==1.2.0
nb-conda==2.0.0
nb-conda-kernels==2.0.0
nb-config-manager==0.1.3
nbbrowserpdf==0.2.1
nbconvert==4.2.0
nbformat==4.2.0
nbpresent==3.0.2
networkx==1.11
Nikola==7.7.7
nltk==3.2.1
notebook==4.2.3
numba==0.29.0
numpy==1.11.2
oauth2client==4.0.0
odo==0.5.0
pandas==0.19.1
partd==0.3.6
path.py==0.0.0
pathtools==0.1.2
pexpect==4.0.1
pickleshare==0.7.4
Pillow==3.4.2
prompt-toolkit==1.0.9
ptyprocess==0.5.1
pyasn1==0.1.9
pyasn1-modules==0.0.8
pycrypto==2.6.1
Pygments==2.1.3
PyPDF2==1.25.1
PyRSS2Gen==1.1
python-dateutil==2.6.0
pytz==2016.10
PyYAML==3.12
pyzmq==16.0.2
qtconsole==4.2.1
requests==2.12.3
rsa==3.4.2
scipy==0.18.1
simplegeneric==0.8.1
six==1.10.0
smart-open==1.3.5
terminado==0.6
textblob==0.11.1
toolz==0.8.1
tornado==4.4.2
traitlets==4.3.1
Unidecode==0.4.19
watchdog==0.8.3
wcwidth==0.1.7
webassets==0.11.1
widgetsnbextension==1.2.6
ws4py==0.3.4
xarray==0.8.2
Yapsy==1.11.223