AmSn-OCR-Evaluation-and-Correction
%load_ext autoreload
%autoreload 2
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
%matplotlib inline
# --- Spelling dictionary ---------------------------------------------------
# Folder that holds the word-list files named below.
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"

# Word-list files passed, together with the folder above, to
# utilities.create_spelling_dictionary.
wordlists = [
    "2016-12-07-SDA-last-names.txt",
    "2016-12-07-SDA-place-names.txt",
    "2016-12-08-SDA-Vocabulary.txt",
    "2017-01-03-place-names.txt",
    "2017-02-14-Base-Word-List-SCOWL&KJV.txt",
    "2017-02-14-Roman-Numerals.txt",
    "2017-03-01-Additional-Approved-Words.txt",
]

spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)

# --- Corpus location -------------------------------------------------------
# Periodical title code; it is also the name of the title's sub-folder.
title = "AmSn"

# Root folder for this title; each cycle ("baseline", "correction1", ...)
# is addressed relative to it by the cells that follow.
base_dir = (
    "/Users/jeriwieringa/Dissertation/text/text/"
    "2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/"
).format(title)
Baseline¶
# Evaluate the "baseline" directory, i.e. the text before any correction cell
# in this notebook has been applied.
cycle = 'baseline'
# Runs the overview report on <base_dir>/baseline. The printed result lists the
# directory, average verified rate, average error rate and total token count.
# `stats` is consumed by reports.get_errors_summary in the next cell.
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/baseline Average verified rate: 0.9417475625771581 Average of error rates: 0.059656669650850494 Total token count: 8534424
# Build the error summary from the baseline report statistics.
errors_summary = reports.get_errors_summary( stats )
# Display the most frequent error tokens as (token, count) pairs.
# NOTE(review): the second argument looks like a minimum-count cut-off (with
# 100 the printed list ends at a count of 101) -- confirm in text2topics.reports.
reports.top_errors( errors_summary, 100 )
[('-', 12650), ('ñ', 11855), ("'", 8075), ('tion', 6722), ('con-', 5879), ('re-', 5789), ('¥', 5128), ('t', 4124), ('ment', 4081), ('in-', 3962), (')', 3929), ('d', 3884), ('e', 3716), ('w', 3705), ('co', 3389), ('m', 3097), ('de-', 2928), ('com-', 2715), ('n', 2658), ('be-', 2447), ('pro-', 2149), ('sun-', 1953), ('f', 1919), ('ex-', 1874), ('chris-', 1869), ('*', 1867), ('r', 1832), ('tions', 1677), ('th', 1673), ('en-', 1576), ('dis-', 1563), ('govern-', 1495), ('(', 1448), ('gov-', 1339), ('g', 1311), ('per-', 1282), ('sab-', 1252), ('tian', 1181), ('mo', 1146), ('un-', 1141), ('na-', 1133), ('ernment', 1060), ('reli-', 1056), ('ance', 1023), ('ob-', 1011), ('pre-', 998), ('ments', 942), ('ad-', 937), ('ity', 935), ('sunday-law', 923), ('ac-', 911), ('tional', 911), ('ñthe', 905), ('u', 881), (']', 868), ('ligious', 836), ('im-', 815), ('ap-', 807), ('ple', 790), ('sub-', 738), ('x', 730), ('ence', 714), ('an-', 698), ('gious', 675), ('relig-', 665), ('ques-', 625), ('peo-', 623), ('ers', 621), ('at-', 601), ("'the", 596), ('al-', 586), ('as-', 576), ('inter-', 561), ('to-', 554), ('pub-', 546), ('them-', 544), ('fol-', 536), ('prin-', 522), ('constitu-', 520), ('ligion', 516), ('erty', 511), ('_', 511), ('/', 510), ('sup-', 507), ('for-', 498), ('tianity', 490), ('au-', 484), ('stitution', 475), ('coun-', 472), ('ious', 464), ('the-', 460), ('em-', 453), ('pur-', 447), ('observ-', 446), ('cath-', 441), ('any-', 439), ('amer-', 437), ('lib-', 430), ('gress', 425), ('there-', 414), ('sev-', 411), ('legisla-', 410), ('[the', 409), ('with-', 394), ('wor-', 386), ('legis-', 381), ('--', 381), ('sen-', 380), ('or-', 379), ('op-', 375), ('pa', 375), ('ican', 375), ('mis-', 373), ('ent', 372), ('consti-', 366), ('estab-', 365), ('ful', 355), ('servance', 354), ('man-', 353), ('under-', 352), ('cer-', 351), ('lished', 351), ('lation', 351), ('es-', 350), ('q', 350), ('chain-gang', 349), ('lic', 345), ('ble', 344), ('di-', 343), ('pres-', 342), ('tution', 342), ('ber', 
341), ('gion', 339), ('can-', 338), ('ña', 338), ('%', 335), ('ar-', 334), ('princi-', 332), ('ture', 326), ('move-', 325), ('tive', 321), ('ous', 316), ('gen-', 316), ('olic', 315), ('sunday-closing', 314), ("the'", 312), ('thority', 312), ('prot-', 311), ('ject', 310), ('command-', 305), ('his-', 303), ('tians', 300), ('ical', 297), ('repre-', 295), ('su-', 295), ('eral', 295), ('se-', 293), ('so-', 292), ('ual', 291), ('=', 291), ('par-', 291), ('mat-', 290), ('ation', 290), ('k', 288), ('state-', 288), ('author-', 287), ('him-', 285), ('-the', 285), ('sunday-', 283), ('amend-', 282), ('sunday-rest', 281), ('pp', 281), ('cor-', 281), ('indi-', 281), ('¥¥', 281), ('ples', 280), ('ciples', 277), ('ñthat', 277), ('ex', 275), ('ameri-', 271), ('institu-', 268), ('mem-', 268), ('every-', 266), ('ch', 266), ('ists', 265), ('ma-', 262), ('neces-', 262), ('ñno', 260), ('ters', 260), ('of-', 259), ('senti-', 259), ('sec-', 258), ('pa-', 257), ('seventh-', 256), ('work-', 256), ('po-', 253), ('some-', 251), ('doc-', 247), ('ciple', 246), ('sim-', 243), ('pos-', 242), ('(the', 240), ('busi-', 238), ('ary', 237), ("'of", 236), ('char-', 235), ('evi-', 233), ('follow-', 233), ('perse-', 231), ('mo-', 230), ('insti-', 229), ('mand', 229), ('lieve', 228), ("conscience'", 228), ('ã', 226), ('recog-', 226), ('bers', 225), ('num-', 221), ('prac-', 221), ('leg-', 219), ('tained', 219), ('ñand', 218), ('mitted', 217), ('suc-', 216), ('what-', 216), ('**', 216), ('ga', 216), ('individ-', 216), ('out-', 214), ('oc', 214), ('sition', 213), ('free-', 213), ('mandment', 212), ('wm', 212), ('accord-', 211), ('how-', 210), ('prop-', 210), ('pel', 210), ('main-', 206), ('munn', 206), ('fellow-citizens', 204), ('satolli', 204), ('ten-', 203), ('rec-', 203), ('preme', 201), ('ab-', 201), ('king-', 200), ('***', 198), ('over-', 198), ('christian-', 197), ('hu-', 196), ('sented', 193), ('yo', 193), ('col-', 192), ('sions', 190), ('noth-', 190), ('illus-', 188), ('ure', 188), ('citi-', 187), 
('fur-', 187), ('eng-', 186), ('right-', 185), ('min-', 184), ('tures', 184), ('z', 184), ('associa-', 184), ('stand-', 183), ('sur-', 183), ('exer-', 182), ('ry', 182), ('advent-', 182), ('tainly', 182), ('argu-', 182), ('sible', 182), ('tem-', 180), ('invari-', 179), ('employes', 178), ('catho-', 176), ('protest-', 176), ('thou-', 175), ('car-', 175), ('accom-', 175), ('dred', 174), ('ti', 173), ('judg-', 173), ('persecu-', 173), ("'a", 172), ('self-', 172), ('struction', 172), ('teach-', 171), ('ther', 170), ('inde-', 170), ('conse-', 170), ('ca', 170), ('dence', 170), ('dividual', 169), ('vio-', 168), ('posi-', 167), ('ñthomas', 167), ('co-', 167), ('ro-', 166), ('mittee', 166), ('`', 165), ('sary', 165), ('-of', 164), ('tinel', 164), ('ñi', 162), ('ñit', 162), ('af-', 161), ('hun-', 160), ('rep-', 160), ('(see', 159), ('attorney-general', 159), ('al', 159), ('organ-', 159), ('un-american', 156), ('trans-', 155), ('tary', 155), ('rest-day', 154), ('estant', 154), ('asso-', 153), ('(entered', 153), ('mony', 153), ('law-abiding', 152), ('rea-', 152), ('sys-', 152), ("'to", 152), ('des-', 152), ('uni-', 151), ('ference', 150), ('mc', 149), ("'and", 149), ('ceived', 148), ('enforce-', 148), ('bap-', 148), ('conven-', 148), ('har-', 147), ('presi-', 147), ('cen-', 147), ('law-', 146), ('differ-', 146), ('ñrev', 146), ('acter', 146), ('ciation', 146), ('sa-', 145), ('[from', 145), ('peti-', 145), ('states-', 145), ('vention', 145), ('scrip-', 144), ('fied', 144), ('jus-', 144), ('trol', 144), ('sabbath-breaking', 144), ('dif-', 143), ('clared', 142), ('religio-political', 142), ('cir-', 142), ('representa-', 141), ('protes-', 141), ('ish', 141), ('vidual', 141), ('ventists', 141), ('ular', 141), ('organiza-', 141), ('ist', 140), ('minis-', 140), ('tation', 140), ('duced', 139), ('(and', 139), ('re', 139), ('tains', 138), ('post-', 138), ('instruc-', 137), ('id', 137), ('dren', 137), ('cial', 137), ('terest', 136), ('influ-', 136), ('ished', 136), ('trated', 136), 
('non-', 136), ('mands', 136), ('enth-day', 136), ("to'", 134), ('ered', 133), ('chil-', 133), ('cially', 133), ('ô', 132), ('educa-', 132), ('pros-', 132), ('intro-', 132), ('no-', 132), ('trary', 132), ('meet-', 131), ('fa-', 131), ('ica', 130), ('pun-', 130), ('cently', 130), ('mar-', 129), ('quired', 129), ('vest-pocket', 129), ('nessee', 128), ('denomina-', 128), ('ilar', 128), ('se', 128), ('jority', 128), ('manded', 127), ('ll', 127), ('-to', 127), ('tives', 127), ('pelled', 126), ('rian', 126), ('dition', 126), ('thatñ', 126), ('ated', 125), ('¡', 125), ('cise', 125), ('litical', 125), ('nal', 125), ('lish', 125), ('wash-', 124), ('cated', 124), ('mit', 124), ('parlia-', 124), ('consid-', 124), ('sug-', 123), ('olics', 123), ('tem', 122), ('ñnew', 122), ('ures', 122), ('secution', 122), ('cept', 121), ('news-', 121), ('hon-', 121), ('ized', 120), ('establish-', 120), ('eousness', 120), ('il', 119), ('nd', 119), ('oc-', 119), ('advo-', 118), ('cution', 118), ('edu-', 118), ('serv-', 118), ('islation', 118), ('counter-arguments', 117), ('rela-', 117), ('sabbath-', 116), ('mainte-', 116), ('cmsar', 116), ('condi-', 115), ('do-', 115), ('conscien-', 115), ('resolu-', 115), ('chi-', 115), ('ances', 115), ('sat-', 114), ('lb', 114), ('ingly', 114), ('sin-', 114), ('polit-', 114), ('stitutions', 113), ('ington', 113), ('sabbath-day', 113), ('refer-', 113), ('pression', 112), ('stat-', 112), ('nounced', 112), ('gos-', 112), ('fellow-', 112), ('aleck', 112), ('apos-', 112), ("'i", 111), ('spect', 111), ('cerning', 111), ('sunday-keeping', 110), ('circum-', 110), ('\\', 110), ('libertyñchristian', 109), ('ciety', 109), ("'in", 109), ('nel', 109), ('sus-', 109), ('non-sectarian', 108), ("and'", 108), ('bateham', 107), ('reform-', 107), ('appro-', 107), ('tant', 107), ('forcement', 107), ('%x', 107), ('ies', 107), ('deter-', 106), ('dan-', 106), ('politi-', 106), ('sentin', 106), ('mandments', 106), ('ñjesus', 106), ('pe-', 106), ('espe-', 105), ('suf-', 105), ('is-', 
105), ('ñto', 105), ('ñin', 104), ('estants', 104), ('eration', 104), ('depart-', 104), ("'is", 104), ('prom-', 104), ('ning', 104), ('meth-', 103), ('well-', 103), ('cc', 103), ('ne-', 103), ('funda-', 103), ('cussion', 103), ('ñnot', 103), ('•', 102), ('milly', 102), ('ern', 102), ('tered', 102), ('neigh-', 102), ('righteous-', 102), ('pol-', 102), ('sity', 102), ('pendence', 102), ('sanc-', 101), ('sionary', 101), ('cago', 101), ('possi-', 101), ('nity', 101)]
Correction 1 -- Special Characters¶
The first common error appears to be dangling line endings. However, in order to best capture and fix those endings, I am first normalizing the line-ending characters and addressing special characters. Before removing them, I will check for regular non-English language use to see if there are particular characters that should be preserved.
# Display the error tokens that contain special characters, with their counts,
# so that any genuine non-English usage can be spotted before those characters
# are stripped in the next correction.
reports.tokens_with_special_characters(errors_summary)
[('ñ', 11855), ('¥', 5128), (')', 3929), ('*', 1867), ('(', 1448), ('ñthe', 905), (']', 868), ('_', 511), ('/', 510), ('[the', 409), ('ña', 338), ('%', 335), ('=', 291), ('¥¥', 281), ('ñthat', 277), ('ñno', 260), ('(the', 240), ('ã', 226), ('ñand', 218), ('**', 216), ('***', 198), ('ñthomas', 167), ('`', 165), ('ñi', 162), ('ñit', 162), ('(see', 159), ('(entered', 153), ('ñrev', 146), ('[from', 145), ('(and', 139), ('ô', 132), ('thatñ', 126), ('¡', 125), ('ñnew', 122), ('\\', 110), ('libertyñchristian', 109), ('%x', 107), ('ñjesus', 106), ('ñto', 105), ('ñin', 104), ('ñnot', 103), ('•', 102), ('`the', 98), ('ñchristian', 96), ('-¥', 94), ('ñany', 90), ('(a', 85), ('(which', 80), ('[', 80), ('(or', 78), ('(as', 78), ('ñbut', 73), ('¤', 72), ('ñby', 71), ('(new', 68), ('(for', 68), ('(in', 67), ('[of', 67), ('ñis', 66), ('[sunday]', 66), ('ñas', 66), ('ñwhich', 65), ('libertyñchris-', 65), ('<', 64), ('ñif', 64), ('the¥', 64), ('+', 63), ('ñan', 62), ('ñthis', 62), ('ñyes', 61), ('(sunday)', 60), ('[in', 60), ('¥¥¥', 59), ('—', 58), ('(n', 58), ('(i', 57), ('ñfor', 56), ('¥the', 55), ('(rev', 54), ('(to', 52), ('ñwhy', 51), ('(baptist)', 50), ('ñwe', 50), ('ñharvard', 50), ('ñall', 49), ('ñc', 49), ('¥-', 49), ('ñn', 49), ('ñhow', 49), ('(not', 49), ('(mich', 48), ('ñwhen', 48), ('ñwhat', 48), ('sentinel_', 46), ('day)', 46), ('ñhe', 46), ('excepted)', 46), ('(rom', 45), ('ñthey', 45), ('ñsigns', 45), ('(concluded', 45), ('(size', 45), ('(italics', 43), ('~~', 42), ('(john', 42), ('[mr', 42), ('(second', 42), ('#', 42), ('£', 42), ('ó', 41), ('(matt', 40), ('ñthere', 40), ('ñfrom', 39), ('[new', 39), ('[this', 39), ('inches)', 38), ('(acts', 38), ('\ufeff', 37), ('ñbecause', 37), ('¦', 37), ('to¥', 37), ('ñcatholic', 36), ('(with', 36), ('(if', 36), ('[by', 36), ('[or', 36), ('(page', 35), ('(works', 35), ('ñyou', 35), ('sabbathñthe', 35), ('ñoñ', 35), ('¥-¥', 35), ('ñid', 34), ('(p', 34), ('ñone', 34), ('york)', 33), ('ñthen', 33), ('[not', 33), ('ñwho', 33), 
('(dyspeptic)', 32), ('worldñto', 32), ('~', 32), ('c)', 32), ('ñpage', 32), ('ñreligious', 31), ('notes)', 31), ('ñcertainly', 31), ('¥of', 31), ('ñor', 31), ('catholic)', 30), ('(saturday)', 30), ('ñpresent', 30), ('ñof', 30), ('ñpresbyterian', 29), ('(continuing', 29), ('}', 29), ('ñmr', 29), ('ñon', 29), ('(tenn', 29), ('(of', 29), ('(roman', 29), ('ñwith', 29), ('(r', 29), ('lawñthe', 29), ('[we', 29), ('church)', 28), ('ñjottings', 28), ('(mass', 28), ('sunday)', 28), ('(pa', 28), ('(except', 28), ('(a)', 27), ('>', 27), ('(no', 27), ('bookñ', 27), ('(exact', 27), ('catholics]', 26), ('ñst', 26), ('¥¥¥¥', 25), ('and¥', 25), ('(ps', 25), ('ñsabbath', 25), ('(that', 25), ('split)', 25), ('(who', 25), ('church]', 25), ('bibleñ', 25), ('(from', 25), ('ñ]', 24), ('(mr', 24), ('(capillary', 24), ('[a', 24), ('^', 23), ('(b)', 23), ('thisñourñour', 23), ('(this', 23), ('(though', 23), ('(cal', 23), ('a¥', 23), ('the_', 23), ('specimen)', 23), ('[that', 23), ('(luke', 23), ('ñjohn', 23), ('attraction)', 23), ('ñwill', 22), ('of¥', 22), ('(late', 22), ('refused)', 22), ('churchñ', 22), ('ñdo', 22), ('a_', 22), ('[italics', 22), ('(heb', 22), ('pages)', 22), ('[to', 22), ('(by', 22), ('(once', 22), ('(col', 21), ('a)', 21), ('[roman', 21), ('ñsimply', 21), ('(london', 21), ('self=pronouncing', 21), ('(dan', 21), ('ñsuch', 21), ('(isa', 21), ('`we', 21), ('mealñ(an)', 21), ('godñthe', 21), ('ñpeoplesñthat', 21), ('_the', 21), ('missionaryñby', 20), ('(c)', 20), ('ñwell', 20), ('(catholic)', 20), ('(over', 20), ('(gen', 20), ('[applause', 20), ('recipesñpost-paid', 20), ('it)', 19), ('ñjames', 19), ('itñ', 19), ('coã', 19), ('plorerñby', 19), ('ñdr', 19), ('¥a', 19), ('`and', 19), ('ñare', 19), ('sentinel¥', 19), ('ñhas', 19), ('ñw', 19), ('[christ]', 19), ('ñsan', 19), ('ñonly', 19), ('guineañby', 18), ('[sunday', 18), ('ñwould', 18), ("'¥", 18), ('_american', 18), ('[and', 18), ('(verse', 18), ('ñeditor', 18), ('ñthese', 18), ('ñthose', 18), ('`i', 18), ('(puritan)', 
18), ('(we', 18), ('ñnellie', 18), ('christian(', 18), ('ñat', 18), ('statesñthe', 18), ('[mention', 18), ('chairmanñ', 17), ('is¥', 17), ('churchñthe', 17), ('(having)', 17), ('ñchicago', 17), ('city)', 17), ('(based', 17), ('sentinel)', 17), ('i)', 17), ('congoñby', 17), ('ñspringfield', 17), ('ñmen', 17), ("cushing's)", 17), ('¥and', 17), ('(chicago)', 17), ('ñmrs', 17), ('¥in', 17), ('ñshe', 17), ('ñhistory', 17), ('ñen', 17), ('û', 17), ('edition)', 17), ('sunday]', 17), ('(i)', 16), ('ñso', 16), ("ñman's", 16), ('godñ', 16), ('ñgreat', 16), ('ñhis', 16), ('`it', 16), ('(illustrated)', 16), ('-*', 16), ('(eph', 16), ('ñwas', 16), ('ñoh', 16), ('ñlet', 16), ('(applause', 16), ('ñjust', 16), ('~~¥', 16), ('nigerñ', 16), ('numbers)', 16), ('melanesiañby', 16), ('ñbaptist', 15), ('t)', 15), ('♦', 15), ('missionsñby', 15), ('in¥', 15), ('a¥nd', 15), ('statesñ', 15), ('ñp', 15), ('ñking', 15), ('day]', 15), ('religionñthe', 15), ('*-', 15), ('(mark', 15), ('state]', 15), ('(continued', 15), ('law]', 15), ('(including', 15), ('a_n', 15), ('isñ', 15), ('governmentñthe', 15), ('mangañby', 14), ('(water', 14), ('sundayñ', 14), ('(minn', 14), ("'ñ", 14), ('ñsunday', 14), ('independenceñthe', 14), ('(ind', 14), ('page)', 14), ('ñu', 14), ('ñfood', 14), ('ñandñ', 14), ('l¥', 14), ('¥*', 14), ('nineteenthñwill', 14), ('¥to', 14), ('washington)', 14), ('¥that', 14), ('andñ', 14), ('in_', 14), ('ñsunday-law', 14), ('concordanceña', 14), ('villageñone', 14), ('ñart', 14), ('ñh', 14), ('()', 14), ('ñgod', 14), ('*from', 14), ('ñevangel', 14), ('catholic]', 14), ('for¥', 14), ('sundayñthe', 14), ('ñaddressñ', 14), ('helpsñembracing', 13), ('ñboston', 13), ('powerñthe', 13), ('(sunday', 13), ('manñthe', 13), ('`to', 13), ('truthñthe', 13), ('law)', 13), ('ñeven', 13), ('(london)', 13), ('le/vites', 13), ('landsñby', 13), ('r¡', 13), ('[christian', 13), ('extras)', 13), ("ñ'", 13), ('[catholic]', 13), ('appealñnational', 13), ('ñwhether', 13), ('`¥', 13), ('beñhow', 13), 
('invadedñthe', 13), ('dayñthe', 13), ('dayñand', 13), ('(ex', 13), ('(fleming', 13), ('ñmay', 13), ('[laughter', 13), ('government)', 13), ('ñbible', 12), ('(but', 12), ('ãã', 12), ('(about', 12), ('(s', 12), ('empireñwhat', 12), ('_a', 12), ('(gal', 12), ('_of', 12), ('ñgeorge', 12), ('<at', 12), ('christñthe', 12), ('/dominion', 12), ('as¥', 12), ('worldñthe', 12), ('politicalñthomas', 12), ('`for', 12), ('helpsñnumerous', 12), ('(he', 12), ('(neb', 12), ('stateñthe', 12), ('illustrationsñsplendid', 12), ('ñdid', 12), ('i¥', 12), ('uaryña', 12), ('allñthe', 11), ('mapsñconcordanceñsubject', 11), ('{', 11), ('(presbyterian)', 11), ('*the', 11), ('states]', 11), ('ñsamuel', 11), ('ñmoral', 11), ('historyñby', 11), ('state)', 11), ('`this', 11), ("¥'", 11), ('(it', 11), ('or¥', 11), ('them)', 11), ('lawñ', 11), ('ñnamely', 11), ('re_', 11), ('[for', 11), ('¥i', 11), ('thisñthe', 11), ('peopleñthe', 11), ('(ill', 11), ('be)', 11), ('sabbath)', 11), ('ñbishop', 11), ('(delivered', 11), ('ñde', 11), ('therein)', 11), ('worldñ', 11), ('ñnow', 11), ('chinañby', 11), ("(gentleman's", 11), ('cover)', 11), ('ñtheñ', 11), ('(methodist)', 11), ('size)', 11), ('indexñvocabu-', 10), ('(d)', 10), ('/and', 10), ('sentinel]', 10), ('(eze', 10), ('ñfaith', 10), ('ñindependent', 10), ('ñwhere', 10), ('ñwere', 10), ('la*', 10), ('states)', 10), ('/ah', 10), ('ñalsoñ', 10), ('(specimen', 10), ('ñreligion', 10), ('`an', 10), ('ha/rim', 10), ('(an', 10), ('more)', 10), ('(pp', 10), ('churchesñas', 10), ('ñj', 10), ('__', 10), ('(the)ñby', 10), ('religionñand', 10), ('``', 10), ('ñreview', 10), ('bookñthe', 10), ('(alexander', 10), ('[present', 10), ('nomñany', 10), ('this¥', 10), ('postñoffice', 10), ('ç', 10), ('con_', 10), ('`if', 10), ('(such', 10), ('themñ', 10), ('peoplesñ', 10), ('bible]', 10), ('%c', 10), ('others)', 10), ('governmentñ', 10), ("')", 10), ('ñs', 10), ('`a', 10), ('him)', 10), ('godñand', 10), ('`that', 10), ('ñsome', 10), ('[is]', 10), ('(two-thirds', 10), 
('(st', 10), ('morality]', 10), ('and_', 10), ('campbell)', 10), ('*this', 9), ('¥be', 9), ('adventist)', 9), ('manñ', 9), ('union)', 9), ('`no', 9), ('(so', 9), ('ñamerican', 9), ('itñand', 9), ('[see', 9), ('e¥', 9), ('ñliberty', 9), ('(at', 9), ('[his', 9), ('`almost', 9), ('ñfrances', 9), ('cents)', 9), ('peopleña', 9), ('¥by', 9), ('ñkatherine', 9), ('ñchurch', 9), ('ñyour', 9), ('ñnothing', 9), ('(one', 9), ('ñsee', 9), ('(art', 9), ('stateñ', 9), ('ñdoes', 9), ('baptist)', 9), ('(iowa)', 9), ('earthñthe', 9), ('be¥', 9), ('(poetry', 9), ('ñsir', 9), ('itñthe', 9), ('was¥', 9), ('ñtheir', 9), ('papacy]', 9), ('ñlondon', 9), ('ñselected', 9), ('pope]', 9), ('[as', 9), ('ñupon', 9), ('ñcan', 9), ('libertyñthe', 9), ('homeñ', 9), ('of_', 9), ('constitution)', 8), ('(signed)', 8), ('gospelñthe', 8), ('ñofficial', 8), ('beñ', 8), ('do)', 8), ('their¥', 8), ('ñjudge', 8), ('oneñthe', 8), ('peopleñ', 8), ('[small]', 8), ('laws)', 8), ('`yes', 8), ('large]', 8), ('watchwordñthe', 8), ('weekñthe', 8), ('=the', 8), ('weightñwithout', 8), ('speedñwonderful', 8), ('keysñthirty', 8), ('sentinelñdear', 8), ('societyñmrs', 8), ('(some', 8), ('wasñ', 8), ('(their', 8), ('offerñby', 8), ('menñ', 8), ('governmentñthat', 8), ('(v', 8), ('(james', 8), ('touchñlight', 8), ('[here', 8), ('(micr', 8), ('constitutionalñfrom', 8), ('keyboardñuniversal', 8), ('lawñthat', 8), ('alvierica_n', 8), ('ñloyal', 8), ('¥¥-¥', 8), ('ñgold', 8), ('ñthough', 8), ('\\ttin', 8), ('ñchris-', 8), ('ñcolorado', 8), ('that_', 8), ('*as', 8), ('ñpublius', 8), ('[large]', 8), ('is)', 8), ('ñour', 8), ('churches)', 8), ('_to', 8), ('ñmelbourne', 8), ('siredñmedium', 8), ('re¥', 8), ('case)', 8), ('cut)', 8), ('°', 8), ('(seventh-day', 8), ('sayñ', 8), ('¥¥¥¥¥', 8), ('copyrighted)', 8), ('constitution]', 8), ('(papacy)', 8), ('alignmentñperfect', 8), ('ñsilver', 8), ('(brown)', 8), ('(without', 8), ('languagesñseven', 8), ('(h', 8), ('sabbath]', 8), ('course)', 8), ('(still', 8), ('(february', 8), 
('`one', 8), ('patenteeñto', 8), ('ñevery', 8), ('(bourgeois)', 8), ('godñin', 8), ('\\t', 8), ('week)', 8), ('¥-¥-¥', 8), ('wheelñsteel', 8), ('(eng', 8), ('ñsel', 8), ('sabbathñis', 8), ('christians)', 8), ('governmentña', 8), ('romeñthe', 8), ('[loud', 8), ('[cheers', 8), ('(unless', 8), ('ours)', 7), ('semiteñand', 7), ('ñfree', 7), ('(laughter', 7), ('people)', 7), ('¥-¥¥', 7), ('(wis', 7), ('(according', 7), ('nationñthe', 7), ('dayñ', 7), ('christianityñthe', 7), ('i/', 7), ('country)', 7), ('to_', 7), ('©', 7), ('labor]', 7), ('ñdecline', 7), ('(conclusion', 7), ('god)', 7), ('(boston)', 7), ('we¥', 7), ('beast]', 7), ('ñthree', 7), ('man)', 7), ('(unitarian)', 7), ('¥but', 7), ('ñmy', 7), ('say)', 7), ('[very', 7), ('(hear', 7), ('(-', 7), ('wordsñ', 7), ('all)', 7), ('ñgibbon', 7), ('(working', 7), ('(when', 7), ('_that', 7), ('(although', 7), ('world)', 7), ('self=cleaning', 7), ('historyñthemes', 7), ('libertyñ', 7), ('be_', 7), ('sabbathñ', 7), ('but]', 7), ('ñshall', 7), ('(lev', 7), ('[meaning', 7), ('religionñ', 7), ('(it)', 7), ('(on', 7), ('-_', 7), ('[n', 7), ('*i', 7), ('ñafter', 7), ('nationñwhat', 7), ('iã', 7), ('-)', 7), ('constitutionñthe', 7), ('ñjewish', 7), ('peopleñand', 7), ('theñ', 7), ('(phil', 7), ('have¥', 7), ('(civil', 7), ('bathñthe', 7), ('matterñas', 7), ('ñdetroit', 7), ('ñcol', 7), ('(they', 7), ('*a', 7), ('godña', 7), ('„', 7), ('(chap', 7), ('moral]', 7), ('mcallisterñno', 7), ('ca_n', 7), ('so)', 7), ('ñlessons', 7), ('[which', 7), ('ñpublishers)', 7), ('tionñthe', 7), ('people]', 7), ("te(ichelis'", 7), ('-(', 7), ('stateña', 7), ('lawñto', 7), ('(section', 7), ('themñand', 7), ('ñhave', 7), ('[hear', 7), ('that¥', 7), ('ñhad', 7), ('m¥', 7), ('[special', 7), ('papacyñthe', 7), ('toast)', 7), ('tionñwhat', 7), ('ö', 7), ('[made', 7), ('(t', 7), ('`you', 7), ('`my', 7), ('menñthe', 7), ('_and', 7), ('menña', 7), ('johnsonñ', 7), ('thisñthat', 7), ('[god]', 7), ('✓', 7), ('ñsince', 7), ('organ)', 7), ('dayña', 7), 
('(now', 7), ('[cries', 7), ('no*', 7), ('[it', 7), ('(all', 6), ('ñaddress', 6), ('america_n', 6), ('way)', 6), ('ñprice', 6), ('[worship]', 6), ('themñthat', 6), ('christianityñ', 6), ('ñadvent', 6), ('not)', 6), ('badñ(having)', 6), ('g)', 6), ('ñsurroundings', 6), ('ñindeed', 6), ('sundayña', 6), ('(two', 6), ('a\\', 6), ('`our', 6), ('(mo', 6), ('¥or', 6), ('ñcloth', 6), ('stateñby', 6), ('ci)', 6), ('ñpaul', 6), ('born)', 6), ('%/', 6), ('society)', 6), ('done)', 6), ('¥for', 6), ('(vol', 6), ('ñgalen', 6), ('o)', 6), ('tian(', 6), ('e)', 6), ('ñstate', 6), ('ñyea', 6), ('ñshould', 6), ('[sun-', 6), ('stã', 6), ("jonesñ'", 6), ('¥`', 6), ('ñspain', 6), ('[i', 6), ('ñminnie', 6), ('saysñ', 6), ('peopleñthat', 6), ('ñwhile', 6), ('congressñthe', 6), ('`sunday', 6), ('`in', 6), ('[if', 6), ('faithñ', 6), ('(whether', 6), ('/-', 6), ('ñjoaquin', 6), ('rica_n', 6), ('(like', 6), ('margin]', 6), ('ñgood', 6), ('¥is', 6), ('laws]', 6), ('`is', 6), ('ñofñ', 6), ('countryñthe', 6), ('statesñreligious', 6), ('adventists)', 6), ('faithñneeds', 6), ('[st', 6), ('safe]', 6), ('a_ivierican', 6), ('-¥-¥', 6), ('~¥', 6), ('power]', 6), ('countryñ', 6), ('excellentñchristian', 6), ('thingsñthe', 6), ('butñ', 6), ('ñibid', 6), ('(texas)', 6), ('i*', 6), ('(code', 6), ('(may', 6), ('dayñare', 6), ('<<', 6), ('wordsña', 6), ('ñprof', 6), ('lawñis', 6), ('ñdear', 6), ('[under', 6), ('statesñand', 6), ('s\x8eance', 6), ('societyñl', 6), ('ñnever', 6), ('ñunited', 6), ('revealedñthat', 6), ('ñmost', 6), ('libraryñ', 6), ('ñpp', 6), ('r¥', 6), ('worship)', 6), ('crime(', 6), ('government]', 6), ('(sun-', 6), ('ñblind', 6), ('¥with', 6), ('journal==', 6), ('sentinelñ', 6), ('¥¥¥¥¥¥', 6), ('it¥', 6), ('/i', 6), ('caesarñthe', 6), ('churchñto', 6), ('ñcharles', 6), ('ñed', 6), ('ñdayton', 6), ('(even', 6), ('(verses', 6), ('¥are', 6), ('margin)', 6), ('kindñwithin', 6), ('anierica_n', 6), ('peopleñis', 6), ('authorityñthe', 6), ('congress)', 6), ('`god', 6), ('religion)', 6), 
("ñd'aubigne", 6), ('addressñ', 6), ('rome]', 6), ('(catholic', 6), ('god]', 6), ('-¥¥', 6), (')-', 6), ('religion]', 6), ("o'neil]", 6), ('(md', 6), ('useñexcellent', 6), ('itñto', 6), ('ours]', 6), ('ò', 6), ('b¥', 6), ('beña', 6), (')f', 6), ('`remember', 6), ('¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥', 6), ('ñfair', 6), ('ha¥s', 6), ('(under', 6), ('hear)', 6), ('(ohio)', 6), ('(december', 6), ('raiira¥', 6), ('¥on', 6), ('(kansas)', 6), ('¨', 6), ('ñd', 5), ('(showing', 5), ('[living]', 5), ('de¥', 5), ('adventists]', 5), ('ôc', 5), ('papacyñthat', 5), ('on¥', 5), ('manualñexpert', 5), ('churches]', 5), ('days)', 5), ('calculatorñliterary', 5), ('christian)', 5), ('crafts)', 5), ('(jer', 5), ('ho)', 5), ('should¥', 5), ('geary]', 5), ('i(', 5), ('christñ', 5), ('/s', 5), ('man¥', 5), ('nuisanceñthe', 5), ('reform]', 5), ('asñ', 5), ('father]', 5), ('especiallyñwriters', 5), ('ñeither', 5), ('*¥', 5), ('ñhere', 5), ('ñabout', 5), ('bibleñand', 5), ('ment)', 5), ('addressñpacific', 5), ("(milman's", 5), ('ñsalesmen', 5), ("'ñthe", 5), ('¥as', 5), ('ñpresident', 5), ('`t', 5), ('ñgo', 5), ('keyñthe', 5), ('[at', 5), ('ñharvest', 5), ('(copyrighted)', 5), ('priceñattractive', 5), ('ñmargaret', 5), ('liberty]', 5), ('f¥', 5), ("'`", 5), ('himñ', 5), ('principles¥', 5), ('(order', 5), ('ñdean', 5), ('power)', 5), ('[these', 5), ('societyña', 5), ('principlesñthe', 5), ('ha/nan', 5), ('beastñthe', 5), ('sab_', 5), ('ñmail', 5), ('`as', 5), ('[accounted', 5), ('ñhalf', 5), ('iñ', 5), ('-¥-', 5), ('christ)', 5), ('powerñ', 5), ('his¥', 5), ('(companion', 5), ('firstñthe', 5), ('are¥', 5), ("¡'", 5), ...]
No obvious foreign language character use. "ñ" appears attached to words as an OCR noise element.
# %load shared_elements/normalize_characters.py
# Correction 1: normalize dash and apostrophe variants to their ASCII forms,
# replace every remaining special character with a space, and write each file
# of the "baseline" cycle into the "correction1" cycle directory.

prev = "baseline"
cycle = "correction1"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok=True replaces the race-prone "check, then create" sequence.
os.makedirs(directories['cycle'], exist_ok=True)

# The patterns are compiled once, outside the per-file loop.
#
# The dash and apostrophe patterns must be character classes ("any one of
# these characters"). Written as a plain run of characters they only match
# that exact run, so no single variant was ever substituted and all of them
# fell through to the blanket replacement below, becoming spaces.
#   U+2014 em dash, U+2013 en dash, U+2011 non-breaking hyphen
dash_variants = re.compile(r"[\u2014\u2013\u2011]")
#   U+2019 / U+2018 curly single quotes, U+201B reversed quote, U+00B4 acute accent
apostrophe_variants = re.compile(r"[\u2019\u2018\u201b\u00b4]")
# Anything that is not a letter, digit, whitespace or basic punctuation.
special_characters = re.compile(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]")

# Regular, non-hidden files of the previous cycle (lazy generator).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Substitute for all other dashes
    content = dash_variants.sub("-", content)

    # Substitute formatted apostrophe
    content = apostrophe_variants.sub("'", content)

    # Replace all special characters with a space (as these tend to occur at the end of lines)
    content = special_characters.sub(" ", content)

    # The with-block closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)

# Re-run the overview report on the corrected files.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction1 Average verified rate: 0.9493256707400606 Average of error rates: 0.051667561921814396 Total token count: 8518613
# Rebuild the error summary from the correction1 report.
errors_summary = reports.get_errors_summary( summary )
# Display the most frequent remaining error tokens as (token, count) pairs.
# NOTE(review): the second argument looks like a minimum-count cut-off (with
# 500 the printed list ends at a count of 508) -- confirm in text2topics.reports.
reports.top_errors( errors_summary, 500 )
[('-', 13223), ("'", 8384), ('tion', 6765), ('con-', 5889), ('re-', 5797), ('t', 4325), ('ment', 4118), ('in-', 3972), ('d', 3947), ('e', 3885), ('w', 3760), ('co', 3427), ('m', 3164), ('n', 2998), ('de-', 2931), ('com-', 2716), ('be-', 2455), ('pro-', 2155), ('f', 2010), ('r', 1978), ('sun-', 1967), ('chris-', 1947), ('ex-', 1880), ('th', 1703), ('tions', 1683), ('en-', 1577), ('dis-', 1566), ('govern-', 1495), ('g', 1356), ('gov-', 1340), ('per-', 1284), ('sab-', 1254), ('tian', 1190), ('mo', 1156), ('un-', 1145), ('na-', 1136), ('ernment', 1070), ('reli-', 1058), ('ance', 1036), ('ob-', 1011), ('pre-', 999), ('ments', 947), ('ity', 944), ('sunday-law', 940), ('ad-', 939), ('tional', 914), ('u', 913), ('ac-', 913), ('x', 860), ('ligious', 838), ('im-', 816), ('ap-', 811), ('ple', 798), ('sub-', 740), ('ence', 722), ('an-', 700), ('gious', 675), ('relig-', 666), ('ers', 628), ('ques-', 625), ('peo-', 625), ('at-', 603), ("'the", 600), ('al-', 588), ('as-', 578), ('inter-', 562), ('to-', 556), ('pub-', 550), ('them-', 545), ('fol-', 536), ('ligion', 523), ('prin-', 523), ('constitu-', 521), ('erty', 518), ('sup-', 508)]
Correction 2 -- Correct line endings¶
Reconnect words that were split due to line-endings.
# %load shared_elements/correct_line_endings.py
# Correction 2: rejoin words that were split by a hyphen at a line ending,
# reading from the previous cycle and writing into "correction2".

prev = cycle
cycle = "correction2"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok=True replaces the race-prone "check, then create" sequence.
os.makedirs(directories['cycle'], exist_ok=True)

# word characters + "-" + one or more whitespace characters + lowercase
# letters; the hyphen and whitespace (group 2) are dropped so that groups
# 1 and 3 are joined. Compiled once, outside the per-file loop.
# NOTE(review): this also fuses compounds that were legitimately hyphenated
# across a line break (e.g. "seventh-" + newline + "day" -> "seventhday").
split_word = re.compile(r"(\w+)(\-\s{1,})([a-z]+)")

# Regular, non-hidden files of the previous cycle (lazy generator).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    content = split_word.sub(r"\1\3", content)

    # The with-block closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)

# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction2 Average verified rate: 0.9777065723174374 Average of error rates: 0.024393763055804242 Total token count: 8357037
# %load shared_elements/top_errors.py
# Rebuild the error summary from the correction2 report.
errors_summary = reports.get_errors_summary( summary )
# Display the most frequent remaining error tokens as (token, count) pairs.
# NOTE(review): the second argument looks like a minimum-count cut-off (with
# 250 the printed list ends at a count of 257) -- confirm in text2topics.reports.
reports.top_errors( errors_summary, 250 )
[('-', 13127), ("'", 8384), ('t', 4309), ('d', 3941), ('e', 3874), ('w', 3758), ('co', 3424), ('m', 3155), ('n', 2990), ('f', 1989), ('r', 1971), ('th', 1703), ('g', 1339), ('mo', 1156), ('sunday-law', 976), ('u', 911), ('x', 860), ("'the", 600), ('--', 418), ('pa', 415), ('q', 373), ('chain-gang', 353), ('sunday-closing', 335), ('tion', 327), ("the'", 313), ('k', 304), ('ex', 304), ('sunday-rest', 299), ('pp', 298), ('-the', 289), ("conscience'", 265), ('ch', 257)]
Correction 3 -- Remove extra dashes¶
# %load shared_elements/remove_extra_dashes.py
# Correction 3: strip a stray dash from the start or end of a token, reading
# from the previous cycle and writing into "correction3".

prev = cycle
cycle = "correction3"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok=True replaces the race-prone "check, then create" sequence.
os.makedirs(directories['cycle'], exist_ok=True)

# Regular, non-hidden files of the previous cycle (lazy generator).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Tokenize a copy with digits and some punctuation blanked out; the
    # replacements themselves are applied to the unmodified `content`.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    # (original token, token without its edge dash) pairs, in token order.
    replacements = []
    for token in tokens:
        # startswith/endswith compare by value. The previous `is "-"` test
        # compared object identity, which only works through an interpreter
        # implementation detail, raises SyntaxWarning on Python 3.8+, and
        # fails with IndexError on an empty token.
        if token.startswith("-"):
            replacements.append((token, token[1:]))
        elif token.endswith("-"):
            replacements.append((token, token[:-1]))

    # print("{}: {}".format(filename, replacements))
    for replacement in replacements:
        content = clean.replace_pair(replacement, content)

    # The with-block closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)

# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction3 Average verified rate: 0.9814938307988949 Average of error rates: 0.020385108922709644 Total token count: 8367534
# %load shared_elements/top_errors.py
# Rebuild the error summary from the correction3 report.
errors_summary = reports.get_errors_summary( summary )
# Display at most the first 50 (token, count) pairs returned by top_errors.
# NOTE(review): the second argument (10) looks like a minimum-count cut-off --
# confirm in text2topics.reports.
reports.top_errors( errors_summary, 10 )[:50]
[("'", 8554), ('t', 4413), ('e', 4034), ('d', 3964), ('w', 3794), ('co', 3624), ('m', 3204), ('n', 3085), ('r', 2060), ('f', 2026), ('th', 1718), ('g', 1378), ('mo', 1162), ('u', 928), ('x', 864), ("'the", 601), ('ex', 539), ('re', 505), ('pa', 423), ('q', 399), ('sunday-law', 334), ('tion', 328), ('k', 319), ("the'", 313), ('pp', 299), ("conscience'", 265), ('ch', 260), ('seventhday', 249), ("'of", 238), ('ti', 228), ('ga', 227), ('oc', 219), ('z', 215), ('wm', 215), ('un', 210), ('satolli', 210), ('employes', 209), ('munn', 206), ('ca', 205), ('al', 204), ('yo', 202), ('mc', 191), ('ry', 185), ('id', 185), ('ment', 183), ("'a", 175), ('sunday-closing', 159), ("'to", 154), ('se', 153), ('nd', 152)]
Correction 4 -- Remove extra quotation marks
# %load shared_elements/remove_extra_quotation_marks.py
# Correction 4: strip stray apostrophes from the start or end of tokens.
# Reads every file from the previous cycle's directory and writes the
# corrected text under the same filename in this cycle's directory.
prev = cycle
cycle = "correction4"
directories = utilities.define_directories(prev, cycle, base_dir)

# Make sure the output directory exists (no separate existence check needed).
os.makedirs(directories['cycle'], exist_ok=True)

# Lazily list the regular, non-hidden files produced by the previous cycle.
corpus = (
    f for f in listdir(directories['prev'])
    if not f.startswith('.') and isfile(join(directories['prev'], f))
)

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Tokenize a copy with digits and the listed punctuation blanked out;
    # the corrections themselves are applied to the untouched `content`.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    corrections = []
    for token in tokens:
        if token.endswith("'"):
            # Leave a lone "'" alone, and keep the apostrophe of plural
            # possessives (s' / S').  The previous test
            # `x is 's' or 'S'` was always true because the non-empty string
            # 'S' is truthy, so trailing apostrophes were never removed.
            if len(token) > 1 and token[-2] not in ('s', 'S'):
                corrections.append((token, token.replace("'", "")))
        elif token.startswith("'"):
            # Note: every apostrophe in the token is removed, not only the
            # leading one (same as the original re.sub call).
            corrections.append((token, token.replace("'", "")))

    if corrections:
        # print('{}: {}'.format(filename, corrections))
        for correction in corrections:
            content = clean.replace_pair(correction, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Run the overview report over this cycle's output directory.  The result is
# kept in `summary` and passed to reports.get_errors_summary below.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction4 Average verified rate: 0.9824621432313876 Average of error rates: 0.01930259623992838 Total token count: 8367328
# %load shared_elements/top_errors.py
# Summarise the errors recorded in `summary`, then display the first 50
# entries.  The bare expression on the last line is what the notebook shows.
# NOTE(review): the second argument (10) looks like a minimum-count cutoff;
# confirm against reports.top_errors.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)[:50]
[("'", 8053), ('t', 4487), ('e', 4070), ('d', 3979), ('w', 3797), ('co', 3626), ('m', 3209), ('n', 3104), ('r', 2078), ('f', 2046), ('th', 1721), ('g', 1386), ('mo', 1165), ('u', 930), ('x', 865), ('ex', 539), ('re', 508), ('pa', 427), ('q', 401), ('sunday-law', 334), ('tion', 329), ('k', 320), ("the'", 304), ('pp', 299), ("conscience'", 261), ('ch', 260), ('seventhday', 249), ('ti', 230), ('ga', 228), ('oc', 219), ('z', 217), ('wm', 215), ('un', 210), ('satolli', 210), ('employes', 209), ('al', 208), ('munn', 207), ('ca', 206), ('yo', 203), ('mc', 191), ('id', 189), ('ry', 186), ('ment', 183), ('sunday-closing', 160), ('nd', 159), ('se', 157), ('tinel', 147), ('ll', 141), ('il', 137), ('chain-gang', 136)]
Correction 5 -- Rejoin Burst Words
Check errors to see if they form verified tokens when joined with the preceding token.
# %load shared_elements/rejoin_burst_words.py
# Correction 5: look for runs of very short fragments and rejoin them.
# Reads every file from the previous cycle's directory and writes the
# corrected text under the same filename in this cycle's directory.
prev = cycle
cycle = "correction5"
directories = utilities.define_directories(prev, cycle, base_dir)

# Make sure the output directory exists (no separate existence check needed).
os.makedirs(directories['cycle'], exist_ok=True)

# Lazily list the regular, non-hidden files produced by the previous cycle.
corpus = (
    f for f in listdir(directories['prev'])
    if not f.startswith('.') and isfile(join(directories['prev'], f))
)

# A whitespace character followed by five or more 1-2 character "words",
# each followed by whitespace.  Written as a raw string so that \s and \w are
# not treated as (invalid) string escapes, and compiled once for all files.
pattern = re.compile(r"(\s(\w{1,2}\s){5,})")

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # check_splits is handed this list and its return value is ignored, so it
    # is expected to append its (original, repaired) pairs in place.
    replacements = []
    clean.check_splits(pattern, spelling_dictionary, content, replacements)

    if replacements:
        # print('{}: {}'.format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Run the overview report over this cycle's output directory.  The result is
# kept in `summary` and passed to reports.get_errors_summary below.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction5 Average verified rate: 0.9824669426623706 Average of error rates: 0.01929677708146822 Total token count: 8367280
# %load shared_elements/top_errors.py
# Summarise the errors recorded in `summary`, then display the first 50
# entries.  The bare expression on the last line is what the notebook shows.
# NOTE(review): the second argument (10) looks like a minimum-count cutoff;
# confirm against reports.top_errors.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)[:50]
[("'", 8053), ('t', 4483), ('e', 4047), ('d', 3973), ('w', 3794), ('co', 3626), ('m', 3202), ('n', 3097), ('r', 2062), ('f', 2043), ('th', 1721), ('g', 1386), ('mo', 1165), ('u', 930), ('x', 865), ('ex', 539), ('re', 508), ('pa', 427), ('q', 401), ('sunday-law', 334), ('tion', 329), ('k', 320), ("the'", 304), ('pp', 299), ("conscience'", 261), ('ch', 260), ('seventhday', 249), ('ti', 230), ('ga', 228), ('oc', 219), ('z', 217), ('wm', 215), ('un', 210), ('satolli', 210), ('employes', 209), ('al', 208), ('munn', 207), ('ca', 206), ('yo', 203), ('mc', 191), ('id', 189), ('ry', 186), ('ment', 183), ('sunday-closing', 160), ('nd', 159), ('se', 157), ('tinel', 147), ('ll', 141), ('il', 137), ('chain-gang', 136)]
Correction 6 -- Rejoin Split Words
# %load shared_elements/rejoin_split_words.py
# Correction 6: first split-word pass (get_prior=False).
# Reads every file from the previous cycle's directory and writes the
# corrected text under the same filename in this cycle's directory.
prev = cycle
cycle = "correction6"
directories = utilities.define_directories(prev, cycle, base_dir)

# Make sure the output directory exists (no separate existence check needed).
os.makedirs(directories['cycle'], exist_ok=True)

# Lazily list the regular, non-hidden files produced by the previous cycle.
corpus = (
    f for f in listdir(directories['prev'])
    if not f.startswith('.') and isfile(join(directories['prev'], f))
)

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Tokenize a copy with digits and the listed punctuation blanked out;
    # the replacements themselves are applied to the untouched `content`.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    # NOTE(review): presumably `errors` are the tokens not found in the
    # spelling dictionary, and get_prior=False pairs each one with the token
    # after it rather than the one before it; confirm in text2topics.
    errors = reports.identify_errors(tokens, spelling_dictionary)
    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)

    if replacements:
        # print('{}: {}'.format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Run the overview report over this cycle's output directory.  The result is
# kept in `summary` and passed to reports.get_errors_summary below.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction6 Average verified rate: 0.9828407009325851 Average of error rates: 0.018864070426738287 Total token count: 8365027
# %load shared_elements/top_errors.py
# Summarise the errors recorded in `summary`, then display the full list
# returned by top_errors (no slice here).  The bare expression on the last
# line is what the notebook shows.
# NOTE(review): the second argument (10) looks like a minimum-count cutoff;
# confirm against reports.top_errors.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)
[("'", 8053), ('t', 4443), ('e', 3991), ('d', 3960), ('w', 3791), ('co', 3415), ('m', 3195), ('n', 3057), ('r', 2043), ('f', 2042), ('th', 1658), ('g', 1377), ('mo', 1161), ('u', 927), ('x', 864), ('ex', 523), ('pa', 410), ('q', 399), ('sunday-law', 334), ('k', 318), ("the'", 304), ('pp', 299), ('tion', 281), ('re', 279), ("conscience'", 260), ('ch', 254), ('seventhday', 249), ('ga', 221), ('oc', 218), ('z', 215), ('wm', 215), ('satolli', 210), ('employes', 209), ('munn', 207), ('ti', 203), ('id', 186), ('ry', 183), ('al', 175), ('un', 174), ('ment', 173), ('ca', 170), ('sunday-closing', 160), ('nd', 158), ('tinel', 147), ('se', 143), ('ll', 141), ('chain-gang', 136), ("to'", 134), ('il', 127), ('lb', 125), ('bateham', 122), ('cmsar', 121), ('aleck', 112), ("and'", 109), ('socalled', 106), ('te', 105), ('sunday-rest', 104), ('cc', 104), ('sentin', 104), ('milly', 103), ('erican', 102), ("of'", 98), ('va', 97), ('cd', 94), ('nt', 93), ('fellow-citizens', 92), ('vt', 92), ('ican', 90), ('op', 90), ('tt', 89), ('ft', 89), ('aa', 89), ('-', 88), ("a'", 86), ('eze', 84), ('attorney-general', 83), ('ma', 83), ('csar', 81), ('stundists', 80), ('cereola', 79), ('lc', 79), ('ay', 78), ('neander', 78), ('ne', 77), ('li', 77), ('religio-political', 76), ('tions', 75), ('mc', 74), ('law-abiding', 74), ('sundaylaw', 74), ('ni', 73), ('rican', 73), ('ra', 73), ('ia', 72), ('edmunds', 71), ('rd', 71), ("crafts's", 70), ('es', 70), ("is'", 69), ('si', 69), ('sr', 69), ('freethought', 68), ("in'", 67), ("crafts'", 66), ("folks'", 66), ("cmsar's", 66), ('un-american', 64), ('sabbath-day', 63), ('rest-day', 62), ('ity', 61), ('ie', 61), ('ic', 60), ('sabbath-breaking', 60), ("''", 59), ('mt', 59), ("'s", 59), ('pr', 58), ('na', 57), ('ac', 57), ('ernment', 56), ('candidus', 56), ("barbers'", 56), ('paeifie', 56), ('ments', 55), ('tregelles', 54), ('geikie', 54), ("that'", 54), ('ri', 53), ("an'", 53), ('ce', 53), ('dred', 53), ('employe', 52), ("it'", 52), ('ky', 51), ('assoeiation', 
51), ('ob', 51), ('litt', 51), ('wellknown', 50), ('tional', 50), ('erty', 50), ('coxey', 50), ('ci', 50), ('ofthe', 49), ('ary', 49), ('vo', 48), ('tian', 48), ('aro', 48), ('ph', 48), ('ow', 47), ('pre', 47), ('ple', 47), ('ent', 47), ('fa', 47), ('cathedra', 46), ("jones'", 46), ('ro', 46), ('leiper', 45), ('mi', 45), ('forit', 45), ("citizens'", 44), ('sh', 43), ('sundayclosing', 43), ('judefind', 42), ('wo', 42), ('ei', 42), ('times-herald', 42), ('chaingang', 42), ('ct', 42), ('tischendorf', 42), ('ers', 41), ('bo', 41), ("o'keefe", 41), ('hiberty', 41), ('em', 41), ('cr', 41), ("be'", 41), ('merican', 40), ("law'", 40), ('ica', 40), ('sunday-keeping', 40), ('ea', 40), ('yo', 40), ('chapelle', 40), ('ip', 40), ('ance', 39), ("i'", 39), ('oi', 39), ('keane', 39), ('tv', 39), ('copygraph', 39), ("waterman's", 38), ('lachmann', 38), ('kai', 38), ("cruden's", 38), ('oa', 37), ('non-sectarian', 37), ('adress', 37), ("csar's", 37), ('saye', 37), ('ly', 37), ('ther', 37), ('ta', 37), ('io', 37), ('church-and-state', 37), ("american'", 37), ('terest', 36), ('tr', 36), ('coxe', 36), ('dwyer', 36), ("for'", 35), ("not'", 35), ('da', 35), ('pf', 35), ('tir', 35), ('td', 35), ('mass-meeting', 35), ('swiggart', 35), ("this'", 35), ('stitution', 35), ('ba', 35), ('fi', 35), ('az', 35), ('law-making', 34), ('first-day', 34), ('jagoe', 34), ('inthe', 34), ('godgiven', 34), ('ns', 34), ("church'", 34), ('entinel', 34), ('sa', 33), ('oo', 33), ('brunot', 33), ('ork', 33), ('ful', 33), ('cwsar', 33), ('ts', 33), ('gious', 33), ('eh', 33), ('cl', 33), ('ss', 32), ("as'", 32), ('vice-presidents', 32), ('base-ball', 32), ('ap', 32), ("infants'", 32), ('rt', 32), ('saloon-keepers', 32), ('lt', 31), ('sun-worship', 31), ("liberty'", 31), ('rs', 31), ('one-seventh', 31), ('prayer-meeting', 31), ('slattery', 31), ('colitical', 30), ('efical', 30), ('mn', 30), ('vox', 30), ('fr', 30), ('ith', 30), ('ao', 29), ('os', 29), ('mg', 29), ("are'", 29), ('oe', 29), ("roberts'", 29), ('ge', 
29), ('tc', 29), ('rn', 29), ('kauffman', 29), ('fo', 29), ('crowther', 29), ("workingmen's", 29), ('puplishing', 29), ("all'", 29), ('holidayism', 28), ('eral', 28), ('self-preservation', 28), ('street-cars', 28), ("parkhurst's", 28), ('charta', 28), ('dei', 28), ('newyork', 28), ('durborow', 28), ('liberty-loving', 28), ('om', 28), ('zwiebach', 28), ("fathers'", 28), ('non-catholics', 28), ('tl', 28), ('fair-minded', 27), ("krug's", 27), ("sabbath'", 27), ('non-observance', 27), ('iu', 27), ('schurman', 27), ("cushing's", 27), ('ve', 27), ('mccauley', 27), ('ou', 27), ('self-defense', 27), ('theo', 27), ('fellow-man', 27), ("gibbons'", 27), ("or'", 26), ('counter-arguments', 26), ('sabbathkeeping', 26), ('ence', 26), ('ablegate', 26), ('fora', 26), ('fide', 26), ('platt', 26), ('thon', 26), ("with'", 26), ('pany', 26), ('itis', 26), ('hto', 26), ('bula', 26), ("god'", 26), ('ig', 26), ("which'", 26), ('pecci', 26), ('divinely-appointed', 26), ('ae', 26), ('non-religious', 26), ("by'", 26), ('ous', 26), ('selfgovernment', 26), ('loth', 25), ("lions'", 25), ('weakley', 25), ('eferson', 25), ('ab', 25), ('nethinim', 25), ('weyler', 25), ('feligious', 25), ('fah', 25), ("d'aubigne", 25), ('martinelli', 25), ('tn', 25), ("at'", 25), ('sundayschool', 25), ('ws', 25), ('ridpath', 25), ('tne', 25), ('publishinc', 25), ('ut', 24), ('krug', 24), ('ceesar', 24), ('stuttle', 24), ('mehan', 24), ('tothe', 24), ('tiie', 24), ('ligion', 24), ('ee', 24), ("preachers'", 24), ('ber', 24), ("if'", 24), ('phelan', 24), ('ib', 23), ('humbert', 23), ('ious', 23), ('lawabiding', 23), ('twentyfive', 23), ('atterbury', 23), ('sunday-sabbath', 23), ('nn', 23), ('arierican', 23), ('ble', 23), ("saints'", 23), ('tbe', 22), ('anb', 22), ('reli', 22), ('ili', 22), ('ef', 22), ('od', 22), ('bt', 22), ('tb', 22), ('ligious', 22), ("have'", 22), ('sas', 22), ('scudder', 22), ('sel', 22), ('wi', 22), ('gi', 22), ('anti-christian', 22), ("we'", 22), ('jeferson', 22), ('xact', 21), ("pub'rs", 21), 
("grocers'", 21), ("e'", 21), ('comegys', 21), ('scovel', 21), ('sevent', 21), ('tianity', 21), ('tivity', 21), ('po', 21), ('ject', 21), ('mee', 21), ('ress', 21), ('witham', 21), ('thos', 21), ('ng', 21), ('yefferson', 21), ('ive', 21), ("sunday'", 21), ('notgive', 21), ('eousness', 21), ('postmaster-general', 20), ("hutchings'", 20), ('washburne', 20), ("he'", 20), ('religiopolitical', 20), ('kellog', 20), ('romer', 20), ('healthgiving', 20), ("soldiers'", 20), ('rr', 20), ("satolli's", 20), ('mcglynn', 20), ('sien', 20), ("from'", 20), ("hours'", 20), ('pt', 20), ('anierican', 20), ('rosemond', 20), ("vick's", 20), ('parens', 20), ('bythe', 20), ("on'", 20), ('eign', 20), ('longnecker', 20), ("was'", 20), ("printers'", 19), ('anierica', 19), ('pressense', 19), ('fbr', 19), ('micr', 19), ('everts', 19), ('rorabacher', 19), ("pastors'", 19), ("gov't", 19), ('tp', 19), ('iti', 19), ('ies', 19), ('ible', 19), ('seelye', 19), ('arther', 19), ('wishart', 19), ("people'", 19), ('cosgrove', 19), ('gt', 19), ('det', 19), ('lery', 19), ('abbe', 19), ('ey', 19), ('ation', 19), ("day'", 19), ('hagans', 19), ('montefiore', 19), ("will'", 19), ('governor-general', 19), ('chain-gangs', 19), ('law-makers', 19), ('stundist', 19), ('sundaykeeping', 18), ('dc', 18), ('tae', 18), ('enright', 18), ('anti-catholic', 18), ('non-interference', 18), ('tht', 18), ('oz', 18), ('efferson', 18), ("th'", 18), ('ol', 18), ('tarawera', 18), ('curlett', 18), ('tii', 18), ('tolstoi', 18), ('self-styled', 18), ('--', 18), ("would'", 18), ('nel', 18), ('um', 18), ('ccesar', 18), ('oity', 18), ('wa', 18), ('etteer', 18), ('avery-stuttle', 18), ('nnw', 17), ('mal', 17), ('bf', 17), ('prin', 17), ("righteousness'", 17), ('jt', 17), ('clingman', 17), ('cedarquist', 17), ('newyorkcity', 17), ('tra', 17), ('ical', 17), ('ricans', 17), ('saloon-keeper', 17), ('rubiana', 17), ('prisot', 17), ('post-offices', 17), ('theunited', 17), ('nu', 17), ("no'", 17), ('fl', 17), ('sabbathbreaking', 17), ("a'nan", 
17), ('sommerville', 17), ('church-going', 17), ('vernment', 17), ('cood', 17), ('mullally', 17), ('self-governing', 17), ('ist', 17), ('bondst', 17), ('philpott', 17), ('law-breaker', 17), ('ik', 17), ('senti', 17), ('ul', 17), ('ame', 17), ('leivites', 17), ('pel', 17), ('vites', 17), ("apostles'", 17), ('hy', 17), ("schaff's", 16), ('dieu', 16), ('selfevident', 16), ('ag', 16), ('dayto', 16), ('ioo', 16), ('tf', 16), ('prepartion', 16), ('cp', 16), ("enright's", 16), ("his'", 16), ('mit', 16), ('oth', 16), ('relig', 16), ('thepeople', 16), ('sie', 16), ('alfaro', 16), ('symmachus', 16), ('xl', 16), ('su', 16), ('bi', 16), ('facto', 16), ('ples', 16), ('erromanga', 16), ('sunday-keepers', 16), ('dividual', 16), ('peryear', 16), ('peffer', 16), ('re-enact', 16), ('ish', 16), ('socialpurity', 16), ('tains', 16), ('cs', 16), ('wilkie', 16), ("l'", 16), ('hodgson', 16), ('basle', 16), ('bas', 16), ('zi', 15), ('tem', 15), ("who'", 15), ('ite', 15), ('sabbath-breakers', 15), ('americansentinel', 15), ('rhe', 15), ('nonsuch', 15), ('lieve', 15), ('pepsia', 15), ('co-workers', 15), ('gallinger', 15), ('labberton', 15), ('thatthe', 15), ('intrust', 15), ('lttra', 15), ('aw', 15), ('law-breakers', 15), ('milman', 15), ('plete', 15), ('lished', 15), ('rampolla', 15), ("christian'", 15), ('wellbeing', 15), ("milman's", 15), ('klip', 15), ('ons', 15), ('ctesar', 15), ("their'", 15), ('re-enacted', 15), ('populi', 15), ('governinent', 15), ('wor', 15), ('hach', 15), ('sc', 15), ("miles'", 15), ("ginn's", 15), ('ih', 15), ('janes', 15), ('ov', 15), ('sulus', 15), ('stinday', 15), ('xo', 15), ('ectarian', 15), ("o'gorman", 15), ('tkt', 15), ('ddress', 15), ("they'", 15), ('alvierica', 15), ('gress', 15), ('je', 15), ('birney', 15), ('ny', 15), ("religion'", 15), ('avery-stiittle', 15), ('sf', 15), ('tians', 15), ('np', 14), ('thb', 14), ('ver', 14), ('olic', 14), ('qa', 14), ("pres'ts", 14), ('secker', 14), ('intelligeneer', 14), ("'the", 14), ('yeferson', 14), ('ual', 14), 
('self-exaltation', 14), ("tourists'", 14), ('chiniquy', 14), ('rittenhouse', 14), ('ormore', 14), ("moses'", 14), ('peo', 14), ('goverment', 14), ('thp', 14), ("sup'ts", 14), ("gault's", 14), ('cortlandt', 14), ('non-union', 14), ('br', 14), ("ccesar's", 14), ('anglo-saxons', 14), ('christain', 14), ('sp', 14), ('restday', 14), ('nr', 14), ('rv', 14), ('eemperance', 14), ('sabbath-breaker', 14), ('gb', 14), ("mcallister's", 14), ('rian', 14), ('malum', 14), ("williams'", 14), ("neander's", 14), ("adventists'", 14), ('lexow', 14), ('confreres', 14), ('thr', 14), ('ncluding', 14), ('af', 14), ('sient', 14), ('tution', 14), ('gl', 14), ('tennesseeans', 14), ('mu', 14), ("but'", 13), ('fon', 13), ('christ-like', 13), ('aivierican', 13), ('leaguers', 13), ('wu', 13), ("mf'g", 13), ('hoc', 13), ('dibbs', 13), ('anti-religious', 13), ('themies', 13), ('dont', 13), ('ex-president', 13), ('gr', 13), ("one'", 13), ('ine', 13), ('two-horned', 13), ('rp', 13), ("coxey's", 13), ('higinbotham', 13), ("t'", 13), ("protestants'", 13), ('pilman', 13), ('froni', 13), ('foi', 13), ('meeting-house', 13), ('mccourt', 13), ('thd', 13), ('waupon', 13), ("f'", 13), ("has'", 13), ('itt', 13), ('hiscock', 13), ('self-contradictory', 13), ("torry's", 13), ('cif', 13), ('gainst', 13), ("its'", 13), ('dn', 13), ('princi', 13), ('cer', 13), ('thi', 13), ('ec', 13), ('hee', 13), ('sabbathkeepers', 13), ('lelvites', 13), ('one-man', 13), ('tms', 13), ('rundschau', 13), ('tlie', 13), ('tax-payers', 13), ('non-christian', 13), ('self-appointed', 13), ("breeders'", 13), ('kossean', 13), ('olesen', 13), ('botkine', 13), ('ntinel', 13), ('volksraad', 13), ("whaley's", 12), ('constitu', 12), ('ke', 12), ('ets', 12), ('pm', 12), ('ess', 12), ('froin', 12), ('robb', 12), ('theire', 12), ('thein', 12), ('ors', 12), ('chappelle', 12), ("churches'", 12), ('self-constituted', 12), ('wouldbe', 12), ('entin', 12), ('week-day', 12), ('thority', 12), ('fast-day', 12), ("were'", 12), ('sk', 12), ('ex-mayor', 
12), ('fortynine', 12), ('ture', 12), ('bok', 12), ('whitall', 12), ("cents'", 12), ('sition', 12), ('tte', 12), ('self-interest', 12), ('croker', 12), ("d'aubigne's", 12), ('merous', 12), ('cai', 12), ('combatting', 12), ('vention', 12), ('observa', 12), ('fp', 12), ('yonx', 12), ("such'", 12), ('ht', 12), ('ds', 12), ('masse', 12), ('self-respecting', 12), ('pc', 12), ('ivierican', 12), ('tobe', 12), ("do'", 12), ("christ'", 12), ('ki', 12), ("neat's", 12), ('twenty-fifth', 12), ('ttin', 12), ('maurer', 12), ('bondstreet', 12), ('inter-state', 12), ('lation', 12), ('ang', 12), ("any'", 12), ('rk', 12), ('gx', 12), ('sunday-observance', 12), ('havergal', 11), ("james'", 11), ('thechurch', 11), ('sm', 11), ('cz', 11), ('df', 11), ("dealers'", 11), ('ex-senator', 11), ('erson', 11), ('uncompromis', 11), ('mm', 11), ('ernments', 11), ('sherk', 11), ('fifty-second', 11), ('selfpreservation', 11), ('derstanding', 11), ('nished', 11), ('naw', 11), ('tre', 11), ("states'", 11), ('theni', 11), ("state'", 11), ('communica', 11), ('ular', 11), ('rose-wood', 11), ('androscoggin', 11), ("bakers'", 11), ('mis', 11), ('dition', 11), ('ure', 11), ('forthe', 11), ('taschereau', 11), ('qt', 11), ('tm', 11), ('griffitts', 11), ('fellow-workers', 11), ('kw', 11), ('bradfield', 11), ('houk', 11), ('fot', 11), ("so'", 11), ("'a", 11), ('amyot', 11), ('muskoka', 11), ('pl', 11), ('aivierica', 11), ('erties', 11), ('qf', 11), ('bl', 11), ('merica', 11), ('haye', 11), ('ost', 11), ('ev', 11), ('isthepapacyinprophecy', 11), ('sev', 11), ('mits', 11), ('notre', 11), ('key-note', 11), ('hirsch', 11), ('sealings', 11), ('rin', 11), ('evil-doers', 11), ('nott', 11), ("civil'", 11), ('ml', 11), ('ete', 11), ('kee', 11), ('rc', 11), ('yr', 11), ('ex-governor', 11), ('cramer', 11), ('lr', 11), ('fs', 11), ('informations', 11), ('paoipio', 11), ('twentyfour', 11), ('ridgetown', 11), ('axact', 11), ('times-democrat', 11)]
Correction 7 -- Rejoin Split Words II
# %load shared_elements/rejoin_split_words.py
# Correction 7: second split-word pass (get_prior=True).
# Reads every file from the previous cycle's directory and writes the
# corrected text under the same filename in this cycle's directory.
prev = cycle
cycle = "correction7"
directories = utilities.define_directories(prev, cycle, base_dir)

# Make sure the output directory exists (no separate existence check needed).
os.makedirs(directories['cycle'], exist_ok=True)

# Lazily list the regular, non-hidden files produced by the previous cycle.
corpus = (
    f for f in listdir(directories['prev'])
    if not f.startswith('.') and isfile(join(directories['prev'], f))
)

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Tokenize a copy with digits and the listed punctuation blanked out;
    # the replacements themselves are applied to the untouched `content`.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    # NOTE(review): presumably `errors` are the tokens not found in the
    # spelling dictionary, and get_prior=True pairs each one with the token
    # before it; confirm in text2topics.
    errors = reports.identify_errors(tokens, spelling_dictionary)
    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)

    if replacements:
        # Log the pairs found in each file so the joins can be reviewed.
        print('{}: {}'.format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
AmSn18860101-V01-01-page3.txt: [('to', 're')] AmSn18860301-V01-03-page1.txt: [('SEN', 'TINEL')] AmSn18860301-V01-03-page3.txt: [('in', 'stitutions')] AmSn18860301-V01-03-page8.txt: [('SEN', 'TINEL')] AmSn18860401-V01-04-page1.txt: [('am', 'endmentists')] AmSn18860601-V01-06-page1.txt: [('he', 're'), ('in', 'stitute')] AmSn18860601-V01-06-page7.txt: [('PAR', 'LIAMENTARY'), ('AMER', 'ICAN')] AmSn18860701-V01-07-page3.txt: [('fruit', 'ful')] AmSn18860701-V01-07-page5.txt: [('a', 'ment')] AmSn18860701-V01-07-page6.txt: [('Ch', 'aldea')] AmSn18860801-V01-08-page1.txt: [('amen', 'dmentists')] AmSn18860801-V01-08-page5.txt: [('the', 'Re')] AmSn18860801-V01-08-page6.txt: [('to', 're')] AmSn18860901-V01-09-page3.txt: [('Nation', 'al'), ('a', 'Na')] AmSn18860901-V01-09-page4.txt: [('Amen', 'dmentists'), ('in', 'sistency')] AmSn18860901-V01-09-page7.txt: [('and', 're')] AmSn18861001-V01-10-page3.txt: [('when', 'ce')] AmSn18861001-V01-10-page5.txt: [('be', 'ment')] AmSn18861101-V01-11-page6.txt: [('the', 're')] AmSn18861101-V01-11-page8.txt: [('Nation', 'al')] AmSn18861201-V01-12-page1.txt: [('a', 're')] AmSn18861201-V01-12-page2.txt: [('do', 'ers'), ('the', 'mis')] AmSn18861201-V01-12-page6.txt: [('to', 're')] AmSn18861201-V01-12-page7.txt: [('DE', 'STRUCTION'), ('ap', 'preciated')] AmSn18870101-V02-01-page1.txt: [('fa', 'vorable')] AmSn18870101-V02-01-page2.txt: [('Reform', 'ers'), ('SEN', 'TINEL'), ('re', 'sults'), ('in', 'dicated'), ('the', 're')] AmSn18870101-V02-01-page3.txt: [('SEN', 'TINEL'), ('rep', 'robation')] AmSn18870101-V02-01-page7.txt: [('sev', 'enty'), ('Constitution', 'al')] AmSn18870301-V02-03-page2.txt: [('Social', 'istic'), ('in', 'scription')] AmSn18870301-V02-03-page3.txt: [('la', 'Ws')] AmSn18870301-V02-03-page4.txt: [('decal', 'ogue'), ('to', 'co'), ('Associ', 'ation')] AmSn18870301-V02-03-page5.txt: [('f', 'orce')] AmSn18870301-V02-03-page6.txt: [('Pres', 'byterian')] AmSn18870301-V02-03-page8.txt: [('AMER', 'ICAN')] AmSn18870401-V02-04-page6.txt: 
[('to', 'es'), ('amen', 'dment'), ('es', 'tablish')] AmSn18870401-V02-04-page8.txt: [('SEN', 'TINEL')] AmSn18870501-V02-05-page3.txt: [('au', 'thority')] AmSn18870501-V02-05-page7.txt: [('COM', 'PLETE')] AmSn18870601-V02-06-page8.txt: [('dimin', 'ished')] AmSn18870701-V02-07-page3.txt: [('Gov', 'ernment')] AmSn18870701-V02-07-page8.txt: [('b', 'id')] AmSn18870801-V02-08-page8.txt: [('a', 'Te')] AmSn18870901-V02-09-page6.txt: [('in', 'terest')] AmSn18871001-V02-10-page1.txt: [('SEN', 'TINEL'), ('condi', 'tions')] AmSn18871101-V02-11-page8.txt: [('Chris', 'tianity')] AmSn18871201-V02-12-page1.txt: [('Associ', 'ation')] AmSn18871201-V02-12-page3.txt: [('SEN', 'TINEL')] AmSn18871201-V02-12-page5.txt: [('De', 'ception')] AmSn18871201-V02-12-page8.txt: [('SEN', 'TINEL'), ('Con', 'Tention')] AmSn18880101-V03-01-page1.txt: [('ex', 'actly')] AmSn18880101-V03-01-page7.txt: [('r', 'oo'), ('sol', 'dier')] AmSn18880101-V03-01-page8.txt: [('SEN', 'TINEL')] AmSn18880201-V03-02-page1.txt: [('Sta', 'te')] AmSn18880301-V03-03-page2.txt: [('conse', 'quently')] AmSn18880301-V03-03-page8.txt: [('SEN', 'TINEL')] AmSn18880401-V03-04-page1.txt: [('legis', 'lative')] AmSn18880501-V03-05-page3.txt: [('a', 'nd'), ('hypo', 'Crites')] AmSn18880501-V03-05-page7.txt: [('Prot', 'estant')] AmSn18880601-V03-06-page2.txt: [('lib', 'erty')] AmSn18880601-V03-06-page7.txt: [('you', 'th'), ('PRES', 'ERVATION')] AmSn18880701-V03-07-page6.txt: [('ADDI', 'TIONAL')] AmSn18880701-V03-07-page7.txt: [('ad', 'Vocate'), ('the', 're')] AmSn18880801-V03-08-page5.txt: [('indus', 'trious')] AmSn18880801-V03-08-page7.txt: [('biblic', 'al')] AmSn18880901-V03-09-page4.txt: [('ar', 'rayed')] AmSn18880901-V03-09-page5.txt: [('perse', 'cuted'), ('ten', 'ets'), ('we', 're')] AmSn18880901-V03-09-page7.txt: [('con', 'tinuance')] AmSn18881001-V03-10-page1.txt: [('SEN', 'TINEL')] AmSn18881001-V03-10-page2.txt: [('fa', 'vor')] AmSn18881001-V03-10-page6.txt: [('swe', 'eping')] AmSn18881001-V03-10-page8.txt: [('amend', 'Ment')] 
AmSn18881015-V03-10a-page5.txt: [('re', 'ligious'), ('SEN', 'TINEL')] AmSn18881015-V03-10a-page7.txt: [('PRES', 'ERVATION')] AmSn18881101-V03-11-page6.txt: [('genera', 'tions')] AmSn18881201-V03-12-page3.txt: [('no', 'ma')] AmSn18881201-V03-12-page4.txt: [('or', 'ganization')] AmSn18881201-V03-12-page5.txt: [('temper', 'ance')] AmSn18881201-V03-12-page6.txt: [('SEN', 'TINEL')] AmSn18881201-V03-12-page8.txt: [('SEN', 'TINELS'), ('AMER', 'ICAN'), ('s', 'chool')] AmSn18881201-V03-12-page9.txt: [('Go', 'dless'), ('Go', 'od'), ('Go', 'vernmental'), ('N', 'ational'), ('Go', 'vernment'), ('Go', 'spel')] AmSn18890101-V04-01-page8.txt: [('SEN', 'TINEL')] AmSn18890130-V04-02-page7.txt: [('car', 'ried'), ('C', 'ANVASSERS')] AmSn18890130-V04-02-page8.txt: [('to', 'ro')] AmSn18890206-V04-03-page2.txt: [('re', 'spect')] AmSn18890206-V04-03-page7.txt: [('S', 'ABBATH')] AmSn18890213-V04-04-page5.txt: [('SEN', 'TINEL'), ('par', 'ies')] AmSn18890213-V04-04-page6.txt: [('j', 'ust'), ('ref', 'erence')] AmSn18890213-V04-04-page7.txt: [('s', 'ABBATH')] AmSn18890213-V04-04-page8.txt: [('insti', 'tuted'), ('AMER', 'ICAN')] AmSn18890220-V04-05-page4.txt: [('i', 'ndorsement')] AmSn18890220-V04-05-page7.txt: [('S', 'HORTHAND'), ('a', 'reli'), ('DIS', 'COVERIES')] AmSn18890220-V04-05-page8.txt: [('SEN', 'TINEL')] AmSn18890227-V04-06-page2.txt: [('the', 'Ca')] AmSn18890227-V04-06-page7.txt: [('DIS', 'COVERIES')] AmSn18890306-V04-07-page1.txt: [('e', 'th')] AmSn18890306-V04-07-page3.txt: [('SEN', 'TINEL'), ('AMUSE', 'MENTS')] AmSn18890306-V04-07-page5.txt: [('a', 'nd')] AmSn18890306-V04-07-page7.txt: [('DIS', 'COVERIES')] AmSn18890313-V04-08-page2.txt: [('h', 'alf')] AmSn18890313-V04-08-page3.txt: [('so', 're')] AmSn18890313-V04-08-page7.txt: [('an', 'Ab')] AmSn18890320-V04-09-page1.txt: [('SEN', 'TINEL')] AmSn18890320-V04-09-page7.txt: [('Ab', 'surdity'), ('an', 'Ab')] AmSn18890327-V04-10-page7.txt: [('Or', 'ders')] AmSn18890403-V04-11-page8.txt: [('Ber', 'th'), ('at', 'onement')] 
AmSn18890410-V04-12-page7.txt: [('to', 'Ca'), ('p', 'OP'), ('Ca', 'sar')] AmSn18890410-V04-12-page8.txt: [('SEN', 'TINEL'), ('la', 've'), ('AMER', 'ICAN')] AmSn18890417-V04-13-page1.txt: [('AMER', 'ICAN')] AmSn18890417-V04-13-page7.txt: [('S', 'OUTH'), ('S', 'HORTHAND'), ('T', 'ATTLE'), ('F', 'AMILY')] AmSn18890417-V04-13-page8.txt: [('CALIF', 'ORNIA')] AmSn18890424-V04-14-page7.txt: [('to', 'ri'), ('DIS', 'COVERIES'), ('e', 'CO')] AmSn18890501-V04-15-page1.txt: [('as', 'sembly')] AmSn18890501-V04-15-page7.txt: [('LIBRA', 'RY'), ('DIS', 'COVERIES'), ('con', 'tains')] AmSn18890501-V04-15-page8.txt: [('con', 'tains')] AmSn18890515-V04-16-page7.txt: [('LIBRA', 'RY')] AmSn18890529-V04-18-page6.txt: [('AMER', 'ICAN')] AmSn18890529-V04-18-page7.txt: [('LIBRA', 'RY'), ('A', 'RCHITECTS')] AmSn18890529-V04-18-page8.txt: [('AMER', 'ICAN')] AmSn18890605-V04-19-page1.txt: [('Ber', 'th')] AmSn18890605-V04-19-page3.txt: [('toot', 'hbrushes')] AmSn18890605-V04-19-page7.txt: [('L', 'ife')] AmSn18890612-V04-20-page7.txt: [('s', 'ecs')] AmSn18890612-V04-20-page8.txt: [('SEN', 'TINEL')] AmSn18890619-V04-21-page1.txt: [('j', 'udicial')] AmSn18890619-V04-21-page7.txt: [('a', 'nd')] AmSn18890626-V04-22-page4.txt: [('lib', 'erty')] AmSn18890626-V04-22-page7.txt: [('o', 'ct'), ('P', 'ENCIL')] AmSn18890626-V04-22-page8.txt: [('AMER', 'ICAN')] AmSn18890703-V04-23-page6.txt: [('immo', 'rality')] AmSn18890703-V04-23-page7.txt: [('DIS', 'COVERIES'), ('to', 'rr')] AmSn18890710-V04-24-page7.txt: [('DIS', 'COVERIES'), ('P', 'HOTOGRAPH')] AmSn18890717-V04-25-page5.txt: [('ef', 'fectually')] AmSn18890717-V04-25-page7.txt: [('L', 'ife'), ('H', 'appy')] AmSn18890724-V04-26-page5.txt: [('or', 'iginators')] AmSn18890724-V04-26-page7.txt: [('P', "UBLISHERS'"), ('A', 'lso')] AmSn18890731-V04-27-page3.txt: [('har', 'mony')] AmSn18890731-V04-27-page8.txt: [('SEN', 'TINEL')] AmSn18890807-V04-28-page3.txt: [('an', 'sWerable')] AmSn18890807-V04-28-page6.txt: [('of', 'fice')] AmSn18890814-V04-29-page3.txt: 
[('SE', 'NTINEL'), ('on', 'ly')] AmSn18890814-V04-29-page7.txt: [('A', 'RCHITECTS')] AmSn18890821-V04-30-page7.txt: [('R', 'eform'), ('D', 'ress'), ('L', "adies'"), ('P', 'ACIFIC')] AmSn18890828-V04-31-page2.txt: [('amuse', 'ments'), ('char', 'acter')] AmSn18890828-V04-31-page4.txt: [('j', 'udiciary')] AmSn18890828-V04-31-page7.txt: [('T', 'IA'), ('S', 'AFETY')] AmSn18890905-V04-32-page3.txt: [('a', 'cre')] AmSn18890911-V04-33-page2.txt: [('follow', 'ers')] AmSn18890911-V04-33-page4.txt: [('discov', 'ered')] AmSn18890911-V04-33-page7.txt: [('A', 'RE')] AmSn18890918-V04-34-page5.txt: [('ac', 'tions'), ('con', 'trary')] AmSn18890918-V04-34-page7.txt: [('i', 'ns')] AmSn18890925-V04-35-page1.txt: [('per', 'secutes')] AmSn18890925-V04-35-page2.txt: [('prov', 'ided')] AmSn18890925-V04-35-page7.txt: [('cele', 'brated'), ('WIS', 'CONSIN')] AmSn18891002-V04-36-page1.txt: [('re', 'spect')] AmSn18891002-V04-36-page4.txt: [('bro', 'ught')] AmSn18891002-V04-36-page7.txt: [('C', 'ivil'), ('P', 'ACIFIC'), ('W', 'aggoner')] AmSn18891002-V04-36-page8.txt: [('re', 'spects'), ('who', 're'), ('persist', 'ent')] AmSn18891009-V04-37-page1.txt: [('leg', 'islatures')] AmSn18891009-V04-37-page2.txt: [('SEN', 'TINEL')] AmSn18891009-V04-37-page6.txt: [('ab', 'sence')] AmSn18891009-V04-37-page8.txt: [('SEN', 'TINEL')] AmSn18891016-V04-38-page7.txt: [('WIS', 'CONSIN'), ('t', 'ie')] AmSn18891016-V04-38-page8.txt: [('AMER', 'ICAN')] AmSn18891023-V04-39-page4.txt: [('to', 'co')] AmSn18891023-V04-39-page7.txt: [('WIS', 'CONSIN'), ('DIS', 'COVERIES')] AmSn18891030-V04-40-page3.txt: [('dis', 'tinction')] AmSn18891030-V04-40-page7.txt: [('WIS', 'CONSIN'), ('DIS', 'COVERIES')] AmSn18891106-V04-41-page2.txt: [('state', 'ment')] AmSn18891106-V04-41-page3.txt: [('a', 'll')] AmSn18891106-V04-41-page8.txt: [('e', 'rr')] AmSn18891113-V04-42-page1.txt: [('af', 'fections')] AmSn18891113-V04-42-page2.txt: [('to', 'es')] AmSn18891113-V04-42-page3.txt: [('a', 'ppointed'), ('a', 'nd'), ('a', 'll')] 
AmSn18891113-V04-42-page5.txt: [('Gov', 'ernment')] AmSn18891113-V04-42-page6.txt: [('re', 'ligious'), ('a', 're')] AmSn18891113-V04-42-page7.txt: [('with', 'Ee'), ('S', 'cholarship')] AmSn18891120-V04-43-page3.txt: [('fes', 'ses'), ('fail', 'ure')] AmSn18891120-V04-43-page6.txt: [('B', 'IC'), ('an', 'noyance')] AmSn18891120-V04-43-page8.txt: [('be', 'lieve')] AmSn18891127-V04-44-page2.txt: [('persecu', 'tion')] AmSn18891127-V04-44-page7.txt: [('to', 'NI'), ('inf', 'orm')] AmSn18891204-V04-45-page3.txt: [('Massa', 'chusetts')] AmSn18891204-V04-45-page7.txt: [('polit', 'ical'), ('comp', 'ete'), ('l', 'ino')] AmSn18891211-V04-46-page8.txt: [('SEN', 'TINEL')] AmSn18891218-V04-47-page1.txt: [('SEN', 'TINEL'), ('the', 'orize')] AmSn18891218-V04-47-page5.txt: [('re', 'ligious')] AmSn18891218-V04-47-page8.txt: [('SEN', 'TINEL'), ('AMER', 'ICAN')] AmSn18891225-V04-48-page1.txt: [('SEN', 'TINEL'), ('pre', 'vailing')] AmSn18891225-V04-48-page2.txt: [('Chris', 'tian')] AmSn18891225-V04-48-page5.txt: [('we', 're')] AmSn18891225-V04-48-page6.txt: [('danger', 'Ous')] AmSn18891225-V04-48-page7.txt: [('ER', 'ICA')] AmSn18891225-V04-48-page8.txt: [('ab', 'ut')] AmSn18891225-V04-48-page9.txt: [('amend', 'ment')] AmSn18900102-V05-01-page1.txt: [('SEN', 'TINEL')] AmSn18900109-V05-02-page3.txt: [('SEN', 'TINEL')] AmSn18900109-V05-02-page7.txt: [('P', 'IE')] AmSn18900116-V05-03-page3.txt: [('AMER', 'ICAN')] AmSn18900116-V05-03-page4.txt: [('Chris', 'tians')] AmSn18900116-V05-03-page7.txt: [('houseke', 'epers'), ('S', 'ID')] AmSn18900123-V05-04-page1.txt: [('fo', 'rce')] AmSn18900123-V05-04-page2.txt: [('per', 'tains')] AmSn18900123-V05-04-page4.txt: [('Con', 'gress')] AmSn18900123-V05-04-page6.txt: [('a', 'nd'), ('a', 'rc')] AmSn18900123-V05-04-page7.txt: [('ma', 'terial')] AmSn18900123-V05-04-page8.txt: [('mi', 'Ssionary')] AmSn18900130-V05-05-page1.txt: [('re', 'ceive'), ('to', 're')] AmSn18900130-V05-05-page4.txt: [('addi', 'tion')] AmSn18900130-V05-05-page5.txt: [('SEN', 'TINEL')] 
AmSn18900130-V05-05-page7.txt: [('a', 'da'), ('the', 're')] AmSn18900130-V05-05-page8.txt: [('treas', 'ury')] AmSn18900206-V05-06-page3.txt: [('threat', 'ening')] AmSn18900206-V05-06-page5.txt: [('fur', 'ther')] AmSn18900206-V05-06-page7.txt: [('M', 'edicine'), ('P', 'IE'), ('D', 'om')] AmSn18900206-V05-06-page8.txt: [('Legis', 'lature')] AmSn18900213-V05-07-page7.txt: [('SIG', 'NS')] AmSn18900220-V05-08-page1.txt: [('the', 'se')] AmSn18900220-V05-08-page2.txt: [('utilita', 'rian')] AmSn18900220-V05-08-page6.txt: [('sic', 'kness')] AmSn18900227-V05-09-page1.txt: [('origi', 'nated')] AmSn18900227-V05-09-page2.txt: [('Ken', 'tucky'), ('AMER', 'ICAN'), ('Com', 'mittee')] AmSn18900227-V05-09-page3.txt: [('SEC', 'TION')] AmSn18900227-V05-09-page7.txt: [('a', 'nd')] AmSn18900306-V05-10-page1.txt: [('Pa', 'se')] AmSn18900306-V05-10-page2.txt: [('Do', 'uay'), ('Wis', 'consin'), ('Roman', 'Ce')] AmSn18900306-V05-10-page6.txt: [('man', 'IC')] AmSn18900306-V05-10-page7.txt: [('E', 'RI')] AmSn18900313-V05-11-page4.txt: [('pa', 'rt')] AmSn18900313-V05-11-page6.txt: [('evi', 'dently')] AmSn18900313-V05-11-page7.txt: [('AMER', 'ICAN')] AmSn18900320-V05-12-page6.txt: [('SEN', 'TINEL')] AmSn18900327-V05-13-page2.txt: [('o', 'wn')] AmSn18900327-V05-13-page7.txt: [('Y', 'es')] AmSn18900403-V05-14-page2.txt: [('syS', 'tem')] AmSn18900410-V05-15-page3.txt: [('neces', 'sArily')] AmSn18900410-V05-15-page4.txt: [('par', 'se')] AmSn18900410-V05-15-page6.txt: [('ques', 'tion')] AmSn18900410-V05-15-page7.txt: [('M', 'ILLIONS')] AmSn18900417-V05-16-page1.txt: [('sup', 'pression'), ('V', 'OLUME')] AmSn18900417-V05-16-page6.txt: [('s', 'chool')] AmSn18900417-V05-16-page7.txt: [('M', 'ILLIONS')] AmSn18900417-V05-16-page8.txt: [('Cath', 'olic')] AmSn18900424-V05-17-page7.txt: [('eve', 'ryone'), ('W', 'orld')] AmSn18900501-V05-18-page1.txt: [('til', 'th')] AmSn18900501-V05-18-page2.txt: [('on', 'ly'), ('Chris', 'tians')] AmSn18900501-V05-18-page6.txt: [('objec', 'tionable')] 
AmSn18900508-V05-19-page3.txt: [('S', 'ENTINEL')] AmSn18900515-V05-20-page1.txt: [('minis', 'ters')] AmSn18900515-V05-20-page5.txt: [('i', 'mportant')] AmSn18900515-V05-20-page7.txt: [('eve', 'ryone')] AmSn18900522-V05-21-page2.txt: [('Chr', 'ist'), ('right', 'eousness')] AmSn18900522-V05-21-page3.txt: [('con', 'sume')] AmSn18900529-V05-22-page2.txt: [('speak', 'easies')] AmSn18900529-V05-22-page7.txt: [('W', 'onderful')] AmSn18900605-V05-23-page7.txt: [('W', 'orld')] AmSn18900612-V05-24-page3.txt: [('govern', 'Ment')] AmSn18900612-V05-24-page7.txt: [('at', 'tached')] AmSn18900612-V05-24-page8.txt: [('state', 'ment')] AmSn18900619-V05-25-page2.txt: [('spe', 'cifically')] AmSn18900619-V05-25-page7.txt: [('M', 'IGHTY')] AmSn18900626-V05-26-page7.txt: [('hea', 'th'), ('M', 'IGHTY')] AmSn18900717-V05-28-page6.txt: [('protec', 'tion')] AmSn18900724-V05-29-page1.txt: [('govern', 'ments')] AmSn18900724-V05-29-page7.txt: [('the', 'ba')] AmSn18900731-V05-30-page1.txt: [('SEN', 'TINEL')] AmSn18900731-V05-30-page2.txt: [('a', 'lso')] AmSn18900731-V05-30-page6.txt: [('o', 'ne')] AmSn18900814-V05-32-page4.txt: [('consta', 'bles'), ('a', 'boriginal')] AmSn18900814-V05-32-page8.txt: [('G', 'overnment')] AmSn18900821-V05-33-page5.txt: [('beau', 'tiful')] AmSn18900821-V05-33-page7.txt: [('invest', 'ment'), ('sten', 'cil'), ('at', 'tached')] AmSn18900828-V05-34-page7.txt: [('Ob', 'ject'), ('at', 'tached')] AmSn18900904-V05-35-page7.txt: [('P', 'IE')] AmSn18900911-V05-36-page2.txt: [('in', 'hibiting')] AmSn18900911-V05-36-page3.txt: [('SEN', 'TINEL')] AmSn18900918-V05-37-page1.txt: [('B', 'ib'), ('a', 'griC'), ('o', 'ft'), ('m', 'ay'), ('se', 'cy'), ('t', 'Ia')] AmSn18900918-V05-37-page4.txt: [('i', 'asi'), ('s', 'IL'), ('P', 'EtIt'), ('A', 'te'), ('m', 'Ap'), ('t', 'oi'), ('b', 'ei')] AmSn18900918-V05-37-page5.txt: [('lA', 'rd')] AmSn18900918-V05-37-page6.txt: [('o', 'ut')] AmSn18900918-V05-37-page8.txt: [('era', 'th'), ('n', 'ip'), ('e', 'ying'), ('d', 'id'), ('s', 'op')] 
AmSn18900925-V05-38-page8.txt: [('SEN', 'TINEL'), ('over', 'whelmingly')] AmSn18901009-V05-40-page6.txt: [('AMER', 'ICAN')] AmSn18901016-V05-41-page4.txt: [('regula', 'tions'), ('legisla', 'tion')] AmSn18901016-V05-41-page5.txt: [('amuse', 'ments')] AmSn18901016-V05-41-page6.txt: [('CON', 'SERVATOR')] AmSn18901016-V05-41-page7.txt: [('W', 'orkings')] AmSn18901023-V05-42-page1.txt: [('BY', 'TES')] AmSn18901023-V05-42-page2.txt: [('a', 'nd'), ('stud', 'ies'), ('to', 'tal')] AmSn18901023-V05-42-page3.txt: [('to', 're')] AmSn18901023-V05-42-page4.txt: [('a', 'bOVe'), ('a', 'nd')] AmSn18901023-V05-42-page6.txt: [('cap', 'tured'), ('move', 'ment')] AmSn18901030-V05-43-page1.txt: [('Living', 'ston')] AmSn18901030-V05-43-page3.txt: [('Hollow', 'ay'), ('author', 'ities')] AmSn18901030-V05-43-page5.txt: [('AMER', 'ICA')] AmSn18901106-V05-44-page3.txt: [('SEN', 'TINEL'), ('in', 'Volves')] AmSn18901106-V05-44-page7.txt: [('con', 'Stantly')] AmSn18901113-V05-45-page2.txt: [('P', 'ress'), ('Govern', 'Ment')] AmSn18901113-V05-45-page7.txt: [('A', 'ND')] AmSn18901120-V05-46-page3.txt: [('ra', 'pidly')] AmSn18901127-V05-47-page4.txt: [('me', 'morialize'), ('per', 'fect')] AmSn18901127-V05-47-page7.txt: [('con', 'stantly'), ('T', 'ams')] AmSn18901127-V05-47-page8.txt: [('rega', 'rds')] AmSn18901204-V05-48-page8.txt: [('neces', 'sary')] AmSn18901211-V05-49-page4.txt: [('repugna', 'nt')] AmSn18901211-V05-49-page7.txt: [('con', 'stantly')] AmSn18901218-V05-50-page3.txt: [('minor', 'ity'), ('theol', 'ogy'), ('rewa', 'rds'), ('the', 're')] AmSn18901218-V05-50-page5.txt: [('the', 'refore')] AmSn18901218-V05-50-page6.txt: [('d', 'ays')] AmSn18901218-V05-50-page9.txt: [('con', 'vention'), ('B', 'ible')] AmSn18910101-V06-01-page1.txt: [('SEN', 'TINEL')] AmSn18910101-V06-01-page3.txt: [('w', 'ould')] AmSn18910101-V06-01-page4.txt: [('sup', 'pression')] AmSn18910101-V06-01-page5.txt: [('tor', 'tures')] AmSn18910108-V06-02-page6.txt: [('legisla', 'tion')] AmSn18910115-V06-03-page3.txt: [('SEN', 
'TINEL'), ('pros', 'pered')] AmSn18910115-V06-03-page4.txt: [('A', 'merican')] AmSn18910115-V06-03-page7.txt: [('pe', 'ns'), ('l', 'eather')] AmSn18910115-V06-03-page8.txt: [('a', 'nd'), ('AMER', 'ICAN')] AmSn18910122-V06-04-page2.txt: [('t', 'iti'), ('pro', 'nounced')] AmSn18910122-V06-04-page6.txt: [('prop', 'erty')] AmSn18910122-V06-04-page8.txt: [('AMER', 'ICAN')] AmSn18910129-V06-05-page2.txt: [('de', 'manded')] AmSn18910129-V06-05-page3.txt: [('per', 'ception'), ('in', 'stinct')] AmSn18910129-V06-05-page4.txt: [('Govern', 'Ment')] AmSn18910129-V06-05-page7.txt: [('Y', 'ork'), ('devel', 'opment')] AmSn18910212-V06-07-page3.txt: [('PE', 'TITIONED'), ('Leg', 'islature')] AmSn18910212-V06-07-page4.txt: [('An', 'oa'), ('AMER', 'ICAN')] AmSn18910212-V06-07-page8.txt: [('anniver', 'sary')] AmSn18910219-V06-08-page3.txt: [('SEN', 'TINEL')] AmSn18910219-V06-08-page7.txt: [('w', 'ork')] AmSn18910226-V06-09-page7.txt: [('w', 'ork')] AmSn18910226-V06-09-page8.txt: [('de', 'Cided')] AmSn18910305-V06-10-page4.txt: [('num', 'ber')] AmSn18910305-V06-10-page6.txt: [('mani', 'festation'), ('as', 'sured')] AmSn18910305-V06-10-page8.txt: [('AMER', 'ICAN')] AmSn18910319-V06-12-page6.txt: [('A', 'MERICAN')] AmSn18910319-V06-12-page7.txt: [('W', 'ORSHIP')] AmSn18910326-V06-13-page8.txt: [('worsh', 'ip')] AmSn18910402-V06-14-page2.txt: [('SEN', 'TINEL')] AmSn18910402-V06-14-page4.txt: [('a', 'bl')] AmSn18910402-V06-14-page8.txt: [('A', 'MERICAN'), ('inter', 'ests')] AmSn18910409-V06-15-page4.txt: [('the', 'se')] AmSn18910409-V06-15-page7.txt: [('a', 'ges')] AmSn18910416-V06-16-page1.txt: [('In', 'ti')] AmSn18910416-V06-16-page2.txt: [('state', 'ment')] AmSn18910416-V06-16-page3.txt: [('A', 'nd')] AmSn18910416-V06-16-page8.txt: [('Y', 'ork')] AmSn18910423-V06-17-page4.txt: [('infringe', 'ment')] AmSn18910423-V06-17-page6.txt: [('a', 'nd')] AmSn18910423-V06-17-page8.txt: [('the', 'se'), ('con', 'gregation')] AmSn18910430-V06-18-page7.txt: [('THE', 'Ca')] AmSn18910507-V06-19-page5.txt: 
[('AM', 'ERICAN')] AmSn18910507-V06-19-page8.txt: [('A', 'MERICAN')] AmSn18910514-V06-20-page3.txt: [('op', 'portunity')] AmSn18910514-V06-20-page7.txt: [('at', 'tached')] AmSn18910521-V06-21-page4.txt: [('infringe', 'ment')] AmSn18910521-V06-21-page7.txt: [('F', 'iNe'), ('W', 'ith')] AmSn18910604-V06-23-page6.txt: [('AMER', 'ICAN')] AmSn18910604-V06-23-page8.txt: [('for', 'th')] AmSn18910611-V06-24-page2.txt: [('SEN', 'TINEL'), ('A', 'ugustus')] AmSn18910618-V06-25-page6.txt: [('Com', "mittee's"), ('AM', 'ERICAN')] AmSn18910618-V06-25-page7.txt: [('Y', 'ORK')] AmSn18910625-V06-26-page3.txt: [('pun', 'ished')] AmSn18910625-V06-26-page4.txt: [('in', 'ti')] AmSn18910625-V06-26-page5.txt: [('prac', 'tically')] AmSn18910625-V06-26-page7.txt: [('for', 'te')] AmSn18910709-V06-27-page3.txt: [('con', 'versant')] AmSn18910709-V06-27-page6.txt: [('to', 'co'), ('thor', 'oughly')] AmSn18910709-V06-27-page7.txt: [('t', 'iA')] AmSn18910709-V06-27-page8.txt: [('frater', 'nity')] AmSn18910716-V06-28-page1.txt: [('the', 're')] AmSn18910716-V06-28-page6.txt: [('C', 'urt')] AmSn18910716-V06-28-page8.txt: [('SEN', 'TINEL')] AmSn18910806-V06-31-page6.txt: [('Amer', 'ican')] AmSn18910827-V06-34-page5.txt: [('Chris', 'tian')] AmSn18910827-V06-34-page7.txt: [('HEN', 'RY')] AmSn18910827-V06-34-page8.txt: [('min', 'ister')] AmSn18910903-V06-35-page1.txt: [('Govern', 'ment')] AmSn18910903-V06-35-page2.txt: [('prin', 'ciple')] AmSn18910903-V06-35-page5.txt: [('Ex', 'amples')] AmSn18910903-V06-35-page6.txt: [('de', 'tectives'), ('AMER', 'ICAN')] AmSn18910903-V06-35-page7.txt: [('Ca', 'Ns'), ('W', 'itchcraft'), ('to', 'ld')] AmSn18910910-V06-36-page1.txt: [('legisla', 'tion')] AmSn18910910-V06-36-page4.txt: [('quota', 'tions'), ('ma', 'jority')] AmSn18910910-V06-36-page5.txt: [('invec', 'tives'), ('per', 'se'), ('char', 'acterizes'), ('con', 'dition'), ('condi', 'tion'), ('Chris', 'tian')] AmSn18910910-V06-36-page6.txt: [('hun', 'dred')] AmSn18910910-V06-36-page7.txt: [('P', 'ress')] 
AmSn18910917-V06-37-page3.txt: [('be', 'lieves'), ('end', 'ureth'), ('man', 'agers')] AmSn18910917-V06-37-page6.txt: [('pro', 'tects'), ('Gov', 'ernor'), ('sym', 'pathy'), ('Chris', 'tian')] AmSn18910917-V06-37-page7.txt: [('Or', 'namental')] AmSn18910924-V06-38-page2.txt: [('min', 'ister')] AmSn18910924-V06-38-page5.txt: [('per', 'secution')] AmSn18910924-V06-38-page7.txt: [('a', 'nd')] AmSn18911022-V06-41-page1.txt: [('in', 'Sisted')] AmSn18911022-V06-41-page3.txt: [('SEN', 'TINEL')] AmSn18911022-V06-41-page4.txt: [('per', 'se')] AmSn18911022-V06-41-page6.txt: [('agree', 'ment'), ('Camp', 'bellites')] AmSn18911022-V06-41-page8.txt: [('state', 'ment'), ('a', 'llow')] AmSn18911112-V06-44-page8.txt: [('T', 'IE')] AmSn18911126-V06-46-page4.txt: [('expres', 'sed')] AmSn18911126-V06-46-page8.txt: [('legal', 'ize')] AmSn18911203-V06-47-page1.txt: [('SEN', 'TINEL')] AmSn18911203-V06-47-page3.txt: [('IN', 'STITUTION')] AmSn18911203-V06-47-page7.txt: [('the', 'ba'), ('me', 'chanics')] AmSn18911210-V06-48-page6.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER')] AmSn18911217-V06-49-page4.txt: [('christian', 'ized')] AmSn18911217-V06-49-page5.txt: [('LIV', 'INGSTONE'), ('differ', 'ent')] AmSn18911224-V06-50-page2.txt: [('persecu', 'tion')] AmSn18911224-V06-50-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('LIV', 'INGSTONE')] AmSn18920107-V07-01-page5.txt: [('reli', 'gious')] AmSn18920107-V07-01-page6.txt: [('S', 'te')] AmSn18920107-V07-01-page7.txt: [('Pi', 'oneer')] AmSn18920107-V07-01-page8.txt: [('A', 'ny')] AmSn18920114-V07-02-page5.txt: [('nefa', 'rious')] AmSn18920114-V07-02-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('HEN', 'RY')] AmSn18920121-V07-03-page3.txt: [('see', 'th'), ('elo', 'quently')] AmSn18920128-V07-04-page5.txt: [('rec', 'ognize')] AmSn18920128-V07-04-page7.txt: [('HEN', 'RY'), ('diction', 'ary'), ('FOR', 'EIGN')] AmSn18920204-V07-05-page5.txt: [('P', 'ig'), ('a', 'cc'), ('A', 'CC'), ('o', 'ct'), ('b', 'id')] AmSn18920204-V07-05-page6.txt: [('SEN', 
'TINEL'), ('to', 're')] AmSn18920204-V07-05-page7.txt: [('ORIGIN', 'AL'), ('HEN', 'RY'), ('GAZ', 'ETTEER')] AmSn18920211-V07-06-page5.txt: [('tri', 'bunal')] AmSn18920211-V07-06-page7.txt: [('e', 'LI'), ('HEN', 'RY'), ('diction', 'ary'), ('FOR', 'EIGN')] AmSn18920218-V07-07-page2.txt: [('be', 'lial'), ('Chris', 'tianity'), ('indict', 'ment')] AmSn18920218-V07-07-page3.txt: [('fr', 'om')] AmSn18920218-V07-07-page6.txt: [('pub', 'lished')] AmSn18920218-V07-07-page7.txt: [('FOR', 'EIGN')] AmSn18920225-V07-08-page7.txt: [('GAZ', 'ETTEER')] AmSn18920303-V07-09-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary'), ('miner', 'als'), ('Chris', 'tianity')] AmSn18920310-V07-10-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')] AmSn18920317-V07-11-page7.txt: [('diction', 'ary')] AmSn18920324-V07-12-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')] AmSn18920331-V07-13-page1.txt: [('indi', 'cates')] AmSn18920331-V07-13-page3.txt: [('A', 'fter')] AmSn18920331-V07-13-page5.txt: [('by', 're')] AmSn18920407-V07-14-page7.txt: [('GEN', 'ERAL')] AmSn18920414-V07-15-page2.txt: [('SEN', 'TINEL'), ('the', 'se')] AmSn18920414-V07-15-page4.txt: [('com', 'mittee')] AmSn18920414-V07-15-page7.txt: [('Am', 'erican')] AmSn18920428-V07-17-page7.txt: [('l', 'imes'), ('t', 'ok')] AmSn18920505-V07-18-page3.txt: [('f', 'oresaw')] AmSn18920505-V07-18-page4.txt: [('in', 'corporating')] AmSn18920505-V07-18-page5.txt: [('AME', 'RICAN')] AmSn18920519-V07-20-page6.txt: [('w', 'hich')] AmSn18920519-V07-20-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary')] AmSn18920526-V07-21-page3.txt: [('in', 'asmuch')] AmSn18920526-V07-21-page4.txt: [('author', 'ities')] AmSn18920526-V07-21-page5.txt: [('CO', 'CO'), ('In', 'teresting'), ('CO', 'Co'), ('it', 'al'), ('Go', 'vernment')] AmSn18920526-V07-21-page7.txt: [('FOR', 'EIGN')] AmSn18920602-V07-22-page4.txt: [('the', 're')] AmSn18920602-V07-22-page7.txt: [('in', 'hibition')] AmSn18920609-V07-23-page1.txt: [('Con', 'gress')] AmSn18920609-V07-23-page3.txt: [('T', 'iE')] 
AmSn18920616-V07-24-page7.txt: [('H', 'Id'), ('H', 'UI')] AmSn18920623-V07-25-page2.txt: [('dis', 'turbed')] AmSn18920623-V07-25-page4.txt: [('gen', 'erally'), ('w', 'ith')] AmSn18920630-V07-26-page5.txt: [('AMER', 'ICAN')] AmSn18920630-V07-26-page8.txt: [('SEN', 'TINEL')] AmSn18920714-V07-27-page6.txt: [('SEN', 'TINEL'), ('i', 'ncline')] AmSn18920721-V07-28-page7.txt: [('In', 'ca'), ('FOR', 'EIGN')] AmSn18920728-V07-29-page3.txt: [('Con', 'stitution')] AmSn18920728-V07-29-page5.txt: [('to', 'te')] AmSn18920728-V07-29-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('diction', 'ary')] AmSn18920728-V07-29-page8.txt: [('men', 'pleasers')] AmSn18920804-V07-30-page7.txt: [('DICTION', 'ARY'), ('miner', 'als')] AmSn18920811-V07-31-page3.txt: [('a', 'gainst')] AmSn18920811-V07-31-page4.txt: [('Expo', 'sition'), ('he', 're')] AmSn18920811-V07-31-page7.txt: [('GAZ', 'ETTEER'), ('diction', 'ary')] AmSn18920818-V07-32-page7.txt: [('FOR', 'EIGN')] AmSn18920818-V07-32-page8.txt: [('win', 'ce')] AmSn18920825-V07-33-page2.txt: [('to', 're')] AmSn18920825-V07-33-page3.txt: [('per', 'se')] AmSn18920825-V07-33-page7.txt: [('r', 'efer')] AmSn18920825-V07-33-page8.txt: [('annoy', 'ance')] AmSn18920901-V07-34-page1.txt: [('E', 'qual')] AmSn18920901-V07-34-page3.txt: [('con', 'nection')] AmSn18920901-V07-34-page4.txt: [('four', 'teenth'), ('and', 're')] AmSn18920901-V07-34-page7.txt: [('GAZ', 'ETTEER'), ('miner', 'als')] AmSn18920908-V07-35-page8.txt: [('in', 'delibly')] AmSn18920915-V07-36-page7.txt: [('HEN', 'RY')] AmSn18920915-V07-36-page8.txt: [('per', 'mitted')] AmSn18920929-V07-38-page6.txt: [('and', 'es')] AmSn18920929-V07-38-page7.txt: [('FOR', 'EIGN'), ('in', 'vited')] AmSn18921006-V07-39-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary')] AmSn18921006-V07-39-page8.txt: [('o', 'ne')] AmSn18921013-V07-40-page1.txt: [('w', 'hich')] AmSn18921020-V07-41-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')] AmSn18921027-V07-42-page1.txt: [('AMER', 'ICAN')] AmSn18921027-V07-42-page7.txt: 
[('Christian', 'ity'), ('DICTION', 'ARY'), ('FOR', 'EIGN')] AmSn18921103-V07-43-page4.txt: [('m', 'eeting')] AmSn18921110-V07-44-page8.txt: [('S', 'tE')] AmSn18921117-V07-45-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')] AmSn18921117-V07-45-page8.txt: [('per', 'centage')] AmSn18921124-V07-46-page7.txt: [('leg', 'islation')] AmSn18921201-V07-47-page7.txt: [('GAZ', 'ETTEER'), ('FOR', 'EIGN')] AmSn18921208-V07-48-page7.txt: [('SEN', 'TINEL')] AmSn18921208-V07-48-page8.txt: [('to', 'ft')] AmSn18921215-V07-49-page2.txt: [('AMER', 'ICAN')] AmSn18921215-V07-49-page7.txt: [('HEN', 'RY')] AmSn18921215-V07-49-page8.txt: [('St', 'atesman'), ('St', 'evenson'), ('T', 'IE'), ('St', 'ates')] AmSn18921222-V07-50-page1.txt: [('LIB', 'ERTY')] AmSn18921222-V07-50-page8.txt: [('SEN', 'TINEL')] AmSn18930112-V08-02-page2.txt: [('SEN', 'TINEL')] AmSn18930112-V08-02-page7.txt: [('H', 'AL')] AmSn18930112-V08-02-page8.txt: [('non', 'feasance'), ('Jan', 'uary'), ('add', 'ress')] AmSn18930119-V08-03-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('DICTION', 'ARY')] AmSn18930126-V08-04-page7.txt: [('C', 'reola'), ('t', 'ome')] AmSn18930126-V08-04-page8.txt: [('add', 'ress')] AmSn18930202-V08-05-page6.txt: [('w', 'orld')] AmSn18930209-V08-06-page4.txt: [('A', 'lk')] AmSn18930209-V08-06-page6.txt: [('Ines', 'timable')] AmSn18930209-V08-06-page7.txt: [('hodge', 'podge')] AmSn18930216-V08-07-page6.txt: [('C', 'hristian'), ('N', 'ation'), ('A', 're')] AmSn18930216-V08-07-page7.txt: [('AMER', 'ICAN')] AmSn18930309-V08-10-page7.txt: [('at', 'torney'), ('THE', 'CA')] AmSn18930316-V08-11-page5.txt: [('per', 'se')] AmSn18930316-V08-11-page7.txt: [('DICTION', 'ARY')] AmSn18930406-V08-14-page3.txt: [('exemp', 'tion')] AmSn18930406-V08-14-page7.txt: [('V', 'IC'), ('i', 'NT'), ('FUR', 'NISHED')] AmSn18930406-V08-14-page8.txt: [('add', 'ress')] AmSn18930413-V08-15-page5.txt: [('l', 'aw')] AmSn18930420-V08-16-page7.txt: [('DICTION', 'ARY')] AmSn18930427-V08-17-page2.txt: [('JOSE', 'PH')] 
AmSn18930504-V08-18-page7.txt: [('DICTION', 'ARY'), ('FUR', 'NISHED')] AmSn18930518-V08-20-page7.txt: [('DICTION', 'ARY')] AmSn18930601-V08-22-page7.txt: [('GAZ', 'ETTEER'), ('gaz', 'etteer')] AmSn18930608-V08-23-page4.txt: [('w', 'orship')] AmSn18930608-V08-23-page6.txt: [('lib', 'erty')] AmSn18930615-V08-24-page5.txt: [('con', 'stitutionality')] AmSn18930615-V08-24-page8.txt: [('SEN', 'TINEL')] AmSn18930629-V08-26-page8.txt: [('A', 'MI')] AmSn18930706-V08-27-page2.txt: [('a', 'fferent')] AmSn18930706-V08-27-page3.txt: [('per', 'secution')] AmSn18930706-V08-27-page6.txt: [('h', 'eretics')] AmSn18930706-V08-27-page7.txt: [('DICTION', 'ARY')] AmSn18930720-V08-29-page4.txt: [('mission', 'ary')] AmSn18930720-V08-29-page8.txt: [('rep', 'utation'), ('c', 'ato')] AmSn18930727-V08-30-page2.txt: [('amend', 'ments'), ('AMER', 'ICAN')] AmSn18930803-V08-31-page7.txt: [('W', 'afers'), ('diction', 'ary')] AmSn18930810-V08-32-page3.txt: [('Wo', "rld's")] AmSn18930810-V08-32-page5.txt: [('In', 'terior')] AmSn18930810-V08-32-page8.txt: [('Haw', 'keye')] AmSn18930817-V08-33-page6.txt: [('per', 'se')] AmSn18930831-V08-34-page2.txt: [('van', 'ity')] AmSn18930831-V08-34-page5.txt: [('per', 'se')] AmSn18930831-V08-34-page8.txt: [('pro', 'hibited')] AmSn18930907-V08-35-page2.txt: [('a', 'bstract')] AmSn18930914-V08-36-page4.txt: [('hi', 'gher'), ('REESTABLISH', 'MENT')] AmSn18930921-V08-37-page4.txt: [('the', 'Es')] AmSn18930928-V08-38-page5.txt: [('SEN', 'TINEL')] AmSn18930928-V08-38-page7.txt: [('r', 'esearches')] AmSn18930928-V08-38-page8.txt: [('m', 'ade')] AmSn18931012-V08-40-page2.txt: [('SEN', 'TINEL')] AmSn18931012-V08-40-page3.txt: [('mod', 'ern'), ('PUN', 'ISHING')] AmSn18931012-V08-40-page7.txt: [('DICTION', 'ARY'), ('sub', 'stantially')] AmSn18931019-V08-41-page7.txt: [('P', 'UBLIC'), ('W', 'ork'), ('C', 'oncluding'), ('de', 'scription')] AmSn18931026-V08-42-page2.txt: [('are', 'Ca'), ('Ca', 'sar')] AmSn18931026-V08-42-page3.txt: [('dec', 'laration')] 
AmSn18931026-V08-42-page6.txt: [('r', 'ather')] AmSn18931026-V08-42-page7.txt: [('GEN', 'ERAL'), ('HEN', 'RY'), ('GAZ', 'ETTEER')] AmSn18931026-V08-42-page8.txt: [('o', 'ne')] AmSn18931102-V08-43-page4.txt: [('hob', 'nobbing')] AmSn18931102-V08-43-page8.txt: [('to', 'co')] AmSn18931109-V08-44-page2.txt: [('sub', 'jects')] AmSn18931109-V08-44-page8.txt: [('j', 'udgment')] AmSn18931116-V08-45-page1.txt: [('phys', 'ically')] AmSn18931116-V08-45-page3.txt: [('NO', 'VEMBER')] AmSn18931116-V08-45-page7.txt: [('p', 'es')] AmSn18931116-V08-45-page8.txt: [('SEN', 'TINEL')] AmSn18931123-V08-46-page1.txt: [('are', 'Ca')] AmSn18931123-V08-46-page2.txt: [('SEN', 'TINEL')] AmSn18931123-V08-46-page4.txt: [('light', 'heartedness'), ('per', 'se')] AmSn18931130-V08-47-page1.txt: [('contempo', 'raneous')] AmSn18931130-V08-47-page6.txt: [('threat', 'ened')] AmSn18931207-V08-48-page1.txt: [('char', 'acter'), ('and', 're'), ('trans', 'gress')] AmSn18931207-V08-48-page4.txt: [('pro', 'tected'), ('pro', 'Vides'), ('establish', 'Ment')] AmSn18931214-V08-49-page5.txt: [('SEN', 'TINEL'), ('B', 'arabbas')] AmSn18931221-V08-50-page3.txt: [('heart', 'ily')] AmSn18931221-V08-50-page8.txt: [('ten', 'ts'), ('A', 'ccording')] AmSn18940104-V09-01-page1.txt: [('SEN', 'TINEL')] AmSn18940104-V09-01-page2.txt: [('con', 'tinent')] AmSn18940104-V09-01-page8.txt: [('c', 'id')] AmSn18940111-V09-02-page4.txt: [('o', 'rder')] AmSn18940118-V09-03-page2.txt: [('abso', 'lutely')] AmSn18940118-V09-03-page6.txt: [('Chris', 'tian')] AmSn18940125-V09-04-page2.txt: [('enthu', 'siastic')] AmSn18940125-V09-04-page7.txt: [('P', 'UBLIC')] AmSn18940201-V09-05-page1.txt: [('Y', 'ork')] AmSn18940201-V09-05-page2.txt: [('bus', 'es')] AmSn18940201-V09-05-page6.txt: [('d', 'ay')] AmSn18940208-V09-06-page2.txt: [('cha', 'mpion')] AmSn18940308-V09-10-page2.txt: [('A', 'NT')] AmSn18940315-V09-11-page1.txt: [('SEN', 'TINEL')] AmSn18940315-V09-11-page2.txt: [('to', 'Ca')] AmSn18940322-V09-12-page6.txt: [('Chris', 'tian')] 
AmSn18940322-V09-12-page7.txt: [('diction', 'ary')] AmSn18940329-V09-13-page3.txt: [('tes', 'tifies')] AmSn18940329-V09-13-page4.txt: [('A', 'nd')] AmSn18940329-V09-13-page7.txt: [('GAZ', 'ETTEER')] AmSn18940405-V09-14-page2.txt: [('CA', 'NT')] AmSn18940405-V09-14-page7.txt: [('de', 'claring'), ('GAZ', 'ETTEER')] AmSn18940412-V09-15-page2.txt: [('a', 'll')] AmSn18940412-V09-15-page6.txt: [('reli', 'gious')] AmSn18940412-V09-15-page8.txt: [('w', 'ork')] AmSn18940419-V09-16-page7.txt: [('GAZ', 'ETTEER')] AmSn18940426-V09-17-page5.txt: [('Con', 'stitutional')] AmSn18940503-V09-18-page4.txt: [('u', 'nited')] AmSn18940503-V09-18-page5.txt: [('bay', 'onet')] AmSn18940503-V09-18-page7.txt: [('Y', 'es')] AmSn18940510-V09-19-page5.txt: [('SEN', 'TINEL'), ('author', 'ity')] AmSn18940524-V09-21-page6.txt: [('u', 'pon')] AmSn18940524-V09-21-page8.txt: [('to', 'co'), ('w', 'ith')] AmSn18940531-V09-22-page3.txt: [('present', 'ation')] AmSn18940607-V09-23-page6.txt: [('far', 'cical'), ('per', 'mitted')] AmSn18940614-V09-24-page2.txt: [('CON', 'STITUTIONAL')] AmSn18940614-V09-24-page3.txt: [('LIB', 'ERTY'), ('CON', 'TROL'), ('ins', 'tructions'), ('V', 'ulture')] AmSn18940614-V09-24-page7.txt: [('sub', 'stantial')] AmSn18940621-V09-25-page5.txt: [('SEN', 'TINEL')] AmSn18940621-V09-25-page7.txt: [('sub', 'stantial'), ('diction', 'ary')] AmSn18940628-V09-26-page3.txt: [('polit', 'ical')] AmSn18940628-V09-26-page5.txt: [('in', 'jects'), ('The', 'osophic')] AmSn18940712-V09-28-page1.txt: [('pro', 'duction'), ('govern', 'ment')] AmSn18940719-V09-29-page5.txt: [('he', 're')] AmSn18940726-V09-30-page1.txt: [('JON', 'ES')] AmSn18940802-V09-31-page4.txt: [('com', 'plex')] AmSn18940823-V09-33-page3.txt: [('SEN', 'TINEL')] AmSn18940823-V09-33-page4.txt: [('Switz', 'erland')] AmSn18940823-V09-33-page6.txt: [('Ch', 'ristian')] AmSn18940830-V09-34-page2.txt: [('e', 'ther')] AmSn18940830-V09-34-page5.txt: [('pro', 'duced')] AmSn18940830-V09-34-page7.txt: [('PRO', 'PHECY')] 
AmSn18940906-V09-35-page3.txt: [('shame', 'ful')] AmSn18940906-V09-35-page7.txt: [('GEN', 'ERAL')] AmSn18940913-V09-36-page3.txt: [('contra', 'dictions')] AmSn18940913-V09-36-page6.txt: [('M', 'inor')] AmSn18940920-V09-37-page2.txt: [('in', 'vite'), ('contra', 'ry')] AmSn18940920-V09-37-page3.txt: [('con', 'trol')] AmSn18940927-V09-38-page6.txt: [('H', 'AL'), ('A', 'NT')] AmSn18941004-V09-39-page1.txt: [('a', 'reli'), ('reli', 'gious')] AmSn18941004-V09-39-page4.txt: [('Depart', 'ment')] AmSn18941004-V09-39-page5.txt: [('s', 'ap'), ('la', 'gs'), ('E', 'gg'), ('P', 'ct')] AmSn18941004-V09-39-page7.txt: [('por', 'trayal'), ('A', 'tE'), ('congregation', 'al')] AmSn18941004-V09-39-page8.txt: [('a', 'id')] AmSn18941011-V09-40-page2.txt: [('bu', 'ilded')] AmSn18941018-V09-41-page2.txt: [('foun', 'dation')] AmSn18941025-V09-42-page6.txt: [('C', 'tr')] AmSn18941025-V09-42-page7.txt: [('regis', 'tered')] AmSn18941025-V09-42-page8.txt: [('AMER', 'ICAN')] AmSn18941115-V09-45-page2.txt: [('pro', 'phets')] AmSn18941115-V09-45-page4.txt: [('gov', 'ernment')] AmSn18941115-V09-45-page7.txt: [('pro', 'fusely')] AmSn18941115-V09-45-page8.txt: [('el', 'se'), ('to', 'co')] AmSn18941122-V09-46-page5.txt: [('well', 'es'), ('com', 'munity')] AmSn18941129-V09-47-page2.txt: [('enforce', 'th')] AmSn18941129-V09-47-page7.txt: [('R', 'eligious'), ('W', 'eekly')] AmSn18941213-V09-49-page3.txt: [('Pa', 'ternoster')] AmSn18941213-V09-49-page4.txt: [('f', 'ainteth')] AmSn18941220-V09-50-page10.txt: [('Bap', 'tists')] AmSn18941220-V09-50-page6.txt: [('to', 'ning'), ('the', 're'), ('sew', 'erage')] AmSn18941220-V09-50-page7.txt: [('stra', 'nger')] AmSn18941220-V09-50-page8.txt: [('SEN', 'TINEL')] AmSn18941220-V09-50-page9.txt: [('TO', 'TEM')] AmSn18950103-V10-01-page1.txt: [('SEN', 'TINEL'), ('ac', 'complished')] AmSn18950103-V10-01-page7.txt: [('H', 'ELPFUL')] AmSn18950110-V10-02-page1.txt: [('contempora', 'ry'), ('e', 'ternally'), ('pun', 'ishable')] AmSn18950110-V10-02-page2.txt: [('faith', 
'ful')] AmSn18950110-V10-02-page4.txt: [('amend', 'ments')] AmSn18950117-V10-03-page7.txt: [('in', 'quirer')] AmSn18950117-V10-03-page8.txt: [('Prot', 'estant'), ('AMER', 'ICAN')] AmSn18950124-V10-04-page2.txt: [('Rom', 'anists')] AmSn18950124-V10-04-page3.txt: [('ques', 'tions')] AmSn18950124-V10-04-page7.txt: [('G', 'raphic'), ('W', 'ith')] AmSn18950124-V10-04-page8.txt: [('AMER', 'ICAN')] AmSn18950131-V10-05-page3.txt: [('M', 'KT')] AmSn18950131-V10-05-page4.txt: [('Le', 'Iter')] AmSn18950131-V10-05-page7.txt: [('SCRIP', 'TURAL'), ('Y', 'ork')] AmSn18950214-V10-07-page1.txt: [('SEN', 'TINEL')] AmSn18950214-V10-07-page4.txt: [('the', 're')] AmSn18950214-V10-07-page6.txt: [('Govern', 'ment')] AmSn18950221-V10-08-page1.txt: [('SEN', 'TINEL')] AmSn18950221-V10-08-page3.txt: [('SEN', 'TINEL')] AmSn18950221-V10-08-page4.txt: [('SEN', 'TINEL')] AmSn18950228-V10-09-page2.txt: [('a', 'pplication')] AmSn18950228-V10-09-page4.txt: [('ex', 'emptions')] AmSn18950228-V10-09-page6.txt: [('Se', 'dalia')] AmSn18950228-V10-09-page7.txt: [('I', 'CE')] AmSn18950307-V10-10-page8.txt: [('SEN', 'TINEL')] AmSn18950314-V10-11-page2.txt: [('Method', 'ist')] AmSn18950321-V10-12-page5.txt: [('so', 'ciety')] AmSn18950328-V10-13-page4.txt: [('improve', 'ments')] AmSn18950328-V10-13-page6.txt: [('to', 'ke')] AmSn18950328-V10-13-page7.txt: [('pro', 'fusely')] AmSn18950404-V10-14-page5.txt: [('peo', 'ple')] AmSn18950404-V10-14-page6.txt: [('in', 'quirer')] AmSn18950404-V10-14-page8.txt: [('edit', 'orial')] AmSn18950411-V10-15-page2.txt: [('de', 'nial')] AmSn18950411-V10-15-page5.txt: [('et', 'tA')] AmSn18950418-V10-16-page7.txt: [('pro', 'tem'), ('en', 'couraged')] AmSn18950425-V10-17-page1.txt: [('minis', 'ters')] AmSn18950425-V10-17-page3.txt: [('a', 'nd')] AmSn18950425-V10-17-page6.txt: [('B', 'argain')] AmSn18950425-V10-17-page7.txt: [('con', 'dition')] AmSn18950502-V10-18-page1.txt: [('state', 'ments')] AmSn18950502-V10-18-page5.txt: [('judi', 'cial')] AmSn18950502-V10-18-page6.txt: 
[('De', 'mand')] AmSn18950502-V10-18-page7.txt: [('to', 're')] AmSn18950509-V10-19-page3.txt: [('AMER', 'ICAN')] AmSn18950509-V10-19-page4.txt: [('ha', 've')] AmSn18950509-V10-19-page6.txt: [('we', 'ek')] AmSn18950516-V10-20-page4.txt: [('chris', 'tening')] AmSn18950523-V10-21-page1.txt: [('de', 'feated')] AmSn18950523-V10-21-page3.txt: [('esta', 'blished')] AmSn18950523-V10-21-page5.txt: [('big', 'otry')] AmSn18950523-V10-21-page7.txt: [('ToRI', 'ES'), ('diction', 'ary')] AmSn18950523-V10-21-page8.txt: [('Prot', 'estants'), ('ma', 'jority')] AmSn18950530-V10-22-page5.txt: [('SEN', 'TINEL')] AmSn18950530-V10-22-page8.txt: [('con', 'viction')] AmSn18950606-V10-23-page4.txt: [('LEG', 'ISLATION'), ('PUNISH', 'MENT'), ('PUN', 'ISHED'), ('CHRIS', 'TIAN')] AmSn18950606-V10-23-page8.txt: [('SEN', 'TINEL')] AmSn18950613-V10-24-page1.txt: [('Y', 'ork'), ('ques', 'tions')] AmSn18950613-V10-24-page7.txt: [('N', 'inety')] AmSn18950620-V10-25-page1.txt: [('an', 'cient')] AmSn18950620-V10-25-page2.txt: [('for', 'merly')] AmSn18950620-V10-25-page5.txt: [('in', 'sidious'), ('AMER', 'ICAN')] AmSn18950620-V10-25-page7.txt: [('A', 'ttractive'), ('A', 'merican'), ('Y', 'ork'), ('V', 'ersion')] AmSn18950627-V10-26-page3.txt: [('A', 'MI')] AmSn18950627-V10-26-page6.txt: [('lib', 'erty')] AmSn18950627-V10-26-page7.txt: [('Ad', 'ventists')] AmSn18950718-V10-29-page7.txt: [('a', 're')] AmSn18950718-V10-29-page8.txt: [('SEN', 'TINEL')] AmSn18950725-V10-30-page5.txt: [('depre', 'cated')] AmSn18950801-V10-31-page3.txt: [('Judge', 'th')] AmSn18950801-V10-31-page5.txt: [('REA', 'VIS')] AmSn18950815-V10-32-page8.txt: [('reli', 'gious')] AmSn18950829-V10-34-page4.txt: [('MOVE', 'MENT')] AmSn18950829-V10-34-page8.txt: [('SEN', 'TINEL')] AmSn18950905-V10-35-page2.txt: [('SEN', 'TINEL')] AmSn18950905-V10-35-page7.txt: [('B', 'IB')] AmSn18950919-V10-37-page5.txt: [('Cath', 'olic')] AmSn18950926-V10-38-page5.txt: [('in', 'fluence'), ('the', 're')] AmSn18950926-V10-38-page6.txt: [('E', 'LI')] 
AmSn18950926-V10-38-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER')] AmSn18951017-V10-41-page8.txt: [('a', 'mis')] AmSn18951024-V10-42-page7.txt: [('a', 'll'), ('A', 'Il'), ('all', 'ee'), ('T', 'ia')] AmSn18951031-V10-43-page6.txt: [('Men', 'stealers')] AmSn18951031-V10-43-page7.txt: [('i', 'ri'), ('n', 'orw'), ('endure', 'th'), ('e', 'lf')] AmSn18951107-V10-44-page3.txt: [('sal', 'vation')] AmSn18951107-V10-44-page7.txt: [('pro', 'fusely')] AmSn18951121-V10-46-page5.txt: [('reg', 'ular'), ('or', 'ganization')] AmSn18951121-V10-46-page7.txt: [('Y', 'ork')] AmSn18951128-V10-47-page6.txt: [('S', 'weet')] AmSn18951128-V10-47-page7.txt: [('o', 'ne')] AmSn18951128-V10-47-page8.txt: [('W', 'ine'), ('GEN', 'ERAL')] AmSn18960102-V11-01-page8.txt: [('SEN', 'TINEL')] AmSn18960109-V11-02-page7.txt: [('St', 'ates'), ('V', 'ery')] AmSn18960116-V11-03-page2.txt: [('gover', 'nment')] AmSn18960116-V11-03-page7.txt: [('pro', 'fusely')] AmSn18960130-V11-05-page6.txt: [('a', 'nd')] AmSn18960213-V11-07-page1.txt: [('ED', 'ITORS')] AmSn18960213-V11-07-page3.txt: [('dam', 'es')] AmSn18960220-V11-08-page2.txt: [('IN', 'SP')] AmSn18960220-V11-08-page4.txt: [('prop', 'erly')] AmSn18960220-V11-08-page5.txt: [('au', 'th'), ('of', 'ttimes')] AmSn18960220-V11-08-page6.txt: [('to', 'wns')] AmSn18960220-V11-08-page7.txt: [('nan', 'na'), ('sim', 'ple'), ('W', 'ith')] AmSn18960220-V11-08-page8.txt: [('SEN', 'TINEL'), ('SEN', "TINEL'S")] AmSn18960227-V11-09-page3.txt: [('attend', 'ance')] AmSn18960227-V11-09-page8.txt: [('AMER', 'ICAN')] AmSn18960305-V11-10-page1.txt: [('the', 'Ca')] AmSn18960305-V11-10-page6.txt: [('CA', 'NT')] AmSn18960305-V11-10-page7.txt: [('A', 'GA')] AmSn18960305-V11-10-page8.txt: [('b', 'Ra')] AmSn18960312-V11-11-page5.txt: [('AMER', 'ICAN')] AmSn18960312-V11-11-page7.txt: [('W', 'ith')] AmSn18960319-V11-12-page1.txt: [('e', 'cu')] AmSn18960319-V11-12-page3.txt: [('Chris', 'tianity')] AmSn18960326-V11-13-page5.txt: [('in', 'vaded')] AmSn18960326-V11-13-page7.txt: [('b', 
'irders')] AmSn18960409-V11-15-page3.txt: [('fur', 'therance')] AmSn18960409-V11-15-page5.txt: [('ON', 'TARIO')] AmSn18960507-V11-19-page5.txt: [('de', 'mand')] AmSn18960514-V11-20-page3.txt: [('to', 'wn')] AmSn18960514-V11-20-page7.txt: [('CLA', 'SS'), ('She', 'lah')] AmSn18960521-V11-21-page3.txt: [('AD', 'VENTIST')] AmSn18960528-V11-22-page3.txt: [('pro', 'hibited')] AmSn18960604-V11-23-page1.txt: [('rec', 'eives')] AmSn18960604-V11-23-page2.txt: [('See', 'th')] AmSn18960604-V11-23-page4.txt: [('pro', 'hibition')] AmSn18960604-V11-23-page7.txt: [('Me', 'shullam'), ('a', 'Ne'), ('t', 'Ex'), ('nah', 'Ma'), ('Le', 'vites'), ('DAn', 'iel')] AmSn18960611-V11-24-page3.txt: [('En', 'deavor')] AmSn18960618-V11-25-page7.txt: [('go', 'ld')] AmSn18960702-V11-26-page1.txt: [('DEC', 'LARATION')] AmSn18960702-V11-26-page2.txt: [('be', 'lieve')] AmSn18960702-V11-26-page3.txt: [('Ref', 'orm')] AmSn18960702-V11-26-page7.txt: [('Pro', 'hibition'), ('perfect', 'ly'), ('Limit', 'ations')] AmSn18960716-V11-28-page2.txt: [('are', 'Ca')] AmSn18960716-V11-28-page4.txt: [('the', 're')] AmSn18960716-V11-28-page7.txt: [('Ha', 'shum'), ('V', 'al'), ('NEH', 'EMIAH'), ('r', 'Ex'), ('Ha', 'shub'), ('nah', 'Ma'), ('E', 'zra'), ('a', 'bi'), ('Le', 'vites')] AmSn18960723-V11-29-page1.txt: [('J', 'ULY')] AmSn18960723-V11-29-page5.txt: [('con', 'ventions')] AmSn18960723-V11-29-page7.txt: [('A', 'lian')] AmSn18960730-V11-30-page2.txt: [('des', 'tinies')] AmSn18960730-V11-30-page4.txt: [('Script', 'UreS')] AmSn18960806-V11-31-page2.txt: [('Dan', 'iel')] AmSn18960813-V11-32-page7.txt: [('jah', 'Az'), ('Ha', 'shub'), ('a', 'NS')] AmSn18960820-V11-33-page6.txt: [('now', 'adays')] AmSn18960820-V11-33-page7.txt: [('Le', 'vites'), ('a', 'Ne')] AmSn18960827-V11-34-page5.txt: [('T', 'wo')] AmSn18960827-V11-34-page7.txt: [('A', 'merican')] AmSn18960827-V11-34-page8.txt: [('a', 'ny')] AmSn18960903-V11-35-page2.txt: [('to', 're')] AmSn18960903-V11-35-page3.txt: [('a', 'mo')] AmSn18960903-V11-35-page5.txt: 
[('inter', 'esting')] AmSn18960903-V11-35-page7.txt: [('a', 'sa'), ('Le', 'vItes'), ('a', 'liah'), ('I', 'ra'), ('a', 'Ne'), ('He', 'zir')] AmSn18960910-V11-36-page3.txt: [('A', 'll'), ('as', 'semblies')] AmSn18960910-V11-36-page7.txt: [('E', 'zra')] AmSn18960917-V11-37-page2.txt: [('in', 'dictment'), ('d', 'iscourse')] AmSn18960917-V11-37-page3.txt: [('Demo', 'crat')] AmSn18960917-V11-37-page7.txt: [('G', 'aG'), ('nah', 'MA')] AmSn18960924-V11-38-page5.txt: [('Ruth', 'erford')] AmSn18960924-V11-38-page7.txt: [('Y', 'ork'), ('Le', 'vites')] AmSn18961015-V11-41-page1.txt: [('Cardin', 'al')] AmSn18961015-V11-41-page7.txt: [('t', 'ow')] AmSn18961015-V11-41-page8.txt: [('Aguas', 'Calientes')] AmSn18961022-V11-42-page1.txt: [('de', 'manded'), ('dema', 'nd')] AmSn18961022-V11-42-page3.txt: [('rec', 'ognized'), ('spir', 'itual')] AmSn18961022-V11-42-page4.txt: [('ME', 'th')] AmSn18961029-V11-43-page1.txt: [('per', 'se')] AmSn18961029-V11-43-page3.txt: [('CHRIS', 'TIAN')] AmSn18961029-V11-43-page7.txt: [('v', 'olumes'), ('W', 'ith')] AmSn18961029-V11-43-page8.txt: [('of', 'ficio')] AmSn18961105-V11-44-page2.txt: [('polit', 'ical')] AmSn18961105-V11-44-page7.txt: [('de', 'scription')] AmSn18961112-V11-45-page2.txt: [('SEN', 'TINEL'), ('A', 'dventists'), ('CIT', 'IZENSHIP')] AmSn18961119-V11-46-page3.txt: [('per', 'mit')] AmSn18961119-V11-46-page7.txt: [('Le', 'vites')] AmSn18961126-V11-47-page4.txt: [('AMER', 'ICAN'), ('GOVERN', 'MENT')] AmSn18961126-V11-47-page5.txt: [('SEN', 'TINEL')] AmSn18961126-V11-47-page6.txt: [('Su', 'nday')] AmSn18961126-V11-47-page8.txt: [('SEN', 'TINEL')] AmSn18961203-V11-48-page5.txt: [('Chris', 'tian')] AmSn18961203-V11-48-page6.txt: [('SEN', 'TINEL'), ('Christian', 'ity')] AmSn18961203-V11-48-page7.txt: [('N', 'th'), ('a', 'NS')] AmSn18961217-V11-50-page6.txt: [('e', 'arly')] AmSn18961217-V11-50-page7.txt: [('K', 'ey'), ('a', 'Ne')] AmSn18961224-V11-51-page1.txt: [('s', 'ubterfuge')] AmSn18961224-V11-51-page3.txt: [('de', 'manding')] 
AmSn18961224-V11-51-page4.txt: [('storekee', 'pers')] AmSn18961224-V11-51-page6.txt: [('A', 'Mt')] AmSn18961224-V11-51-page7.txt: [('import', 'ance'), ('Ba', 'ni'), ('i', 'll'), ('a', 'Ne'), ('HA', 'sh'), ('Ha', 'rIph'), ('Le', 'vites')] AmSn18961224-V11-51-page8.txt: [('SEN', 'TINEL')] AmSn18970107-V12-01-page10.txt: [('repudi', 'ated')] AmSn18970107-V12-01-page16.txt: [('Le', 'vites'), ('A', 'MERICA')] AmSn18970114-V12-02-page14.txt: [('A', 'MERICAN')] AmSn18970114-V12-02-page4.txt: [('entertainmen', 'ts')] AmSn18970121-V12-03-page1.txt: [('A', 'LONZO')] AmSn18970121-V12-03-page13.txt: [('Govern', 'ment')] AmSn18970121-V12-03-page7.txt: [('a', 'nd')] AmSn18970121-V12-03-page8.txt: [('the', 'Ca'), ('Christia', 'nity')] AmSn18970128-V12-04-page1.txt: [('a', 'nd')] AmSn18970128-V12-04-page13.txt: [('Pry', "or's")] AmSn18970128-V12-04-page6.txt: [('work', 'ers'), ('pres', 'ent')] AmSn18970204-V12-05-page14.txt: [('sub', 'ject')] AmSn18970204-V12-05-page4.txt: [('P', 'Al')] AmSn18970204-V12-05-page6.txt: [('sus', 'tained')] AmSn18970204-V12-05-page9.txt: [('forma', 'tion')] AmSn18970211-V12-06-page14.txt: [('AMER', 'ICAN')] AmSn18970211-V12-06-page2.txt: [('DE', 'CISIONS')] AmSn18970211-V12-06-page6.txt: [('de', 'cided')] AmSn18970211-V12-06-page8.txt: [('cir', 'cumspection')] AmSn18970218-V12-07-page15.txt: [('in', 'terest')] AmSn18970218-V12-07-page16.txt: [('nah', 'MA')] AmSn18970225-V12-08-page1.txt: [('A', 'LONZO')] AmSn18970225-V12-08-page11.txt: [('SEC', 'TIONS')] AmSn18970225-V12-08-page15.txt: [('contin', 'ual')] AmSn18970225-V12-08-page4.txt: [('End', 'eavorer')] AmSn18970225-V12-08-page6.txt: [('sub', 'ject'), ('pla', 'ces')] AmSn18970304-V12-09-page10.txt: [('A', 'MERICAN')] AmSn18970311-V12-10-page14.txt: [('m', 'uch')] AmSn18970311-V12-10-page2.txt: [('exam', 'ine')] AmSn18970311-V12-10-page7.txt: [('E', 'RI')] AmSn18970318-V12-11-page14.txt: [('Kan', 'sas')] AmSn18970318-V12-11-page15.txt: [('P', 'ACIFIC'), ('in', 'terest')] 
AmSn18970318-V12-11-page7.txt: [('con', 'demned')] AmSn18970318-V12-11-page8.txt: [('J', 'oash')] AmSn18970325-V12-12-page10.txt: [('dispe', 'nse')] AmSn18970325-V12-12-page16.txt: [('W', 'ith'), ('sim', 'ple')] AmSn18970325-V12-12-page3.txt: [('im', 'portance')] AmSn18970401-V12-13-page15.txt: [('P', 'olitical')] AmSn18970401-V12-13-page16.txt: [('W', 'ith')] AmSn18970401-V12-13-page2.txt: [('con', 'cerning'), ('ChriS', 'tians')] AmSn18970401-V12-13-page7.txt: [('r', 'um')] AmSn18970401-V12-13-page8.txt: [('a', 'sk')] AmSn18970408-V12-14-page10.txt: [('p', 'urely')] AmSn18970408-V12-14-page11.txt: [('SEN', 'TINEL')] AmSn18970408-V12-14-page15.txt: [('pro', 'greSSed')] AmSn18970408-V12-14-page16.txt: [('W', 'ith'), ('M', 'RS')] AmSn18970408-V12-14-page9.txt: [('fundament', 'al')] AmSn18970415-V12-15-page2.txt: [('ma', 'jority')] AmSn18970415-V12-15-page4.txt: [('appoint', 'ment')] AmSn18970415-V12-15-page9.txt: [('a', 'nd')] AmSn18970422-V12-16-page13.txt: [('spirit', 'ual')] AmSn18970422-V12-16-page7.txt: [('t', 'wo')] AmSn18970429-V12-17-page1.txt: [('a', 'reli')] AmSn18970429-V12-17-page10.txt: [('Is', 'rael')] AmSn18970429-V12-17-page11.txt: [('to', 'es')] AmSn18970429-V12-17-page15.txt: [('in', 'terest')] AmSn18970429-V12-17-page16.txt: [('N', 'th'), ('Le', 'vites')] AmSn18970429-V12-17-page4.txt: [('d', 'ay')] AmSn18970506-V12-18-page1.txt: [('sim', 'plicity')] AmSn18970506-V12-18-page12.txt: [('en', 'forcement')] AmSn18970506-V12-18-page13.txt: [('god', 'liness'), ('Chris', 'tian')] AmSn18970506-V12-18-page4.txt: [('degrada', 'tion')] AmSn18970513-V12-19-page12.txt: [('as', 'certaining')] AmSn18970513-V12-19-page13.txt: [('inter', 'rupted')] AmSn18970513-V12-19-page4.txt: [('pro', 'mulgation')] AmSn18970513-V12-19-page9.txt: [('hun', 'dreds')] AmSn18970520-V12-20-page1.txt: [('cor', 'ruptible')] AmSn18970520-V12-20-page14.txt: [('w', 'ith'), ('m', 'etal')] AmSn18970520-V12-20-page15.txt: [('in', 'terest'), ('M', 'atthew')] AmSn18970520-V12-20-page9.txt: 
[('a', 'ngel'), ('sin', 'gle'), ('a', 'bsolutely')] AmSn18970527-V12-21-page1.txt: [('k', 'OA')] AmSn18970527-V12-21-page14.txt: [('f', 'ORK')] AmSn18970527-V12-21-page15.txt: [('in', 'terest')] AmSn18970603-V12-22-page11.txt: [('in', 'dulged')] AmSn18970603-V12-22-page13.txt: [('a', 'nd'), ('con', 'cerned')] AmSn18970603-V12-22-page14.txt: [('m', 'onths')] AmSn18970603-V12-22-page16.txt: [('W', 'ith')] AmSn18970603-V12-22-page6.txt: [('d', 'oors')] AmSn18970603-V12-22-page7.txt: [('a', 'll')] AmSn18970603-V12-22-page8.txt: [('In', 'asmuch'), ('m', 'em')] AmSn18970610-V12-23-page13.txt: [('de', 'nominational')] AmSn18970610-V12-23-page5.txt: [('in', 'struction')] AmSn18970610-V12-23-page6.txt: [('pe', 'culiar')] AmSn18970610-V12-23-page9.txt: [('per', 'se')] AmSn18970617-V12-24-page3.txt: [('A', 'MERICAN')] AmSn18970624-V12-25-page4.txt: [('right', 'eousness')] AmSn18970624-V12-25-page6.txt: [('Fed', 'eral')] AmSn18970701-V12-26-page11.txt: [('con', 'vention')] AmSn18970701-V12-26-page14.txt: [('SEN', 'TINEL')] AmSn18970701-V12-26-page2.txt: [('dis', 'tinctly')] AmSn18970701-V12-26-page3.txt: [('a', 'nd')] AmSn18970707-V12-27-page11.txt: [('Hin', 'doos')] AmSn18970707-V12-27-page14.txt: [('in', 'terest'), ('cal', 'ender')] AmSn18970707-V12-27-page15.txt: [('G', 'od')] AmSn18970707-V12-27-page4.txt: [('SEN', 'TINEL')] AmSn18970707-V12-27-page6.txt: [('govern', 'ments')] AmSn18970715-V12-28-page13.txt: [('A', 'MERICAN')] AmSn18970715-V12-28-page14.txt: [('in', 'terest')] AmSn18970715-V12-28-page7.txt: [('ha', 'th'), ('h', 'ome')] AmSn18970715-V12-28-page8.txt: [('sig', 'nificant')] AmSn18970722-V12-29-page1.txt: [('ex', 'cept')] AmSn18970722-V12-29-page10.txt: [('consti', 'tutional')] AmSn18970722-V12-29-page14.txt: [('in', 'terest')] AmSn18970722-V12-29-page5.txt: [('the', 'se')] AmSn18970729-V12-30-page14.txt: [('in', 'terest')] AmSn18970729-V12-30-page16.txt: [('W', 'ith'), ('sim', 'ple')] AmSn18970805-V12-31-page1.txt: [('prof', 'itable')] 
AmSn18970805-V12-31-page10.txt: [('a', 'nd')] AmSn18970805-V12-31-page13.txt: [('SEN', 'TINEL')] AmSn18970805-V12-31-page14.txt: [('in', 'terest')] AmSn18970812-V12-32-page15.txt: [('Y', 'ork')] AmSn18970812-V12-32-page6.txt: [('con', 'fidently')] AmSn18970812-V12-32-page7.txt: [('SEN', 'TINEL')] AmSn18970819-V12-33-page14.txt: [('con', 'nected'), ('Y', 'ork')] AmSn18970819-V12-33-page16.txt: [('W', 'ith'), ('sim', 'ple')] AmSn18970819-V12-33-page2.txt: [('a', 're'), ('Chris', 'tians')] AmSn18970819-V12-33-page5.txt: [('Cir', 'cumcision')] AmSn18970909-V12-35-page14.txt: [('Y', 'ork')] AmSn18970909-V12-35-page16.txt: [('m', 'oth')] AmSn18970916-V12-36-page4.txt: [('y', 'ou')] AmSn18970916-V12-36-page6.txt: [('inte', 'nts')] AmSn18970923-V12-37-page12.txt: [('disap', 'pointments')] AmSn18970923-V12-37-page13.txt: [('Y', 'ork')] AmSn18970923-V12-37-page14.txt: [('pub', 'lished')] AmSn18970923-V12-37-page15.txt: [('T', 'ao'), ('a', 'ce')] AmSn18970923-V12-37-page16.txt: [('n', 'ote'), ('n', 'otes')] AmSn18970923-V12-37-page4.txt: [('be', 'ng'), ('in', 'terests')] AmSn18970923-V12-37-page6.txt: [('a', 'nd')] AmSn18970923-V12-37-page8.txt: [('real', 'ization')] AmSn18970923-V12-37-page9.txt: [('polit', 'ical'), ('de', 'termined')] AmSn18970930-V12-38-page13.txt: [('SEN', 'TINEL')] AmSn18970930-V12-38-page16.txt: [('ra', 'ca'), ('ho', 'Ts'), ('g', 'rin')] AmSn18970930-V12-38-page4.txt: [('con', 'vention'), ('in', 'toxicated')] AmSn18970930-V12-38-page6.txt: [('A', 'MERICAN')] AmSn18971007-V12-39-page13.txt: [('ad', 'vocate')] AmSn18971007-V12-39-page9.txt: [('Amer', 'ican')] AmSn18971014-V12-40-page16.txt: [('p', 'PM'), ('I', 'ce'), ('i', 'SL'), ('w', 'ei'), ('s', 'Om'), ('I', 're'), ('m', 'id')] AmSn18971021-V12-41-page12.txt: [('won', 'dered')] AmSn18971021-V12-41-page13.txt: [('j', 'oy')] AmSn18971021-V12-41-page14.txt: [('Dic', 'tionary'), ('the', 're')] AmSn18971021-V12-41-page16.txt: [('a', 'ka'), ('A', 'ddress'), ('R', 'cd'), ('a', 'ft'), ('I', 're')] 
AmSn18971021-V12-41-page2.txt: [('attend', 'ance')] AmSn18971021-V12-41-page5.txt: [('im', 'agined')] AmSn18971021-V12-41-page6.txt: [('d', 'welleth')] AmSn18971021-V12-41-page9.txt: [('A', 'VER')] AmSn18971028-V12-42-page1.txt: [('SEN', 'TINEL')] AmSn18971028-V12-42-page14.txt: [('in', 'terest')] AmSn18971028-V12-42-page16.txt: [('a', 'Ne'), ('a', 'li')] AmSn18971028-V12-42-page2.txt: [('SEN', 'TINEL')] AmSn18971028-V12-42-page9.txt: [('a', 'nd')] AmSn18971104-V12-43-page14.txt: [('in', 'terest')] AmSn18971104-V12-43-page16.txt: [('a', 'Ne')] AmSn18971104-V12-43-page3.txt: [('as', 'serted'), ('di', 'rection')] AmSn18971104-V12-43-page9.txt: [('Insp', 'iration')] AmSn18971111-V12-44-page15.txt: [('in', 'ca')] AmSn18971111-V12-44-page16.txt: [('Me', 'shullam'), ('a', 'Ne')] AmSn18971111-V12-44-page2.txt: [('a', 'nd')] AmSn18971111-V12-44-page8.txt: [('a', 'bundantly')] AmSn18971118-V12-45-page14.txt: [('DiCtion', 'ary')] AmSn18971118-V12-45-page15.txt: [('in', 'ti'), ('Kan', 'sas'), ('Com', 'pany')] AmSn18971118-V12-45-page16.txt: [('Som', 'ething'), ('a', 'Ne')] AmSn18971125-V12-46-page10.txt: [('im', 'portant')] AmSn18971125-V12-46-page11.txt: [('We', 'll'), ('We', 're')] AmSn18971125-V12-46-page14.txt: [('A', 'pril')] AmSn18971125-V12-46-page15.txt: [('fur', 'nished')] AmSn18971125-V12-46-page16.txt: [('a', 'Ne')] AmSn18971125-V12-46-page2.txt: [('En', 'deavor')] AmSn18971125-V12-46-page3.txt: [('in', 'stitution')] AmSn18971202-V12-47-page1.txt: [('Chris', 'tian')] AmSn18971202-V12-47-page12.txt: [('alw', 'ays')] AmSn18971202-V12-47-page7.txt: [('a', 'nd')] AmSn18971209-V12-48-page14.txt: [('Qual', 'ity'), ('in', 'ca')] AmSn18971209-V12-48-page16.txt: [('ah', 'Az'), ('a', 'Ne')] AmSn18971209-V12-48-page6.txt: [('pro', 'fession')] AmSn18971209-V12-48-page7.txt: [('P', 'rotestants')] AmSn18971216-V12-49-page13.txt: [('Com', 'bination')] AmSn18971216-V12-49-page14.txt: [('de', 'scriptive'), ('in', 'ca')] AmSn18971216-V12-49-page16.txt: [('H', 'eadband')] 
AmSn18971216-V12-49-page6.txt: [('the', 're')] AmSn18971216-V12-49-page7.txt: [('per', 'se')] AmSn18971216-V12-49-page9.txt: [('ex', 'pire')] AmSn18971230-V12-50-page1.txt: [('A', 'licia')] AmSn18971230-V12-50-page14.txt: [('in', 'terest')] AmSn18971230-V12-50-page4.txt: [('occa', 'sions')] AmSn18971230-V12-50-page9.txt: [('b', 'ast')] AmSn18980106-V13-01-page2.txt: [('ex', 'pected'), ('be', 'lieve')] AmSn18980113-V13-02-page1.txt: [('l', 'imn'), ('in', 'stincts')] AmSn18980113-V13-02-page13.txt: [('SEN', 'TINEL')] AmSn18980113-V13-02-page16.txt: [('a', 'NS')] AmSn18980113-V13-02-page4.txt: [('Chri', 'stian')] AmSn18980113-V13-02-page5.txt: [('pos', 'sible')] AmSn18980113-V13-02-page7.txt: [('a', 'ttending')] AmSn18980120-V13-03-page15.txt: [('in', 'terest')] AmSn18980120-V13-03-page16.txt: [('Th', 'ey'), ('s', 'itz')] AmSn18980120-V13-03-page6.txt: [('de', 'cided')] AmSn18980127-V13-04-page14.txt: [('in', 'terest'), ('Cat', 'arrh')] AmSn18980127-V13-04-page16.txt: [('ah', 'Az'), ('T', 'OW')] AmSn18980127-V13-04-page2.txt: [('Congregation', 'alist')] AmSn18980127-V13-04-page4.txt: [('per', 'suaded')] AmSn18980203-V13-05-page14.txt: [('med', 'icine')] AmSn18980203-V13-05-page5.txt: [('r', 'espect')] AmSn18980210-V13-06-page12.txt: [('su', 're')] AmSn18980210-V13-06-page14.txt: [('in', 'terest')] AmSn18980210-V13-06-page16.txt: [('the', 'se'), ('Con', 'gress')] AmSn18980210-V13-06-page5.txt: [('Us', 'urped')] AmSn18980210-V13-06-page9.txt: [('lib', 'erty'), ('estab', 'lished'), ('re', 'gards')] AmSn18980217-V13-07-page14.txt: [('in', 'terest'), ('in', 'ca')] AmSn18980217-V13-07-page15.txt: [('a', 'rak')] AmSn18980217-V13-07-page3.txt: [('pro', 'fession')] AmSn18980217-V13-07-page7.txt: [('A', 'MERICAN')] AmSn18980224-V13-08-page14.txt: [('in', 'terest'), ('Wagon', 'ettes'), ('W', 'ig')] AmSn18980224-V13-08-page15.txt: [('f', 'orgo')] AmSn18980224-V13-08-page8.txt: [('SEN', 'TINEL')] AmSn18980224-V13-08-page9.txt: [('fa', 'tuus')] AmSn18980303-V13-09-page14.txt: 
[('in', 'terest')] AmSn18980303-V13-09-page15.txt: [('judg', 'ments')] AmSn18980303-V13-09-page4.txt: [('mer', 'rily')] AmSn18980303-V13-09-page6.txt: [('in', 'clined'), ('Gov', 'ernment')] AmSn18980310-V13-10-page14.txt: [('A', 'ddress'), ('g', 'od')] AmSn18980310-V13-10-page15.txt: [('a', 'Ne')] AmSn18980310-V13-10-page16.txt: [('inf', 'ormed')] AmSn18980310-V13-10-page2.txt: [('sev', 'eral')] AmSn18980310-V13-10-page7.txt: [('J', 'erusalem'), ('appear', 'ance')] AmSn18980310-V13-10-page8.txt: [('faith', 'ful')] AmSn18980317-V13-11-page14.txt: [('A', 'loth')] AmSn18980317-V13-11-page15.txt: [('for', 'ES'), ('a', 'Ne')] AmSn18980317-V13-11-page8.txt: [('act', 'uated')] AmSn18980324-V13-12-page12.txt: [('see', 'th')] AmSn18980324-V13-12-page15.txt: [('a', 'Ne')] AmSn18980324-V13-12-page3.txt: [('gov', 'ernment')] AmSn18980324-V13-12-page8.txt: [('Chris', 'tian')] AmSn18980331-V13-13-page14.txt: [('in', 'terest')] AmSn18980331-V13-13-page15.txt: [('a', 'Ne'), ('ill', 'ustrations')] AmSn18980331-V13-13-page16.txt: [('w', 'ould')] AmSn18980407-V13-14-page14.txt: [('in', 'terest')] AmSn18980407-V13-14-page15.txt: [('f', 'orgo')] AmSn18980414-V13-15-page10.txt: [('like', 'th')] AmSn18980414-V13-15-page14.txt: [('in', 'terest'), ('in', 'ca')] AmSn18980414-V13-15-page15.txt: [('A', 'Mt'), ('Le', 'vites'), ('a', 'Ne')] AmSn18980414-V13-15-page3.txt: [('st', 'atement')] AmSn18980414-V13-15-page4.txt: [('sac', 'rificed')] AmSn18980414-V13-15-page6.txt: [('fun', 'damental')] AmSn18980421-V13-16-page14.txt: [('in', 'terest')] AmSn18980428-V13-17-page12.txt: [('au', 'thorities')] AmSn18980428-V13-17-page13.txt: [('a', 'reli'), ('r', 'om'), ('fur', 'nished')] AmSn18980428-V13-17-page14.txt: [('Kan', 'sas'), ('Com', 'pany'), ('in', 'terest')] AmSn18980428-V13-17-page16.txt: [('Amer', 'icans')] AmSn18980428-V13-17-page3.txt: [('exam', 'ple')] AmSn18980428-V13-17-page5.txt: [('A', 'MERICAN')] AmSn18980505-V13-18-page13.txt: [('mod', 'ern')] AmSn18980505-V13-18-page14.txt: [('in', 
'terest')] AmSn18980505-V13-18-page5.txt: [('a', 'nd')] AmSn18980512-V13-19-page14.txt: [('DI', 'ES')] AmSn18980519-V13-20-page1.txt: [('Chris', 'tian')] AmSn18980519-V13-20-page14.txt: [('especial', 'ly')] AmSn18980519-V13-20-page2.txt: [('a', 'id')] AmSn18980519-V13-20-page6.txt: [('con', 'cern')] AmSn18980519-V13-20-page7.txt: [('part', 'nership')] AmSn18980526-V13-21-page14.txt: [('In', 'terest'), ('especial', 'ly')] AmSn18980526-V13-21-page5.txt: [('meth', 'ods')] AmSn18980602-V13-22-page14.txt: [('especial', 'ly')] AmSn18980602-V13-22-page15.txt: [('Add', 'ress')] AmSn18980602-V13-22-page3.txt: [('par', 'tial')] AmSn18980602-V13-22-page7.txt: [('s', 'un')] AmSn18980609-V13-23-page14.txt: [('Com', 'pany'), ('especial', 'ly'), ('HA', 'YS')] AmSn18980609-V13-23-page15.txt: [('i', 'ri'), ('a', 're'), ('a', 'il'), ('e', 'gg')] AmSn18980609-V13-23-page2.txt: [('perma', 'nent'), ('per', 'manent')] AmSn18980609-V13-23-page3.txt: [('ques', 'tion')] AmSn18980609-V13-23-page9.txt: [('Minis', "ters'")] AmSn18980616-V13-24-page10.txt: [('Chris', 'tian')] AmSn18980616-V13-24-page13.txt: [('w', 'ork'), ('fur', 'nished')] AmSn18980616-V13-24-page14.txt: [('Com', 'pany'), ('especial', 'ly')] AmSn18980616-V13-24-page15.txt: [('i', 'Cel'), ('Er', 'ie'), ('m', 'oi')] AmSn18980616-V13-24-page2.txt: [('Colo', 'nel')] AmSn18980616-V13-24-page7.txt: [('an', 'swer')] AmSn18980623-V13-25-page14.txt: [('in', 'terest'), ('V', 'aluable'), ('especial', 'ly')] AmSn18980623-V13-25-page15.txt: [('a', 'll'), ('b', 'ecome'), ('E', 'ra'), ('not', 'ch')] AmSn18980623-V13-25-page16.txt: [('Bis', 'marck')] AmSn18980630-V13-26-page11.txt: [('Eng', 'lish')] AmSn18980630-V13-26-page14.txt: [('especial', 'ly')] AmSn18980630-V13-26-page15.txt: [('to', 'Il'), ('H', 'ay'), ('i', 'va')] AmSn18980630-V13-26-page6.txt: [('cor', 'ruptions')] AmSn18980630-V13-26-page9.txt: [('a', 'lready')] AmSn18980714-V13-27-page14.txt: [('especial', 'ly')] AmSn18980714-V13-27-page15.txt: [('I', 're'), ('to', 'Co'), ('r', 
'CD'), ('th', 'ese'), ('O', 'ra')] AmSn18980714-V13-27-page2.txt: [('evan', 'gelical'), ('be', 'lieve')] AmSn18980714-V13-27-page3.txt: [('so', 'ul')] AmSn18980714-V13-27-page7.txt: [('IS', 'TH')] AmSn18980721-V13-28-page10.txt: [('Preside', 'nt')] AmSn18980721-V13-28-page17.txt: [('to', 'ry')] AmSn18980721-V13-28-page19.txt: [('especial', 'ly')] AmSn18980721-V13-28-page6.txt: [('f', 'ol')] AmSn18980728-V13-29-page10.txt: [('sub', 'jects')] AmSn18980728-V13-29-page14.txt: [('especial', 'ly')] AmSn18980728-V13-29-page7.txt: [('symbol', 'ized')] AmSn18980804-V13-30-page14.txt: [('especial', 'ly')] AmSn18980804-V13-30-page4.txt: [('ecclesias', 'tically')] AmSn18980804-V13-30-page8.txt: [('ac', 'cept')] AmSn18980811-V13-31-page12.txt: [('belie', 'veth')] AmSn18980811-V13-31-page14.txt: [('especial', 'ly')] AmSn18980811-V13-31-page16.txt: [('en', 'gagement')] AmSn18980811-V13-31-page7.txt: [('the', 'se')] AmSn18980818-V13-32-page14.txt: [('especial', 'ly')] AmSn18980818-V13-32-page6.txt: [('crim', 'inal')] AmSn18980825-V13-33-page1.txt: [('as', 'cend')] AmSn18980825-V13-33-page7.txt: [('do', 'th')] AmSn18980825-V13-33-page8.txt: [('a', 're')] AmSn18980901-V13-34-page4.txt: [('in', 'itiative')] AmSn18980908-V13-35-page12.txt: [('right', 'ful')] AmSn18980908-V13-35-page2.txt: [('be', 'seeching')] AmSn18980908-V13-35-page7.txt: [('Equal', 'ity')] AmSn18980915-V13-36-page15.txt: [('P', 'ress')] AmSn18980915-V13-36-page2.txt: [('prop', 'erly')] AmSn18980922-V13-37-page14.txt: [('especial', 'ly')] AmSn18980922-V13-37-page15.txt: [('cap', 'tivity')] AmSn18980922-V13-37-page16.txt: [('for', 'eign'), ('Gov', 'ernment')] AmSn18980922-V13-37-page5.txt: [('ad', 'venturers')] AmSn18980922-V13-37-page6.txt: [('Cath', 'olics')] AmSn18980929-V13-38-page13.txt: [('It', "'s")] AmSn18980929-V13-38-page14.txt: [('especial', 'ly')] AmSn18980929-V13-38-page15.txt: [('cap', 'tivity')] AmSn18980929-V13-38-page2.txt: [('the', 'Ca'), ('Ca', 'tholic')] AmSn18980929-V13-38-page9.txt: [('SEN', 
'TINEL')] AmSn18981006-V13-39-page1.txt: [('GOVERN', 'MENT')] AmSn18981006-V13-39-page14.txt: [('cap', 'tivity')] AmSn18981006-V13-39-page15.txt: [('especial', 'ly')] AmSn18981013-V13-40-page14.txt: [('M', 'OD')] AmSn18981013-V13-40-page15.txt: [('cap', 'tivity')] AmSn18981013-V13-40-page16.txt: [('ten', 'dency')] AmSn18981020-V13-41-page11.txt: [('enforce', 'ment')] AmSn18981020-V13-41-page14.txt: [('cap', 'tivity'), ('In', 'terpretation'), ('especial', 'ly')] AmSn18981020-V13-41-page3.txt: [('familiar', 'ity')] AmSn18981027-V13-42-page15.txt: [('cap', 'tivity'), ('r', 'om'), ('especial', 'ly')] AmSn18981103-V13-43-page11.txt: [('con', 'flict')] AmSn18981103-V13-43-page14.txt: [('cap', 'tivity'), ('Kan', 'sas'), ('especial', 'ly')] AmSn18981110-V13-44-page15.txt: [('cap', 'tivity'), ('especial', 'ly')] AmSn18981110-V13-44-page6.txt: [('politi', 'cian')] AmSn18981110-V13-44-page9.txt: [('author', 'ity')] AmSn18981117-V13-45-page1.txt: [('spirit', 'ual')] AmSn18981117-V13-45-page14.txt: [('cap', 'tivity'), ('especial', 'ly')] AmSn18981124-V13-46-page15.txt: [('cap', 'tivity'), ('in', 'terpretation'), ('especial', 'ly')] AmSn18981201-V13-47-page14.txt: [('cap', 'tivity'), ('In', 'terpretation'), ('especial', 'ly')] AmSn18981201-V13-47-page16.txt: [('the', 'es'), ('reg', 'ular')] AmSn18981201-V13-47-page3.txt: [('Cath', 'olic'), ('dig', 'nitaries')] AmSn18981201-V13-47-page5.txt: [('inter', 'ests')] AmSn18981201-V13-47-page9.txt: [('in', 'genious')] AmSn18981208-V13-48-page13.txt: [('engr', 'avings')] AmSn18981208-V13-48-page14.txt: [('r', 'io')] AmSn18981208-V13-48-page15.txt: [('cap', 'tivity')] AmSn18981208-V13-48-page7.txt: [('hap', 'piness')] AmSn18981215-V13-49-page1.txt: [('GOVERN', 'MENT'), ('gov', 'ernment')] AmSn18981215-V13-49-page14.txt: [('cap', 'tivity'), ('especial', 'ly')] AmSn18981215-V13-49-page4.txt: [('ques', 'tion')] AmSn18981215-V13-49-page5.txt: [('dis', 'seminate')] AmSn18981229-V13-50-page14.txt: [('cap', 'tivity'), ('c', 'onn'), ('In', 
'terpretation'), ('especial', 'ly')] AmSn18981229-V13-50-page15.txt: [('C', 'itation')] AmSn18981229-V13-50-page16.txt: [('ha', 'ndier')] AmSn18981229-V13-50-page2.txt: [('go', 'vernment')] AmSn18981229-V13-50-page3.txt: [('imperial', 'ist')] AmSn18981229-V13-50-page7.txt: [('and', 're')] AmSn18990105-V14-01-page15.txt: [('fascin', 'ating')] AmSn18990105-V14-01-page2.txt: [('Con', 'gress')] AmSn18990105-V14-01-page5.txt: [('cen', 'tury')] AmSn18990112-V14-02-page14.txt: [('engr', 'avings')] AmSn18990112-V14-02-page15.txt: [('fa', 'mily')] AmSn18990119-V14-03-page10.txt: [('p', 'urpose')] AmSn18990119-V14-03-page14.txt: [('un', 'derlying')] AmSn18990126-V14-04-page11.txt: [('a', 'nd')] AmSn18990126-V14-04-page13.txt: [('TO', 'OtER')] AmSn18990126-V14-04-page15.txt: [('fa', 'mily'), ('C', 'itation')] AmSn18990202-V14-05-page1.txt: [('PRO', 'FESSION')] AmSn18990202-V14-05-page14.txt: [('r', 'avings')] AmSn18990202-V14-05-page15.txt: [('t', 'itles'), ('N', 'ev'), ('C', 'itation')] AmSn18990202-V14-05-page16.txt: [('SEN', 'TINEL')] AmSn18990202-V14-05-page9.txt: [('We', 'll')] AmSn18990209-V14-06-page12.txt: [('out', 'generaled')] AmSn18990209-V14-06-page14.txt: [('cap', 'tivity')] AmSn18990209-V14-06-page8.txt: [('to', 'ne')] AmSn18990216-V14-07-page14.txt: [('cap', 'tivity')] AmSn18990223-V14-08-page14.txt: [('cap', 'tivity')] AmSn18990302-V14-09-page16.txt: [('SEN', 'TINEL')] AmSn18990302-V14-09-page6.txt: [('ver', 'acity')] AmSn18990302-V14-09-page9.txt: [('w', 'hich')] AmSn18990309-V14-10-page1.txt: [('voice', 'ful')] AmSn18990309-V14-10-page15.txt: [('P', 'RESS')] AmSn18990309-V14-10-page16.txt: [('SEN', 'TINEL')] AmSn18990309-V14-10-page6.txt: [('de', 'clared')] AmSn18990316-V14-11-page15.txt: [('g', 'oo')] AmSn18990316-V14-11-page5.txt: [('and', 're')] AmSn18990316-V14-11-page6.txt: [('lib', 'erty')] AmSn18990323-V14-12-page11.txt: [('en', 'gaged')] AmSn18990323-V14-12-page15.txt: [('fa', 'mily'), ('O', 'LD')] AmSn18990323-V14-12-page2.txt: [('gov', 'ernment')] 
AmSn18990323-V14-12-page5.txt: [('Christian', 'ity')] AmSn18990330-V14-13-page10.txt: [('sold', 'iers')] AmSn18990406-V14-14-page14.txt: [('A', 'bby')] AmSn18990406-V14-14-page8.txt: [('in', 'vestigations')] AmSn18990413-V14-15-page14.txt: [('at', 'Li')] AmSn18990413-V14-15-page16.txt: [('who', 'se')] AmSn18990420-V14-16-page4.txt: [('min', 'ister')] AmSn18990427-V14-17-page13.txt: [('chap', 'lains')] AmSn18990427-V14-17-page8.txt: [('f', 'ollowing')] AmSn18990504-V14-18-page15.txt: [('scien', 'tific')] AmSn18990511-V14-19-page14.txt: [('Sanct', 'uary'), ('phys', 'ical'), ('FOR', 'EIGN')] AmSn18990518-V14-20-page10.txt: [('pro', 'hibit')] AmSn18990518-V14-20-page14.txt: [('A', 'li'), ('FOR', 'EIGN')] AmSn18990525-V14-21-page1.txt: [('p', 'erson')] AmSn18990525-V14-21-page15.txt: [('T', 'iP')] AmSn18990601-V14-22-page1.txt: [('A', 'verted')] AmSn18990601-V14-22-page11.txt: [('com', 'pelled')] AmSn18990601-V14-22-page14.txt: [('tim', 'es'), ('mission', 'ary')] AmSn18990601-V14-22-page15.txt: [('A', 'rIAN')] AmSn18990608-V14-23-page13.txt: [('Sanct', 'uary')] AmSn18990608-V14-23-page15.txt: [('AM', 'ERICAN')] AmSn18990608-V14-23-page2.txt: [('Ha', 'tley')] AmSn18990608-V14-23-page3.txt: [('a', 'nd')] AmSn18990615-V14-24-page13.txt: [('l', 'ee'), ('FOR', 'EIGN'), ('Boa', 'rd')] AmSn18990615-V14-24-page14.txt: [('A', 'ttention'), ('revo', 'lutions')] AmSn18990615-V14-24-page6.txt: [('per', 'sonality')] AmSn18990615-V14-24-page7.txt: [('confer', 'ence'), ('unright', 'eous')] AmSn18990622-V14-25-page11.txt: [('Chi', 'cago')] AmSn18990622-V14-25-page15.txt: [('fa', 'ther')] AmSn18990622-V14-25-page16.txt: [('appear', 'ance')] AmSn18990706-V14-26-page14.txt: [('A', 'rIAN')] AmSn18990706-V14-26-page15.txt: [('O', 'ther'), ('SAVONA', 'ROLA')] AmSn18990706-V14-26-page16.txt: [('SEN', 'TINEL'), ('Chi', 'cago')] AmSn18990706-V14-26-page4.txt: [('author', 'ity')] AmSn18990713-V14-27-page15.txt: [('cap', 'tivity')] AmSn18990720-V14-28-page11.txt: [('c', 'onsideration')] 
AmSn18990720-V14-28-page15.txt: [('cap', 'tivity')] AmSn18990727-V14-29-page13.txt: [('O', 'ver')] AmSn18990727-V14-29-page14.txt: [('t', 'aken')] AmSn18990727-V14-29-page15.txt: [('cap', 'tivity'), ('W', 'orld')] AmSn18990727-V14-29-page3.txt: [('di', 'recting')] AmSn18990803-V14-30-page15.txt: [('C', 'itation')] AmSn18990803-V14-30-page8.txt: [('a', 'nd')] AmSn18990803-V14-30-page9.txt: [('de', 'livered')] AmSn18990810-V14-31-page15.txt: [('inven', 'tions')] AmSn18990810-V14-31-page16.txt: [('un', 'derlying')] AmSn18990810-V14-31-page4.txt: [('Evi', 'dently')] AmSn18990817-V14-32-page15.txt: [('A', 'lAN')] AmSn18990824-V14-33-page13.txt: [('le', 'ft')] AmSn18990824-V14-33-page14.txt: [('C', 'oth')] AmSn18990824-V14-33-page7.txt: [('pro', 'posals')] AmSn18990831-V14-34-page12.txt: [('dis', 'tinguished')] AmSn18990831-V14-34-page14.txt: [('C', 'oth')] AmSn18990831-V14-34-page15.txt: [('A', 'rIAN')] AmSn18990907-V14-35-page12.txt: [('Ascend', 'ancy')] AmSn18990907-V14-35-page3.txt: [('fur', 'ther')] AmSn18990914-V14-36-page2.txt: [('Roma', 'nism'), ('Phil', 'ippines')] AmSn18990921-V14-37-page6.txt: [('to', 'iled')] AmSn18990928-V14-38-page13.txt: [('or', 'cein')] AmSn18990928-V14-38-page6.txt: [('con', 'nected')] AmSn18991005-V14-39-page11.txt: [('Com', 'mitting')] AmSn18991005-V14-39-page4.txt: [('a', 'nd')] AmSn18991012-V14-40-page14.txt: [('K', 'ANSAS')] AmSn18991012-V14-40-page15.txt: [('e', 're')] AmSn18991012-V14-40-page2.txt: [('SEN', 'TINEL')] AmSn18991019-V14-41-page14.txt: [('b', 'aptist')] AmSn18991019-V14-41-page6.txt: [('earn', 'estness')] AmSn18991026-V14-42-page11.txt: [('equal', 'ity')] AmSn18991026-V14-42-page15.txt: [('In', 'terpretation')] AmSn18991026-V14-42-page2.txt: [('move', 'ment')] AmSn18991026-V14-42-page4.txt: [('la', 'xer')] AmSn18991026-V14-42-page6.txt: [('cat', 'echisms'), ('d', 'ay')] AmSn18991026-V14-42-page9.txt: [('a', 'nd')] AmSn18991102-V14-43-page11.txt: [('cir', 'culated')] AmSn18991102-V14-43-page14.txt: [('Y', 'OE')] 
AmSn18991102-V14-43-page16.txt: [('Eng', 'lish')] AmSn18991102-V14-43-page2.txt: [('con', 'gress')] AmSn18991102-V14-43-page3.txt: [('and', 're')] AmSn18991102-V14-43-page9.txt: [('and', 'rE')] AmSn18991109-V14-44-page2.txt: [('j', 'ournal')] AmSn18991116-V14-45-page16.txt: [('e', 're')] AmSn18991116-V14-45-page17.txt: [('Chris', 'tians')] AmSn18991123-V14-46-page13.txt: [('E', 'xamination')] AmSn18991123-V14-46-page2.txt: [('com', 'manded')] AmSn18991130-V14-47-page5.txt: [('t', 'hese')] AmSn18991130-V14-47-page7.txt: [('AME', 'RICAN')] AmSn18991207-V14-48-page12.txt: [('be', 'na')] AmSn18991214-V14-49-page2.txt: [('Govern', 'ment')] AmSn18991228-V14-50-page12.txt: [('We', 'll')] AmSn18991228-V14-50-page16.txt: [('SEN', 'TINEL')] AmSn19000104-V15-01-page10.txt: [('j', 'ustification')] AmSn19000104-V15-01-page14.txt: [('Bo', 'nd')] AmSn19000104-V15-01-page2.txt: [('t', 'ent')] AmSn19000104-V15-01-page7.txt: [('aggrand', 'izement')] AmSn19000104-V15-01-page9.txt: [('Amend', 'ment')] AmSn19000111-V15-02-page11.txt: [('inter', 'fering')] AmSn19000118-V15-03-page13.txt: [('C', 'loth')] AmSn19000118-V15-03-page2.txt: [('the', 're')] AmSn19000125-V15-04-page10.txt: [('Con', 'trary')] AmSn19000201-V15-05-page1.txt: [('f', 'ollows')] AmSn19000208-V15-06-page10.txt: [('repre', 'sented')] AmSn19000208-V15-06-page11.txt: [('Pro', 'Tem')] AmSn19000208-V15-06-page2.txt: [('a', 'ny')] AmSn19000208-V15-06-page3.txt: [('sym', 'pathy')] AmSn19000215-V15-07-page14.txt: [('Work', 'ers')] AmSn19000215-V15-07-page15.txt: [('O', 'kie')] AmSn19000215-V15-07-page9.txt: [('c', 'om')] AmSn19000222-V15-08-page16.txt: [('SEN', 'TINEL')] AmSn19000301-V15-09-page10.txt: [('Gov', 'ERNOR')] AmSn19000301-V15-09-page13.txt: [('d', 'iet')] AmSn19000308-V15-10-page14.txt: [('A', 'IL')] AmSn19000308-V15-10-page2.txt: [('in', 'struction'), ('fr', 'ee')] AmSn19000315-V15-11-page16.txt: [('differ', 'ences')] AmSn19000322-V15-12-page14.txt: [('phys', 'ical')] AmSn19000322-V15-12-page5.txt: [('en', 
'forcement')] AmSn19000329-V15-13-page9.txt: [('Shan', 'Tung')] AmSn19000405-V15-14-page2.txt: [('Bap', 'tist')] AmSn19000405-V15-14-page5.txt: [('nigh', 'tly')] AmSn19000412-V15-15-page12.txt: [('con', 'cern')] AmSn19000419-V15-16-page15.txt: [('right', 'eousness')] AmSn19000426-V15-17-page8.txt: [('command', 'Ment')] AmSn19000426-V15-17-page9.txt: [('sen', 'timent')] AmSn19000510-V15-18-page10.txt: [('for', 'th')] AmSn19000510-V15-18-page14.txt: [('lib', 'erty')] AmSn19000510-V15-18-page16.txt: [('SEN', 'TINEL'), ('E', 'TC')] AmSn19000510-V15-18-page2.txt: [('or', 'ganization')] AmSn19000524-V15-20-page9.txt: [('con', 'quering')] AmSn19000531-V15-21-page12.txt: [('Fur', 'ther')] AmSn19000607-V15-22-page1.txt: [('t', 'ee')] AmSn19000607-V15-22-page5.txt: [('Is', 'rael')] AmSn19000607-V15-22-page8.txt: [('r', 'oo')] AmSn19000614-V15-23-page6.txt: [('a', 'pparently')] AmSn19000628-V15-25-page5.txt: [('PRO', 'GRESSIVE')] AmSn19000712-V15-27-page12.txt: [('Kiang', 'Si'), ('Kiang', 'Su')] AmSn19000712-V15-27-page13.txt: [('E', 'RT')] AmSn19000719-V15-28-page12.txt: [('u', 'nwarranted')] AmSn19000719-V15-28-page2.txt: [('Refor', 'mation'), ('the', 'Refor')] AmSn19000719-V15-28-page6.txt: [('men', 'tioned'), ('in', 'dividuals'), ('relin', 'quishes'), ('and', 're'), ('ac', 'quired')] AmSn19000719-V15-28-page7.txt: [('lib', 'erty'), ('Chris', 'tian')] AmSn19000719-V15-28-page8.txt: [('perse', 'cutions')] AmSn19000726-V15-29-page2.txt: [('em', 'inent'), ('an', 'em')] AmSn19000726-V15-29-page9.txt: [('a', 'ppointed')] AmSn19000802-V15-30-page13.txt: [('th', 'eir')] AmSn19000802-V15-30-page16.txt: [('Chris', 'tendom')] AmSn19000802-V15-30-page5.txt: [('mon', 'archy')] AmSn19000809-V15-31-page10.txt: [('relig', 'ious')] AmSn19000809-V15-31-page12.txt: [('on', 'es'), ('g', 'od'), ('con', 'gregation')] AmSn19000809-V15-31-page16.txt: [('I', 'ndependence')] AmSn19000809-V15-31-page8.txt: [('Pres', 'ent')] AmSn19000816-V15-32-page1.txt: [('e', 'ra')] 
AmSn19000816-V15-32-page16.txt: [('LIB', 'ERTY')] AmSn19000816-V15-32-page6.txt: [('utter', 'ance')] AmSn19000823-V15-33-page13.txt: [('au', 'thorizing')] AmSn19000823-V15-33-page14.txt: [('val', 'uable')] AmSn19000823-V15-33-page16.txt: [('ob', 'liged')] AmSn19000823-V15-33-page6.txt: [('Meth', 'odists')] AmSn19000830-V15-34-page12.txt: [('or', 'dered')] AmSn19000830-V15-34-page2.txt: [('r', 'ights')] AmSn19000906-V15-35-page10.txt: [('con', 'cerned')] AmSn19000906-V15-35-page11.txt: [('an', 'ything'), ('car', 'ried'), ('the', 're')] AmSn19000906-V15-35-page12.txt: [('in', 'terpOsed')] AmSn19000906-V15-35-page15.txt: [('THE', 'SE')] AmSn19000906-V15-35-page16.txt: [('dis', 'tinguish')] AmSn19000906-V15-35-page2.txt: [('e', 'xistence')] AmSn19000906-V15-35-page6.txt: [('live', 'th')] AmSn19000913-V15-36-page14.txt: [('Nor', 'theastern')] AmSn19000920-V15-37-page16.txt: [('THE', 'SE')] AmSn19000928-V15-38-page15.txt: [('W', 'IZ')] AmSn19000928-V15-38-page16.txt: [('institu', 'tions')] AmSn19000928-V15-38-page6.txt: [('Conscien', 'ce')] AmSn19001004-V15-39-page10.txt: [('vici', 'ous')] AmSn19001004-V15-39-page14.txt: [('subscrip', 'tion')] AmSn19001004-V15-39-page2.txt: [('and', 'es'), ('es', 'pecially')] AmSn19001004-V15-39-page5.txt: [('cru', 'cified')] AmSn19001011-V15-40-page11.txt: [('gov', 'ernment')] AmSn19001011-V15-40-page15.txt: [('Y', 'ou')] AmSn19001011-V15-40-page6.txt: [('r', 'esult')] AmSn19001018-V15-41-page10.txt: [('in', 'TI'), ('gro', 'und')] AmSn19001018-V15-41-page14.txt: [('SEN', 'TINEL')] AmSn19001018-V15-41-page16.txt: [('the', 'se')] AmSn19001018-V15-41-page2.txt: [('on', 'es'), ('govern', 'ment')] AmSn19001018-V15-41-page4.txt: [('ad', 'justed'), ('a', 'lWays'), ('suc', 'ceeding')] AmSn19001018-V15-41-page5.txt: [('mo', 'rality'), ('phi', 'losophy')] AmSn19001018-V15-41-page8.txt: [('exer', 'cise')] AmSn19001025-V15-42-page10.txt: [('condi', 'tions')] AmSn19001025-V15-42-page11.txt: [('interfer', 'ence'), ('settle', 'ment')] 
AmSn19001025-V15-42-page12.txt: [('be', 'lieve')] AmSn19001025-V15-42-page13.txt: [('men', 'tion')] AmSn19001025-V15-42-page14.txt: [('SEN', 'TINEL'), ('subscrip', 'tion')] AmSn19001025-V15-42-page16.txt: [('Sund', 'ay'), ('Nu', 'NN')] AmSn19001025-V15-42-page4.txt: [('ob', 'viously'), ('per', 'sisted')] AmSn19001025-V15-42-page7.txt: [('guar', 'antee')] AmSn19001101-V15-43-page12.txt: [('frequent', 'ers'), ('BaP', 'tists')] AmSn19001101-V15-43-page13.txt: [('disC', 'ouraged')] AmSn19001101-V15-43-page14.txt: [('L', 'iberty')] AmSn19001101-V15-43-page16.txt: [('de', 'mand')] AmSn19001101-V15-43-page5.txt: [('wor', 'shiping'), ('dic', 'tates')] AmSn19001101-V15-43-page6.txt: [('mission', 'ary')] AmSn19001108-V15-44-page15.txt: [('SEN', 'TINEL'), ('A', 'DDRESS')] AmSn19001108-V15-44-page16.txt: [('enforce', 'ment')] AmSn19001108-V15-44-page5.txt: [('re', 'joicings'), ('the', 're')] AmSn19001115-V15-45-page1.txt: [('e', 'CO'), ('a', 'll'), ('a', 'ssumed')] AmSn19001115-V15-45-page13.txt: [('federa', 'tion')] AmSn19001115-V15-45-page14.txt: [('hand', 'somely')] AmSn19001115-V15-45-page16.txt: [('ad', 'vocateS')] AmSn19001115-V15-45-page6.txt: [('gov', 'ernments'), ('Cath', 'olics')] AmSn19001122-V15-46-page10.txt: [('N', 'eVertheless')] AmSn19001122-V15-46-page11.txt: [('depart', 'ment'), ('in', 'dicated')] AmSn19001122-V15-46-page13.txt: [('ques', 'tion')] AmSn19001122-V15-46-page16.txt: [('m', 'ab')] AmSn19001122-V15-46-page2.txt: [('gov', 'erned')] AmSn19001122-V15-46-page7.txt: [('h', 'oy')] AmSn19001129-V15-47-page11.txt: [('to', 're')] AmSn19001129-V15-47-page13.txt: [('pene', 'trated'), ('per', 'sonal')] AmSn19001129-V15-47-page15.txt: [('n', 'eeds')] AmSn19001129-V15-47-page16.txt: [('a', 're')] AmSn19001129-V15-47-page4.txt: [('en', 'largeth')] AmSn19001206-V15-48-page3.txt: [('the', 'Es')] AmSn19001206-V15-48-page6.txt: [('Cong', 'resSman'), ('Con', 'gress')] AmSn19001206-V15-48-page7.txt: [('pri', 'marily'), ('cer', 'tainly'), ('no', 'es')] 
AmSn19001206-V15-48-page8.txt: [('Vir', 'ginia')] AmSn19001220-V15-50-page12.txt: [('c', 'urch'), ('or', 'ganized')] AmSn19001220-V15-50-page6.txt: [('Massa', 'chusetts'), ('colo', 'nies'), ('state', 'ment')]
# %load shared_elements/summary.py
# Run the overview report on the directory stored under directories['cycle']
# (the printed output below shows it resolving to .../AmSn/correction7) and keep
# the result in `summary` for the error-inspection cells that follow.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction7 Average verified rate: 0.9830117359134304 Average of error rates: 0.018671590569979114 Total token count: 8363303
# %load shared_elements/top_errors.py
# Build the errors summary from the overview report results; it is reused by the
# top-errors and long-errors cells.
errors_summary = reports.get_errors_summary( summary )
# NOTE(review): the second argument appears to act as a minimum-count threshold
# rather than a top-N limit -- the output below lists (token, count) pairs with
# counts all the way down to 11. Confirm against text2topics.reports.top_errors.
reports.top_errors( errors_summary, 10 )
[("'", 8053), ('t', 4426), ('e', 3978), ('d', 3950), ('w', 3754), ('co', 3408), ('m', 3176), ('n', 3046), ('f', 2035), ('r', 2028), ('th', 1645), ('g', 1371), ('mo', 1160), ('u', 926), ('x', 864), ('ex', 521), ('pa', 410), ('q', 399), ('sunday-law', 334), ('k', 315), ("the'", 304), ('pp', 299), ('tion', 276), ("conscience'", 260), ('ch', 253), ('seventhday', 249), ('re', 224), ('ga', 220), ('oc', 218), ('z', 215), ('wm', 215), ('satolli', 210), ('employes', 209), ('munn', 207), ('ti', 200), ('id', 181), ('un', 173), ('ry', 170), ('al', 166), ('sunday-closing', 160), ('ca', 151), ('ment', 146), ('chain-gang', 136), ("to'", 134), ('nd', 130), ('ll', 128), ('lb', 125), ('il', 123), ('bateham', 122), ('cmsar', 121), ('se', 120), ('aleck', 112), ("and'", 109), ('socalled', 106), ('sunday-rest', 104), ('sentin', 104), ('milly', 103), ('cc', 101), ('te', 101), ('erican', 99), ("of'", 98), ('va', 95), ('nt', 92), ('fellow-citizens', 92), ('vt', 92), ('cd', 92), ('tt', 89), ('aa', 89), ('op', 89), ('ft', 88), ('-', 88), ("a'", 86), ('eze', 84), ('attorney-general', 83), ('ma', 82), ('csar', 81), ('stundists', 80), ('cereola', 79), ('lc', 79), ('neander', 78), ('religio-political', 76), ('mc', 74), ('law-abiding', 74), ('sundaylaw', 74), ('rican', 73), ('tions', 72), ('ay', 72), ('li', 72), ('edmunds', 71), ('ni', 71), ('ra', 71), ("crafts's", 70), ('rd', 70), ('ia', 70), ("is'", 69), ('sr', 69), ('freethought', 68), ("in'", 67), ('si', 67), ("crafts'", 66), ("folks'", 66), ("cmsar's", 66), ('un-american', 64), ('sabbath-day', 63), ('rest-day', 62), ('sabbath-breaking', 60), ("''", 59), ('pr', 58), ('mt', 58), ("'s", 58), ('ic', 57), ('ac', 57), ('ne', 56), ('candidus', 56), ("barbers'", 56), ('paeifie', 56), ('na', 55), ('tregelles', 54), ('geikie', 54), ('ie', 54), ("that'", 54), ('ican', 53), ("an'", 53), ('ity', 53), ('dred', 53), ('employe', 52), ("it'", 52), ('ky', 51), ('assoeiation', 51), ('ob', 51), ('litt', 51), ('ri', 50), ('ernment', 50), ('wellknown', 50), 
('coxey', 50), ('ci', 50), ('ofthe', 49), ('tional', 49), ('ce', 49), ('es', 49), ('ments', 49), ('vo', 48), ('aro', 48), ('pre', 47), ('fa', 47), ('ph', 47), ('cathedra', 46), ("jones'", 46), ('ow', 45), ('leiper', 45), ('ro', 45), ('forit', 45), ('ent', 45), ("citizens'", 44), ('mi', 43), ('sundayclosing', 43), ('judefind', 42), ('ple', 42), ('sh', 42), ('erty', 42), ('times-herald', 42), ('chaingang', 42), ('tischendorf', 42), ('ei', 41), ('bo', 41), ("o'keefe", 41), ('hiberty', 41), ('cr', 41), ("be'", 41), ("law'", 40), ('wo', 40), ('sunday-keeping', 40), ('ea', 40), ('yo', 40), ('chapelle', 40), ('ct', 40), ('tinel', 40), ('em', 40), ("i'", 39), ('ip', 39), ('keane', 39), ('tv', 39), ('copygraph', 39), ("waterman's", 38), ('lachmann', 38), ('tian', 38), ('oi', 38), ('kai', 38), ('ica', 38), ("cruden's", 38), ('ers', 37), ('non-sectarian', 37), ('adress', 37), ("csar's", 37), ('saye', 37), ('church-and-state', 37), ("american'", 37), ('coxe', 36), ('ta', 36), ('io', 36), ('tr', 36), ('dwyer', 36), ('oa', 35), ("for'", 35), ("not'", 35), ('pf', 35), ('tir', 35), ('td', 35), ('mass-meeting', 35), ('swiggart', 35), ("this'", 35), ('fi', 35), ('az', 35), ('law-making', 34), ('ance', 34), ('da', 34), ('first-day', 34), ('jagoe', 34), ('inthe', 34), ('godgiven', 34), ("church'", 34), ('brunot', 33), ('ther', 33), ('cwsar', 33), ('gious', 33), ('entinel', 33), ('eh', 33), ('cl', 33), ('sa', 32), ('ss', 32), ("as'", 32), ('vice-presidents', 32), ('base-ball', 32), ('ap', 32), ('stitution', 32), ("infants'", 32), ('ba', 32), ('saloon-keepers', 32), ('oo', 31), ('lt', 31), ('sun-worship', 31), ("liberty'", 31), ('ts', 31), ('one-seventh', 31), ('rt', 31), ('prayer-meeting', 31), ('slattery', 31), ('colitical', 30), ('efical', 30), ('mn', 30), ('rs', 30), ('vox', 30), ('fr', 30), ('ao', 29), ('os', 29), ('mg', 29), ("are'", 29), ("roberts'", 29), ('ge', 29), ('tc', 29), ('rn', 29), ('kauffman', 29), ('fo', 29), ('ns', 29), ('crowther', 29), ("workingmen's", 29), 
('puplishing', 29), ("all'", 29), ('holidayism', 28), ('oe', 28), ('self-preservation', 28), ('street-cars', 28), ("parkhurst's", 28), ('charta', 28), ('dei', 28), ('newyork', 28), ('durborow', 28), ('liberty-loving', 28), ('zwiebach', 28), ("fathers'", 28), ('non-catholics', 28), ('tl', 28), ('fair-minded', 27), ("krug's", 27), ('merican', 27), ("sabbath'", 27), ('non-observance', 27), ('ful', 27), ('iu', 27), ('schurman', 27), ("cushing's", 27), ('mccauley', 27), ('self-defense', 27), ('theo', 27), ('fellow-man', 27), ("gibbons'", 27), ("or'", 26), ('counter-arguments', 26), ('sabbathkeeping', 26), ('ablegate', 26), ('fora', 26), ('fide', 26), ('platt', 26), ('thon', 26), ("with'", 26), ('itis', 26), ('hto', 26), ('bula', 26), ("god'", 26), ('ve', 26), ("which'", 26), ('pecci', 26), ('divinely-appointed', 26), ('ae', 26), ('non-religious', 26), ("by'", 26), ('selfgovernment', 26), ('ence', 25), ("lions'", 25), ('weakley', 25), ('eferson', 25), ('nethinim', 25), ('weyler', 25), ('feligious', 25), ('ig', 25), ('ou', 25), ('fah', 25), ("d'aubigne", 25), ('martinelli', 25), ('tn', 25), ("at'", 25), ('sundayschool', 25), ('ous', 25), ('ridpath', 25), ('tne', 25), ('publishinc', 25), ('loth', 24), ('krug', 24), ('ceesar', 24), ('stuttle', 24), ('mehan', 24), ('tothe', 24), ('tiie', 24), ('ligion', 24), ("preachers'", 24), ("if'", 24), ('phelan', 24), ('ws', 24), ('ut', 23), ('humbert', 23), ('lawabiding', 23), ('twentyfive', 23), ('atterbury', 23), ('sunday-sabbath', 23), ('nn', 23), ('arierican', 23), ('ble', 23), ("saints'", 23), ('ber', 23), ('om', 23), ('ious', 23), ('tbe', 22), ('anb', 22), ('ili', 22), ('ef', 22), ('ib', 22), ('bt', 22), ('tb', 22), ('ligious', 22), ("have'", 22), ('ab', 22), ('scudder', 22), ('pany', 22), ('sel', 22), ('wi', 22), ('gi', 22), ('anti-christian', 22), ("we'", 22), ('jeferson', 22), ('xact', 21), ("pub'rs", 21), ("grocers'", 21), ("e'", 21), ('comegys', 21), ('scovel', 21), ('sevent', 21), ('po', 21), ('mee', 21), ('witham', 21), 
('thos', 21), ('ng', 21), ('yefferson', 21), ('ive', 21), ("sunday'", 21), ('notgive', 21), ('postmaster-general', 20), ("hutchings'", 20), ('washburne', 20), ("he'", 20), ('religiopolitical', 20), ('kellog', 20), ('romer', 20), ('healthgiving', 20), ("soldiers'", 20), ("satolli's", 20), ('mcglynn', 20), ('sien', 20), ('ject', 20), ("from'", 20), ('ee', 20), ("hours'", 20), ('ary', 20), ('pt', 20), ('anierican', 20), ('rosemond', 20), ("vick's", 20), ('parens', 20), ('bythe', 20), ("on'", 20), ('longnecker', 20), ("was'", 20), ("printers'", 19), ('governor-general', 19), ('anierica', 19), ('pressense', 19), ('fbr', 19), ('micr', 19), ('everts', 19), ('rorabacher', 19), ("pastors'", 19), ("gov't", 19), ('tp', 19), ('iti', 19), ('rr', 19), ('seelye', 19), ('arther', 19), ('wishart', 19), ("people'", 19), ('cosgrove', 19), ('gt', 19), ('det', 19), ('lery', 19), ('abbe', 19), ('stundist', 19), ("day'", 19), ('hagans', 19), ('montefiore', 19), ("will'", 19), ('chain-gangs', 19), ('law-makers', 19), ('sundaykeeping', 18), ('dc', 18), ('reli', 18), ('tae', 18), ('od', 18), ('enright', 18), ('anti-catholic', 18), ('non-interference', 18), ('tht', 18), ('sas', 18), ('oz', 18), ('efferson', 18), ('ible', 18), ("th'", 18), ('tianity', 18), ('tarawera', 18), ('curlett', 18), ('tii', 18), ('ey', 18), ('tolstoi', 18), ('wa', 18), ('self-styled', 18), ('--', 18), ("would'", 18), ('ccesar', 18), ('oity', 18), ('avery-stuttle', 18), ('nnw', 17), ('mal', 17), ('bf', 17), ('prin', 17), ("righteousness'", 17), ('jt', 17), ('clingman', 17), ('cedarquist', 17), ('newyorkcity', 17), ('tra', 17), ('ricans', 17), ('saloon-keeper', 17), ('rubiana', 17), ('eral', 17), ('prisot', 17), ('post-offices', 17), ('theunited', 17), ('ies', 17), ('nu', 17), ('ol', 17), ("no'", 17), ('fl', 17), ('sabbathbreaking', 17), ("a'nan", 17), ('ress', 17), ('sommerville', 17), ('ation', 17), ('church-going', 17), ('cood', 17), ('mullally', 17), ('self-governing', 17), ('nel', 17), ('um', 17), ('bondst', 17), 
('philpott', 17), ('law-breaker', 17), ('ik', 17), ('senti', 17), ('ame', 17), ('leivites', 17), ('pel', 17), ("apostles'", 17), ('hy', 17), ("schaff's", 16), ('dieu', 16), ('selfevident', 16), ('dayto', 16), ('ioo', 16), ('tf', 16), ('prepartion', 16), ('cp', 16), ("enright's", 16), ("his'", 16), ('mit', 16), ('relig', 16), ('thepeople', 16), ('sie', 16), ('alfaro', 16), ('symmachus', 16), ('xl', 16), ('ples', 16), ('facto', 16), ('erromanga', 16), ('sunday-keepers', 16), ('dividual', 16), ('peryear', 16), ('peffer', 16), ('re-enact', 16), ('ish', 16), ('socialpurity', 16), ('ith', 16), ('cs', 16), ('wilkie', 16), ("l'", 16), ('ul', 16), ('hodgson', 16), ('basle', 16), ('bas', 16), ('eousness', 16), ('zi', 15), ("who'", 15), ('ite', 15), ('sabbath-breakers', 15), ('americansentinel', 15), ('ag', 15), ('rhe', 15), ('nonsuch', 15), ('pepsia', 15), ('co-workers', 15), ('gallinger', 15), ('labberton', 15), ('thatthe', 15), ('intrust', 15), ('lttra', 15), ('ork', 15), ('aw', 15), ('law-breakers', 15), ('milman', 15), ('rampolla', 15), ("christian'", 15), ('wellbeing', 15), ("milman's", 15), ('klip', 15), ('bi', 15), ('ons', 15), ('ctesar', 15), ("their'", 15), ('re-enacted', 15), ('populi', 15), ('governinent', 15), ('wor', 15), ('hach', 15), ('sc', 15), ("miles'", 15), ("ginn's", 15), ('ih', 15), ('janes', 15), ('ov', 15), ('sulus', 15), ('stinday', 15), ('xo', 15), ('ist', 15), ('ectarian', 15), ("o'gorman", 15), ('tkt', 15), ("they'", 15), ('alvierica', 15), ('je', 15), ('birney', 15), ("religion'", 15), ('avery-stiittle', 15), ('sf', 15), ('np', 14), ('thb', 14), ('qa', 14), ("pres'ts", 14), ('secker', 14), ('intelligeneer', 14), ("'the", 14), ('yeferson', 14), ('self-exaltation', 14), ("tourists'", 14), ('chiniquy', 14), ('rittenhouse', 14), ('ormore', 14), ("moses'", 14), ('peo', 14), ('goverment', 14), ('plete', 14), ('lished', 14), ('thp', 14), ("sup'ts", 14), ("gault's", 14), ('oth', 14), ('cortlandt', 14), ('non-union', 14), ('br', 14), ("ccesar's", 14), 
('anglo-saxons', 14), ('christain', 14), ('restday', 14), ('su', 14), ('nr', 14), ('rv', 14), ('eemperance', 14), ('sabbath-breaker', 14), ('gb', 14), ('tains', 14), ("mcallister's", 14), ('malum', 14), ("williams'", 14), ("neander's", 14), ("adventists'", 14), ('lexow', 14), ('confreres', 14), ('thr', 14), ('ncluding', 14), ('af', 14), ('sient', 14), ('tution', 14), ('gl', 14), ('tennesseeans', 14), ('mu', 14), ("but'", 13), ('fon', 13), ('ver', 13), ('christ-like', 13), ('aivierican', 13), ('leaguers', 13), ('wu', 13), ("mf'g", 13), ('hoc', 13), ('dibbs', 13), ('anti-religious', 13), ('ntinel', 13), ('ual', 13), ('themies', 13), ('dont', 13), ('ex-president', 13), ('gr', 13), ("one'", 13), ('two-horned', 13), ('rp', 13), ("coxey's", 13), ('higinbotham', 13), ("t'", 13), ("protestants'", 13), ('pilman', 13), ('froni', 13), ('foi', 13), ('meeting-house', 13), ('mccourt', 13), ('thd', 13), ('waupon', 13), ("f'", 13), ("has'", 13), ('forthe', 13), ('itt', 13), ('hiscock', 13), ('sp', 13), ('self-contradictory', 13), ("torry's", 13), ('cif', 13), ("its'", 13), ('dn', 13), ('princi', 13), ('cer', 13), ('thi', 13), ('ec', 13), ('hee', 13), ('sabbathkeepers', 13), ('lelvites', 13), ('one-man', 13), ('tms', 13), ('rundschau', 13), ('tlie', 13), ('tax-payers', 13), ('non-christian', 13), ('self-appointed', 13), ("breeders'", 13), ('kossean', 13), ('olesen', 13), ('botkine', 13), ('ny', 13), ('volksraad', 13), ("whaley's", 12), ('tem', 12), ('constitu', 12), ('ess', 12), ('froin', 12), ('robb', 12), ('theire', 12), ('thein', 12), ('ors', 12), ('ical', 12), ('chappelle', 12), ("churches'", 12), ('self-constituted', 12), ('wouldbe', 12), ('entin', 12), ('week-day', 12), ('thority', 12), ('fast-day', 12), ("were'", 12), ('ex-mayor', 12), ('fortynine', 12), ('ture', 12), ('bok', 12), ('whitall', 12), ("cents'", 12), ('sition', 12), ('tte', 12), ('self-interest', 12), ('croker', 12), ("d'aubigne's", 12), ('merous', 12), ('cai', 12), ('combatting', 12), ('observa', 12), ('fp', 
12), ('yonx', 12), ('gainst', 12), ("such'", 12), ('ht', 12), ('ds', 12), ('masse', 12), ('self-respecting', 12), ('pc', 12), ('ivierican', 12), ('tobe', 12), ("do'", 12), ("christ'", 12), ('ki', 12), ('ddress', 12), ("neat's", 12), ('twenty-fifth', 12), ('ttin', 12), ('maurer', 12), ('bondstreet', 12), ('inter-state', 12), ('lation', 12), ('ang', 12), ("any'", 12), ('rk', 12), ('gx', 12), ('sunday-observance', 12), ('havergal', 11), ("james'", 11), ('olic', 11), ('thechurch', 11), ('sm', 11), ('cz', 11), ('df', 11), ("dealers'", 11), ('ke', 11), ('ets', 11), ('pm', 11), ('ex-senator', 11), ('lieve', 11), ('uncompromis', 11), ('mm', 11), ('ine', 11), ('sherk', 11), ('fifty-second', 11), ('selfpreservation', 11), ('derstanding', 11), ('naw', 11), ('tre', 11), ("states'", 11), ('theni', 11), ("state'", 11), ('communica', 11), ('rose-wood', 11), ('androscoggin', 11), ("bakers'", 11), ('sk', 11), ('taschereau', 11), ('qt', 11), ('tm', 11), ('griffitts', 11), ('fellow-workers', 11), ('kw', 11), ('bradfield', 11), ('houk', 11), ('fot', 11), ("so'", 11), ("'a", 11), ('amyot', 11), ('muskoka', 11), ('pl', 11), ('aivierica', 11), ('erties', 11), ('qf', 11), ('haye', 11), ('ost', 11), ('isthepapacyinprophecy', 11), ('sev', 11), ('rian', 11), ('mits', 11), ('notre', 11), ('key-note', 11), ('hirsch', 11), ('sealings', 11), ('rin', 11), ('evil-doers', 11), ('nott', 11), ("civil'", 11), ('theseventh', 11), ('ml', 11), ('kee', 11), ('yr', 11), ('gress', 11), ('ex-governor', 11), ('cramer', 11), ('lr', 11), ('fs', 11), ('informations', 11), ('paoipio', 11), ('twentyfour', 11), ('ridgetown', 11), ('axact', 11), ('times-democrat', 11), ('tians', 11)]
Review Remaining Errors¶
# Show the documents selected with min_error_rate=.2; the output below is a list of
# (filename, error rate) pairs, highest rate first, with no rate below 0.204.
reports.docs_with_high_error_rate( summary , min_error_rate = .2 )
[('AmSn18900918-V05-37-page4.txt', 0.495), ('AmSn18900918-V05-37-page1.txt', 0.472), ('AmSn18900918-V05-37-page8.txt', 0.459), ('AmSn18900918-V05-37-page5.txt', 0.413), ('AmSn18970701-V12-26-page1.txt', 0.404), ('AmSn18980616-V13-24-page15.txt', 0.371), ('AmSn18980630-V13-26-page15.txt', 0.356), ('AmSn18980609-V13-23-page15.txt', 0.351), ('AmSn18980623-V13-25-page15.txt', 0.335), ('AmSn18980714-V13-27-page15.txt', 0.325), ('AmSn18960220-V11-08-page7.txt', 0.262), ('AmSn18971007-V12-39-page16.txt', 0.246), ('AmSn18971014-V12-40-page16.txt', 0.238), ('AmSn18951031-V10-43-page7.txt', 0.224), ('AmSn18951219-V10-50-page7.txt', 0.215), ('AmSn18951024-V10-42-page7.txt', 0.204)]
# Keep only the filenames of the high-error documents so they can be opened
# for manual inspection (the open call below is left disabled).
docs_2_check = [
    doc_name
    for doc_name, error_rate in reports.docs_with_high_error_rate( summary , min_error_rate = .2 )
    if error_rate > 0.2
]
# utilities.open_original_docs(docs_2_check, directories['cycle'])
There are two main drivers of the remaining OCR errors. First, the original scans of the issue AmSn18900918-V05-37
captured either the layer behind the page or the shadow of text on the next page, which makes clean OCR all but impossible from those scans. The other major driver of errors is the advertisement sections of the publication, particularly ads for an Interlinear Greek New Testament.
Check Long Errors¶
# List the long unverified tokens selected with min_length=15. In the output below
# these are mostly run-together words, hyphenated compounds, and OCR noise.
reports.long_errors(errors_summary, min_length=15)
(['intensely-orthodox', 'rezteoxfebeiloenir', 'virreasetiabwayi', 'pleasant-spirited', 'eheapserviceable', 'poreversepalialf', 'estabtablishment', 'pfopositionbliat', 'christiancitizenship', 'cliiynirtreczenanmouesna', 'themonthlypaymentwillbe', 'choochee-choochee', "sup'ercalendered", 'laicciohuasrmaniangstsrtsltyittzfhefrir', 'massachusetts--mr', 'malrithinrinodths', 'gamblingfraternity', 'theoriesodfisease', 'sabbath-profanation', 'subscripmountains', 'forty-eight-page', 'couldfollybegreaterthanpraying', 'increasing-favor', "considerable'part", 'non-establishment', 'commonwealthsaid', 'theseprosecutions', 'poll-parrot-wise', "administered'that", 'theamericansabbathunion', "notwithstanding'it", 'stylesofvehicles', 'themselvesandffarrless', 'anti-prohibition', 'theflorentinemartyr', 'poverty-stricken', 'duetonasalcatarrh', 'supportingbustles', 'zarassewmesseepmaimpaw', 'petitionsofthelongislandfishermen', 'christianstatesman', 'icarapriociaovrat', 'statute-intrenched', 'theyshallbedoneaway', 'office-worshiping', 'elfqpronocincing', "prornittetitpapets'are", 'andwasonlywaiting', 'ouriettidrofbthe', 'elattliimiiiiimi', 'counterpetitions', 'ioutlinesvividlytherelationthatexistedbetween', 'languagearchbishop', "orton'simproveddrenchinggil", 'notwiamstannfrfo', 'catholicsunderstand', "jitdge'pennypacker", 'nationalreligiousliberty', 'sunday-observance', "and'unmistakably", 'beaphjseabrighklong', 'gold-from-sea-water', 'andtheschemewentthrough', 'peorepresentative', 'iiiiiiiiiiiiiiiii', 'statuteintrenched', 'foreignnationality', 'mmsmwtimmmuummlimmw', 'associationssentafrom', 'iniquity-steeped', 'nineieeathncentury', 'commissioner-general', 'politisentiments', 'anti-imperialists', 'well-constructed', 'chattanoogadaily', 'secretary--foster', 'fellow-petitioners', 'andsugarbeetland', 'penny-in-the-slot', "remarkable'importation", 'stalwart-looking', 'magistratebelieveth', 'printedongoodpaper', 'incompetentreligious', 'advertisementcaptured', 'quasi-partnership', 
'indisputabletruth', 'forgivethesetears', 'self-contradiction', 'physicalnecessity', 'ailliliilliiiiiiimiiiiii', 'corporaexplained', 'ailopteclasnytbratedl', 'owisosossorramomontr', 'smallconsideration', 'anxietyofthesundaypreachersisto', 'butthenishallknow', 'self-stultifying', 'stevensonandhisconstituents', 'thebookisneatlygottenup', 'self-stultification', 'ecclesiastisustain', 'imprisonmenttomakemenwiserandtteer', 'following-conclusions', 'beenurgohtintothechurchbythe', 'fellow-religionists', 'questiondestruction', 'karaprieicrovrai', 'comprehensiveview', 'revolutionarywar', 'conversation-published', 'conditionsprevailing', 'anddiseasesofthe', 'condignpunishment', 'anti-imperialism', 'americanimperialism', 'perfectionshould', 'reefeoivredyratrsefeinreqnucierietso', 'alreadyfarniliar', 'therightsofthepeople', 'accordpreservation', 'icldiisoienaawary', 'andhebaselyintimates', 'willianimckinley', 'commanderin-chief', 'extreme-distress-of', 'thesecommunities', 'selfregeneration', 'austria-hungarian', 'non-commissioned', 'vanymoohearrnramedneorastonninneljadyek', 'non-interference', 'publishingcompany', 'national-reform-sabbath-union-sundayclosing', 'iknowevenasalsoi', 'notwithstandingconstitutional', 'actotjejtotactat', 'isfullofhappysur', "worlsd'exposition", 'alnericanbentiuel', "embedding'itself", 'rapidlyincreasing', 'establishredemption', 'liraitedlerritorrreads', 'karapyllobaovrat', 'anti-constitutional', 'determinationforesaid', 'nomorethanfollowing', 'imbibingreligious', 'minister-secretary', 'government-without-the', 'twasintrafalgarsbay', 'comingsolongastheservicesareheld', 'inter-communication', 'weligtonsiliberty', 'constructionsupon', 'iiiiiiiiiiiiiiii', 'politicalatheism', 'idnfvtleilubageiok', 'iiiiimiiiiimmiiiiini', 'religious-persecution', 'tennesseeforbids', 'ever-threatening', 'non-communicating', 'stampswillbeacceptableforsmallremit', 'bibleobjectlessons', "don'ttakeyaonufaingrocuytwuhnatitl", 'refceoivreydeisarsinir', 'religfundamental', 
'successfuyllreosssecuthd', 'imiiiiimiiiiimiiiiim', 'gasternppaasssenger', 'uponllegislation', 'supisillustrated', 'socially-degenerating', 'convertedintoaholiday', 'long-anticipated', 'conimissioicfrem', 'andadvanceordershavebeenreceivedforhundredsofcopies', 'sorely-persecuted', "it'diaerithinates", 'includingtheologians', 'madeinstitutions', 'toanythingtending', 'sundaylawsclaimthat', 'venerablespioneer', "theworl'd-fathed", 'dark-superstitions', 'caramminambemimennommirom', 'faceteytvlaoliat', 'ttttiiiiiityttttttttttf', 'american-sabbath-union', 'whichweresosuddenlyconvertedintoreligiopolitical', 'theehouseholdmoellerofhealth', 'non-preservative', 'suppressordinance', 'pleasure-seekers', 'namesofscripture', 'sientripientimea', 'interestingthisweek', 'songsforlittleones', 'ittttttttttttttttttti', 'enforcepolitical', "aseeuted'jjaammees", 'toexerciseanycoercionwhatever', "l'itite'situcat'", "heading'paragraphs", "that'association", 'office-distributing', 'excitement-loving', 'thereligicairiberty', 'exereiseithereofv', 'liberty-lovingstatesmen', 'inresponsibility', 'photo-electrotyping', 'thoroughlyfurnished', 'theirconvictions', 'judgesteinoverruledthepleathat', 'bwaltztatoyeatort', 'bibleillustrationsandstoriesthatwehavebeen', 'religiouscharaeter', 'consideraminister', "religiously'observe", 'heavenlycitizenship', 'ongregationaliistychluich', 'singlesubscription', 'smokeof-burning-', 'heavyto-be-borne', 'compelleartalligten', 'seventyfive-dollar', 'carefully-guarded', 'adaptthelifeofchristtothe', 'nationalpridethat', 'zondaysschencling', 'pseudo-christianity', 'prisonconsecrated', 'church-and-state', 'protectionaccorded', 'postmaster-general', "salisbury'sgearlesscorset", 'cannot-regard-their', 'fellow-countrymen', 'especiallysuited', 'smintrifilivmila', 'thoseinstruments', "governmentsgod's", 'importantpiestion', 'ckinciickieseuonf', 'ivilerlicelvajle', 'appallingproportions', 'great-grandfather', 'sixtymile-an-hour', 'romanismandcivilliberty', 
'compulsory-idleness', 'constistitutional', 'correctlyrepresent', 'constitution--legislation', 'democratic-republican', 'gtilttertisenteitth', 'employmentelsewhere', 'idouwillixtnintrcuicstoru', 'sseellrfonouncing', 'ffitymityytyymyytyymtv', 'andthysicalvigor', "the'difficulties", 'ihavereceivedmybible', 'religio-politicians', 'democraticgovernment', 'counter-memorials', 'familygovernment', 'racravripyvioatv', 'atatatatatatatatatat', 'ifwiththetongues', 'sundayconcertintheoperahouse', 'commandment-keepers', 'allworkingpeople', "apartmentbuilding's", 'disconnectedherself', 'florencejarizona', 'practicepersecutionfor', 'intentionallyignore', 'miiiiimiiiiiimiiiiimiimmiiiiim', 'certainconditions', 'religioueliberty', 'lieutenant-colonel', 'scatteredthrough', 'religionsithings', 'appealandremonstrance', 'itigillihwililljaiiira', 'accomplishstatement', 'furtherexpressed', 'practicalreference', 'sendittoyourfriends', 'theargumentwhich', "money-gatherers'", 'seventh-day-keeping', 'amusement-loving', 'divinely-imposed', 'nviaenikensommewniegoe', 'asgoodassellsfor', 'agnosticsperhaps', 'saturday-sabbath', 'mmerrimmilummummulimmmummillunmil', 'sacredychronology', 'semi-reitschensk', 'perrnariehipolitieo', 'heaven-descended', 'ttttttttttttttttttttttttttttli', 'missiourielected', 'pseudo-religious', "superintendents'", 'writefordescebtlyocattuegue', 'sunday-legislation', 'self-opinionated', 'protestant-jesuit', 'permissibleunder', 'weaschristianworkersinthecauseofchristdeem', 'ofassortedhealthfoodcrackerssentpost', 'cenacliolieeoelpe', 'beuncompromisingiyoppesedto', 'priziateyinterviews', 'politicsirepresents', 'divinely-ordained', 'independencelies', 'kaitnyrdieixicseuonf', 'theyosemitevalley', 'everybodylaughed', 'counter-revolution', 'the-mediterranean', 'sheepskin-covered', "and'spiritualists", 'scientificamerican', 'otherinsurrectionists', 'dayadventistsandthecourts', 'nationshilthighty', 'thesafeligiousintolerancefromwhichallreligious', 'zntsthxtrealgcterxwc', 
'social-amusement-loving', 'enough-punishment', 'insurpassability', 'ritualhealtheltreat', "arrested'c-harged", 'merieanstatepapers', 'quarter-centennial', "teifige'znegivgtig", 'diosthnontoptuhfci', 'civilgovernmentandreligion', 'breckinridge-morse', 'amazingprevalence', 'christiansunconsciously', 'amtrintcarkilong', 'thegiairoraffitiliw', 'etianprinciplesof', 'importancethanthe', 'sundaylawmovement', "calieds'aisealpt", 'court-martialled', "xrcavolitio'obegbhuingezanra", 'kirchengeschichte', 'american-philippines', 'inquisitor-general', 'church-fellowship', 'thenaturalallianceexisting', 'independencovhich', 'anti-evangelical', 'quickly-discovered', 'self-preservation', 'carriedoutinthenameofthewholecatholiccommu', "the''''anierican", 'nineteenthcentury', 'correspondentadmits', 'advertisement-writers', 'undervitalizatiom', 'pagancounterfeits', 'counter-petition', 'christianity-with', 'larciestiiedical', "attorney-general's", 'overwhelminglyin', 're-enteringfields', 'decently-dressed', "national'apostasy", 'andtobringdowndamnationandcursesuponevery', "ruted'jamestanner", 'compelattendance', 'othermakesmaybegood', 'specialarrangement', 'indifferentiated', 'theunitedstatessenate', 'assurriptionists', 'amitricanininelo', 'itisjustwhatihavelongwanted', 'selfpreservation', 'ittenmtlettmmtrimitilm', 'uticompromisingly', 'certainlynoeffort', 'inconsistencythe', 'presbyterianbrother', 'aviorousandtisrrinad', 'revolutionaryresolution', 'fdiesthnontoptubfef', 'subscriptionprice', 'commercial-appeal', 'unitedstatesconstitutionasit', 'rapidly-increasing', 'ecclesiasticocivil', 'evidenceattachecl', 'thebiblegivenasapresentforsixnewyearlysubscriptionsat', "our-times-'toward", 'fellow-believers', 'brigadier-general', 'religio-philosophic', 'mmiimiiiiiiimiiiiiimiiiimm', 'icarapyrieliaovrat', 'entiremembership', "pgafria'nb'tatif", 'reuaftintofbeeticed', 'ftillieratillteminnimiennisiiiiiiwangmwo', 'papacywasfullydeveloped', "se'whatetherssay", "administration'was", 
'fifteenth-century', 'icfaytoaulhoagveoafnoyuirdepaerobflpiucracthiasoin', 'politicaldiseussions', "oriall'ittiseiprer", 'pointofdisturbance', 'would-bereformers', 'ten-thousand-mile', 'whichgovernments', 'thedowadelegationrand', 'tobemightyupontheearth', 'prohibitionblasphemy', 'church-cherished', 'temporalexpediency', 'thenationalsundaylawbanned', 'incomprehensibilities', 'belieftprqbrship', 'astothemeritsofthebibleweoffer', 'amendmentthought', 'onstratethepropriety', 'catholiestandard', 'educationaljathe', 'wemustthereforeconcludethatthe', 'includedwhatever', 'whatabouttheindividualwho', 'pageillustrations', 'tenderrestsupontendrive', 'precipitatelyfrom', 'goodsubstantialhighgradebicy', 'thedifferentstates', 'mueontoutlhninfg', 'penalties-enacted', 'furtherinformation', 'thesentinellibrary', 'antipedo-baptists', 'soul-crushingcorporations', "smitli'sdialraifi", 'religiousinstruction', 'mixiimiiiiimiiiiiinniiiiime', 'gttittertistinents', 'ordinaryinstruments', "teachers'fecieration", 'church-instituted', 'defendthemselves', 'theamericansentinel', 'righteousnessright', 'fourtlybommandment', 're-establishment', "will-o'-the-wisp", 'sanctimoniouspolitical', "students'library", 'forashorttimeonly', 'ever-compassionate', 'aravarimiiiiininisruninisimigivar', 'hethatspeakswithatongue', 'socialist-catholic', 'imomenzipipimmiiisim', 'foreigncountries', 'muchtoitseducationalvalue', 'luinrdeorstparnod', 'addireadytobreakandoverwhelmitinsocial', "atnerican'exposition", "will-o'-thewisps", 'miiiiimiiitimomi', 'individualchristians', 'andpronouncesthemwith', 'leaderoftheaceinpracticalimprovements', 'sthepapacyinprophecy', 'underacknowledgment', 'nationalconstitution', 'pilateunderstood', 'samplecopiesmailed', 'commander-in-chief', 'neofthelargestsanitariumin', 'containingadditional', 'selfstultification', 'caveatsjrademarks', 'cross-questioning', 'waspresentatthetrialoutlinestheproceed', 'admininistration', 'statedistinguished', "fox'sbookofmartyrs", 'labor-protecting', 
'nineteenth-century', 'presbytericvnism', 'itfollowsthatthisisnot', 'ex-attorney-general', 'pacificpressmussingco', "proipnhepsayri'npanardt", 'ourbabyisatestimonialtosanitariumfood', 'self-sufficiency', 'tdivinitycircuit', 'idsimpleconstruction', 'whichisperfectlyproperifthey', 'presspublishingco', 'statcesonstitution', 'knowledge-disseminating', 'politicalcorruption', 'inventioncertain', 'sergeant-at-arms', 'inseparablerelation', 'thecounselforthe', 'civicrighteousness', 'religious--observance', 'densely-populated', 'prohibitspriests', 'afitritifiralneviran', 'observinstitution', 'physical-necessity', 'religio-political', 'interdenominationalism', 'opportunitiesfor', 'uncompromisinglyopposed', 'evaseparate-isfr', 'non-intervention', 'information-seekers', 'half-century-old', "wetfavatwarseletertese'letetesetew", 'thecatholicchurchcannotdoany', 'papacyinprophecy', 'politicalreligious', 'billiard-playing', 'unfpracarpanmici', 'persecutionbothinrussiaandgermany', 'theresponsibility', 'hisgloriousappearing', 'importantito-day', 'religious-liberty', 'karapynolicrerat', 'cynosureindorses', 'instanceexpansion', 'evangelical-lutheran', 'thegreatestreformer', 'pago-christianity', 'unctralitioraitiftkly', 'postagestampsaccepted', 'thtshbeoonikahnauscbriethfohrder', 'trance-mediumship', 'act-of-parliament', 'titmitmmitimmvitimmvimimim', 'constitutional-principles', 'morally-instructed', 'ithethobjecickainnaidig', 'attorney-general', 'spiritualmindedness', 'latecommissionerof', 'awnfloaiiiavhmasct', 'straight-jacketed', 'tailtstkibitiontiottitallp', 'christianisabbath', 'prayer-meetmeeting', "ictrliot'ocoteuhi", 'pseudo-millennium', 'vanderbilt-rockefeller', "themselves'damnation", 'piearksetooasdas', 'politicalspeeches', 'anotherinfluentialcommittee', 'independtrespassers', 'tax-gatherportant', "be'liarticulatif", 'direcmanufacture', 'vssbiatotiaysord', 'religioustraining', 'ednimittaeappointed', 'sabbathdesecration', 'systeinisthestate', 'carefullyselected', 
'theseintroductory', 'brecorrespondent', 'sikteefitlfeentuty', 'perfectlycertain', 'reprefientatives', 'concordance--subject', 'concernministers', 'universally-binding', 'its-constitutionality', 'anti-sunday-work', 'alvjetriiezica-int', 'abookforthechildren', 'rapidly-approaching', 'coinmuniccations', 'notuhnesreeomwlny', 'singlosubsoription', 'state--possesses', "frow'massachusetts", 'iipiiibsbirreinin', 'self-justification', 'gitvtriistattnts', "mechanicar'processes", 'papillaryattradtion', 'ireceivedthebibleingoodcondition', 'tttttttttttttttttttttttttttti', 'self-righteousness', 'uncompromisimily', 'fundamentalprinciples', 'frommassachusetts', "diligently'instruct", 'criminalzofficer', 'iubocnidcmsatlrae', 'vastexpenditures', 'noresponsibility', 'instructiongiven', 'forbearingoneanotherandforgivingoneanon', 'rightfullypossess', 'waterburyamerican', 'occasionally-found', 'whichhasforitsobjectaunionofchurchand', 'itisacompletehistoricalanddescriptivesummary', 'spirit-wrestlers', 'aiviericansentinele', 'thecommonwealths', 'illustriousvisitor', 'teodmyuapratliyving', 'responsibilflicted', 'non-professional', 'chrisrequirements', 'payingsecurities', 'unitedstatesconstitution', 'humorist-philosopher', 'thereligpossible', "politically'included", 'itsotsvesisssiti', "l'atrztomiwil'illf", "'reconcentrados'", 'm-hintthyltsfftr', 'immobility--that', 'bestandbiggestnewspaper', 'government-endowed', 'commandment-keeping', 'the-incipleneyof', 'exemption-appendix', 'itlymnvmmninntvivirmlnyrninnyvvrtfummrmymvmmyy', 'lieutenant-general', 'this-communication', "tourists'edition", 'uneompromisingly', 'overorganization', 'lspeadebilitated', 'nationalreligion', 'civilgovernineat', 'interferencemust', 'religious-legislation', 'isdiscriminationand', 'handsomelyillustrated', 'distribution-win', 'eternally-enduring', 'accuratepronuncia', 'pulpit-reverencing', 'counter-arguments', 'damefashionandherslaves', 'certainunalienable', 'andalsoofthedominionof', 'temporalgovernments', 
'respectable-looking', 'differencelbetween', 'totouristsandallclassesofinvalids', 'ofpageitwillbeseenthattherevisedsieornreads', 'andyoushouldreadit', 'presenting-popish', 'righteousnessthat', 'citerdreilediettlith', 'interferencewith', 'these-dissenting', "american'executive", 'suchanti-christian', 'union--embracing', 'thisencouragement', 'god-in-the-constitution', 'repudiconscience', 'congressman-elect', 'elfavpronouncing', '------------------------', 'fellow-passenger', "the'''onventional", 'circumstancesithe', 'concerningthismuch', 'theresponsibilities', 'miiiiimiiiiiimiiiiimiiiiimiiiiim', 'totouristsandall', 'multi-millionaire', 'infaithfulwarning', 'xpositionbnildings', 'issofarasregards', 'trans-continental', 'tleeeeeseeeemeetreeeeeeleeeeoweeek', "negoweenalkogee'ree'lkowee'ftielieiegeseilielelereennellege", 'witgibettbacription', 'arrestediprpvided', 'icarapynoicrovray', 'vrecrlanrrsefeinretinuelerietso', 'iaicificpresspublishing', 'qualificaproperly', 'andsuperiorgoodness', 'bois-de-boulogne', "pernicious'effects", 'endangeringamerican', 'sundaymuseum-closing', "alldenominations'", 'half-disheartened', 'corncommandments', 'dishonuncivilized', 'receivingtheamericansentinel', 'catarrhinhalerfree', 'ex-congregationalist', 'christian-civilization', 'practicestouching', 'b-uc-h-a-d-n-e-z-z-a-r', 'betfererigagerfents', 'isthepapacyinprophecy', 'nearly-co-extensive', 'iimmuttimtesetstliumilimosillumetuilmtounntimmilitemttlirmillotmultm', 'declaratiorrstates', 'looselegislation', 'withhandsomedesignincolors', 'igshallwbheicdiiot', 'ameeicansentinel', 'threedollar-a-day', 'fellow-clergymen', 'covxaxalkaifrtil', 'beulicompromisinglyopposedto', 'gospelredemption', 'thatzwouhaveilaws', "legislativ'fhalls", 'irreparabledamage', "world'sfairinchicago", 'sparsely-settled', 'school-inspector', 'sundaynewspapers', 'blood-guiltiness', 'thoroughnational', 'unparlianientary', 'postmastergeneral', 'anti-reformation', 'theirconfinement', 'rougotrikitgeusp', 
'pliiiilloototiollipimpiiitilligill', 'turbulently-inclined', 'followingpropositions', 'definitelylocated', 'articlesfrformom', 'representativesfromdifferentpar', 'caytoaulhoavgeoafnyouindepuaobflpicuraethiaosinngs', 'amsterdampleyden', 'andtoforbideverythingwhichisnot', 'unrrecardenpraid', 'politicalteligionists', 'containinghotiseholdand', 'aleaderofthenewdemocracy', 'whatever-standpoint', 'selfcontradictory', 'director-general', 'anti-expansionists', 'tilitakgilowledg', 'inspectorgeneral', 'llitttitittittilltja', 'well-proportioned', 'receiver-general', 'overwfieliningaria', 'ptillsfilielesddigrallrgt', 'loverofcivilandreligiousliberty', 'appropriationonsunday', 'cannot-buy-or-sell', 'constideclaration', 'civilrgovernment', 'successfullyused', 'super-calendered', 'oliwethoebbjercetekaindr', 'imidinovosillyisp', 'governmentappointed', "'self-government", 'butthecommandmentsofgodapplytothe', 'successfullydprosecuted', 'widely-different', 'nagwordsabandebelhievaeqnat', 'self-gratification', 'fellowcommissioners', 'thelawswhichprotectitare', 'counter-political', 'thicklyinhabited', 'bbelebnleunsienagrl', 'touitrrehinelieteitnhi', 'robber-chieftans', 'isdtointctreasekthesubscriptitoinlisltoffthe', 'thegroundsandartgalleriesmightbeopen', 'foritseinipirneeaitteiothertaltssittistninuitsrreistleilice', 'thelliibbeerralliitty', 'iiinssaerksetooasaas', 'exercisethemselves', 'pagancounterfeitsinbtyhe', 'antipedobaptists', 'notthelawsregardingsundayobservanceaconcession', 'prohibitszfreedom', 'iinothingbetterpublished', 'thfiftlestidifiblimitirlitif', 'religiousliberty', 'fiftyonethousand', 'protestantseatholics', 'twenty-four-hour', 'sanctificationist', 'developmenthasvaried', 'itmakesallthedifferencein', 'ltoobothforemote', 'christianitywould', 'highly-civilized', 'elementarycdaution', 'recfeoivreydeianrrsefeirnegnucieriteso', 'appeal-avalanche', 'verbatimreportofthespeechesof', 'austro-hungarian', 'ileustrationsare', 'ifyoupreferthehalf', 'legislaincorporation', 
'consciencewillbe', 'scientifically-proved', 'thtishbeoomhahnauscbreipsst', 'websterdictionary', 'non-church-goers', 'politico-ethical', 'amendmentproposed', 'sectioh-rdeclared', 'isunconstitutionaland', 'selfinterpreting', 'willbesenttoanyaddressonre', 'itwasshowntohimandto', 'churchmembership', 'thanksgiving-days', 'thatscivilization', 'no-day-in-particular', "religiopolitical'", 'street-preaching', 'eieleeriieseceix', 'american-catholic', 'wanttopraylongandprayearnestlyand', 'personal-liberty', 'lengtlireonimunication', 'pacificbainescollege', "'representative'", 'sciatriameagency', 'elkhartcaizeiage', 'theseextraordinary', 'butwhetherprophecies', 'writefordeseriptivecatalogue', 'previousviotation', 'divinely-conferred', 'dynasty-stricken', 'sunday-journalism', 'yodfamaegxedceolfflereednatt', 'theweightofoneof', 'spiritually-minded', 'sabbath-observance', 'renderingallegiance', 'miiiiimiiiiimitlin', "correspondent'of", 'seriously-minded', "ithe'iinpossibillw", 'sitoorrioraltity', 'sthvatlusaobtreuafohrte', 'dearlyunderstand', 'interestsaffected', 'gmakeasurecovenant', 'receivesappropriations', 'interffeerreennccee', 'sunday-closinglaws', 'liberty-exemplifying', 'circularsandfull', 'beuncompromisinglyopposed', 'policemanization', 'reefeoiredyeiaerrsefe', 'thanksgiving-day', 'discriminationshall', 'bottomwithmetalandready', 'thtishbeomokahnauscbreipent', 'libertypossessed', "'putratherthayte", 'theissueswhichthispaperdiscussesarethe', 'non-ecclesiastical', 'theauthorhassoughttomakethisbookone', 'civilly-enforced', 'whiicchhppiicture', 'toultknlitttlftten', 'present-president', 'divisiondvizithenndividual', 'no-entanglingalliance', 'these-jealous-minded', 'congresstoopposetheadoptionofthejointresolution', "didn'tiresigniand", 'especiallyforconvertingnutsintobutterfor', 'fikerfilneakatist', 'otherimprovements', 'thattheyareinerror', "liberty'association", 'seventh-partof-time', 'butthewatermanisthebest', 'kblifibrbatatelto', 'alutroroxitiphroheil', 
'historyofamerica', 'slagglezatattogiveitv', 'lottelltrzoistax', 'wehaveaselectstockofthisbeautifulandinstructive', 'lduidcircorouuss', 'lewatedboyerlifer', 'imchangeableness', 'foracieidanpraco', 'establishingamerican', 'exciteadmiration', 'oublisheitquarterly', "constitufiou'and", 'sliiiiimiliinneliiiiim', "'constitutional'", "missionary'incitiful", 'an-impossibility', 'renderacceptible', 'beenlrecommended', 'straight-fromthe-shoulder', 'mudthestreasoning', 'narrowest-minded', 'half-consolidated', 'thwompromisingly', 'sabbath--desecration', 'suclitsltyithuetaelfthofrutle', "profess'clitistianity", 'thoroughlyintroduce', "resolution'adopted", 'harmlesstmeeting', "self-government'", 'areportofthehearingonthesundayclosingof', 'pauobflpicuarcthiaosins', 'long-established', "wew'ilrleceivepostagestampsinsmallquantitiesandanykindofgood", 'bishop-assistant', 'sabbathobservance', 'seventh-dayadventist', 'self-pronunciation', 'possessinterposes', 'puritan-american', 'unscripturalalso', 'self-aggrandizement', 'alifornimiligsts', "missionary's'work", 'significaquestion', 'individualfreedom', 'andveryseldomevenin', 'misunderdulgence', "german'missionaries", 'miiiiimiiiiiimiiiiim', 'beingtaughtinourschools', 'breckhriagesundaybill', 'communityseventy-five', 'carefully-gleaned', 'plainly-apparent', 'wtgeantseacteirvye', "legislative'halls", 'iljniprieciidienirbe', 'ahatihntenaarvelai', 'healthandtemperancemiscellany', "imperial'government", 'german-americans', 'daintily-prepared', 'divinelyappointed', 'curiosity-seeking', 'practiceintosmall', 'treasurer-general', 'widelyrecognized', 'dfamaegxedceolfielreednatt', 'non-parishioners', 'civilgovernments', 'non-interruption', 'prevailingneglect', 'post-intelligencer', 'thunderingcataract', 'underconsumption', 'miiiiimiiiiimiciiiimiiimmiiiiim', 'counsellor-at-law', 'andthesehavebeenturnedover', 'constitutionallimitations', 'civilandreligiousfreedom', 'aecyteiryyewhere', 'rvsepvitittauarltsic', ...], 15)
Correction 8 -- Remove long error tokens
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
# Correction 8: scan every file from the previous cycle for tokens flagged by
# clean.check_for_repeating_characters, replace them in the file content, and
# write the result into this cycle's directory.
prev = cycle
cycle = "correction8"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(directories['cycle'], exist_ok=True)

# Regular, non-hidden files in the previous cycle's directory (lazy generator).
corpus = (f for f in listdir(directories['prev'])
          if not f.startswith('.') and isfile(join(directories['prev'], f)))

# Upper/lower-case alternations handed one at a time to
# clean.check_for_repeating_characters. The list never changes, so it is
# built once here rather than once per file.
sub_list = ["m|M", "e|E", "f|F", "l|L", "i|I", "t|T"]

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Blank out digits and the listed punctuation before tokenizing; the
    # original `content` is what actually gets edited and written below.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    # Collect the (token, replacement) pairs reported for every pattern.
    # NOTE(review): presumably the helper flags tokens containing long runs of
    # the given characters -- confirm against text2topics.clean.
    replacements = []
    for sub in sub_list:
        replacements.extend(clean.check_for_repeating_characters(tokens, sub))

    if replacements:
        print('{}: {}'.format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)

    # Every file is written to the new cycle, changed or not. The `with`
    # block closes the handle, so no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
AmSn18890710-V04-24-page7.txt: [('PACIFICBilliollEoll', ' ')] AmSn18911126-V06-46-page1.txt: [('PliiiilloototiollIPIMPiiitilligill', ' ')] AmSn18921020-V07-41-page1.txt: [('iiiiiiiiiiiiii', ' ')] AmSn18960402-V11-14-page3.txt: [('IIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIII', ' ')] AmSn18960924-V11-38-page5.txt: [('INTERFFEERREENNCCEE', ' ')] AmSn18980113-V13-02-page1.txt: [('ifigiiiiiiiiiiii', ' ')] AmSn18980120-V13-03-page1.txt: [('iiiiiiiiiiiiiiii', ' '), ('AilliliilliiiiIIIMIIIIII', ' '), ('MENIIIiiiiiiii', ' ')] AmSn18990202-V14-05-page13.txt: [('tleeeeeseeeemeetreeeeeeleeeeoweeek', ' ')] AmSn18990810-V14-31-page14.txt: [('MIIIIIMIIIIIIMIIIIIMIIIIIMIIIIIM', ' ')] AmSn18990817-V14-32-page14.txt: [('MMVIMAIMUMMIAMIAMIIIMMINEMIIMMIIIMMOM.', ' '), ('MIIIIIiiIIIIMS', ' '), ('MMVIMAIMUMMIAMIAMIIIMMINEMIIMMIIIMMOM.', ' '), ('MMIIMIIIIIIIMIIIIIIMIIIIMM', ' ')] AmSn18990824-V14-33-page14.txt: [('IIIIIMIIIIIMMIIIIINI', ' '), ('MIXIIMIIIIIMIIIIIINNIIIIIME', ' ')] AmSn18990831-V14-34-page14.txt: [('MIIIIIIMAIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIIMmimMliiiIM', ' ')] AmSn18990907-V14-35-page14.txt: [('MIIIIIMMIIIIIIMIIIIIM', ' ')] AmSn18990914-V14-36-page14.txt: [('SliiiiiMIliinneliiiiim', ' ')] AmSn18990914-V14-36-page15.txt: [('eitifiltWiffeffalliallill', ' ')] AmSn18990928-V14-38-page14.txt: [('militiMIIIIIMIllirMIIIIIMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')] AmSn18991019-V14-41-page14.txt: [('IMIIIIIMIIIIIMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIMIN', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')] AmSn18991102-V14-43-page14.txt: [('MIIIIIIMMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIII.', ' ')] AmSn18991109-V14-44-page14.txt: [('MItttIMIIIIIIIIIIMIIIIIMIIIII', ' ')] AmSn18991207-V14-48-page15.txt: [('M.IIIIIMIIIIIMIIIIIMIIIIMICIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')] AmSn18991214-V14-49-page15.txt: [('iiiiiiiiiiiiiiii', ' ')] AmSn18991228-V14-50-page15.txt: 
[('ImmummtimmomOmmumMommON.MMI.O.Wilimm.MMERRIMMiluMMummuliMMmummillunmil.MminmMuummunmmummismimmil.mmlimmmulimmili.mmsmWtimMmuummlimmw.m.m.ft.mammW.M', ' '), ('mimmummulimmOUmunnmOluimmmumm.m.Mumnaum.mlimmmummumilMi.Mmuimft', ' ')] AmSn19000104-V15-01-page15.txt: [('MIIIIIMIIIIIMIIIIIMIIIIIMIIIIIM', ' ')] AmSn19000111-V15-02-page14.txt: [('MIIIIIMIIIIIMICIIIIMIIIMMIIIIIm', ' ')] AmSn19000118-V15-03-page14.txt: [('MIITIIMIIIIIM', ' '), ('MIIIIIMIIIIIIMIIIIIMIIMMIIIIIM', ' ')] AmSn19000215-V15-07-page13.txt: [('IIIIIIMIIIIIMaiiiiimulimM', ' ')] AmSn19000301-V15-09-page13.txt: [('ImprimmommwmimmoVum', ' ')] AmSn19000308-V15-10-page14.txt: [('MIIIIIMIIIIIIMIIIIIM', ' ')] AmSn19000322-V15-12-page14.txt: [("NegoweeNalkogee'Ree'lkowee'ftielieiegeseilielelereeNnellege", ' ')] AmSn19000329-V15-13-page14.txt: [('eeeeeeeeeeeeeeee', ' ')] AmSn19000517-V15-19-page11.txt: [('TYMMITIMMITIVIIMMIIMMIll', ' ')] AmSn19000621-V15-24-page15.txt: [('TTTTIIIIIITYTTTTTTTTTTF', ' '), ('LLITTTITITTITTILLTJa', ' ')] AmSn19000705-V15-26-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTTTI', ' ')] AmSn19000719-V15-28-page10.txt: [('TITMITMMITIMMVITIMMVIMIMIM', ' ')] AmSn19000823-V15-33-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTTTLI', ' '), ('ITTTTTTTTTTTTTTTTTITTTT', ' ')] AmSn19000823-V15-33-page8.txt: [('itlymnvmmninntvivirmlnyrninnyvvrtfummrmymvmmyy', ' ')] AmSn19000906-V15-35-page15.txt: [("TTTTTTTTIII'TTTTI", ' '), ('TTTTTTTTTTTTTTT', ' ')] AmSn19000920-V15-37-page15.txt: [('ITTTTTTTTTTTTTTTTTTTI', ' ')]
# %load shared_elements/summary.py
# Build the overview report for the files written in the current cycle.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction8 Average verified rate: 0.9830200792014474 Average of error rates: 0.01865562518651149 Total token count: 8363231
# %load shared_elements/top_errors.py
# Summarize the errors from the latest report and show the first 50 entries.
errors_summary = reports.get_errors_summary(summary)
# NOTE(review): the second argument is presumably a minimum-frequency
# threshold -- confirm against text2topics.reports.top_errors.
frequent_errors = reports.top_errors(errors_summary, 10)
frequent_errors[:50]
[("'", 8053), ('t', 4426), ('e', 3978), ('d', 3950), ('w', 3754), ('co', 3408), ('m', 3171), ('n', 3046), ('f', 2035), ('r', 2028), ('th', 1645), ('g', 1371), ('mo', 1160), ('u', 926), ('x', 864), ('ex', 521), ('pa', 410), ('q', 399), ('sunday-law', 334), ('k', 315), ("the'", 304), ('pp', 299), ('tion', 276), ("conscience'", 260), ('ch', 253), ('seventhday', 249), ('re', 224), ('ga', 220), ('oc', 218), ('z', 215), ('wm', 215), ('satolli', 210), ('employes', 209), ('munn', 207), ('ti', 200), ('id', 181), ('un', 173), ('ry', 170), ('al', 166), ('sunday-closing', 160), ('ca', 151), ('ment', 146), ('chain-gang', 136), ("to'", 134), ('nd', 130), ('ll', 128), ('lb', 125), ('il', 123), ('bateham', 122), ('cmsar', 121)]
Correction 9 -- Separate Squashed Words
# %load shared_elements/separate_squashed_words.py
# Correction 9: split long run-together tokens ("squashed words") into their
# component words, using a cost table built from the corpus's own verified
# vocabulary, and write the corrected files into this cycle's directory.
import pandas as pd
from math import log

prev = cycle
cycle = "correction9"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(directories['cycle'], exist_ok=True)

# --- Pass 1: gather the verified tokens of the whole corpus -----------------
corpus = (f for f in listdir(directories['prev'])
          if not f.startswith('.') and isfile(join(directories['prev'], f)))

verified_tokens = []
for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    # Called for its side effect on `verified_tokens`; the return value is
    # not used.
    clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)

# Rank the verified tokens by descending frequency and keep only those seen
# more than twice.
tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token', 'freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]

sorted_list_of_words = list(words_sorted_short['token'])

# With fewer than two words the cost formula below hits log(0) (or max() of
# an empty sequence); fail with a clear message instead of a bare
# "math domain error". The exception type is still ValueError.
if len(sorted_list_of_words) < 2:
    raise ValueError(
        "Need at least two verified tokens occurring more than twice "
        "to build the word-cost table."
    )

# Cost of a word grows with the log of its frequency rank (rank 1 is the most
# frequent word). log(vocabulary size) is the same for every word, so it is
# computed once.
log_vocab_size = log(len(sorted_list_of_words))
wordcost = {k: log((i + 1) * log_vocab_size)
            for i, k in enumerate(sorted_list_of_words)}
maxword = max(len(x) for x in sorted_list_of_words)

# --- Pass 2: split squashed tokens file by file -----------------------------
# The generator above is exhausted, so the file listing is rebuilt.
corpus = (f for f in listdir(directories['prev'])
          if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = utilities.strip_punct(content)
    tokens = utilities.tokenize_text(text)

    replacements = []

    for token in tokens:
        # Candidates are tokens longer than 17 characters that are not in the
        # spelling dictionary and contain no hyphen or quote character.
        if (len(token) <= 17
                or token.lower() in spelling_dictionary
                or re.search(r"[\-\-\'\"]", token)):
            continue

        split_string = clean.infer_spaces(token, wordcost, maxword)
        list_split_string = split_string.split()

        # Keep the segmentation only if the helper verifies it against the
        # spelling dictionary.
        if clean.verify_split_string(list_split_string, spelling_dictionary):
            replacements.append((token, split_string))

    if replacements:
        print("{}: {}".format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)

    # Every file is written to the new cycle, changed or not. The `with`
    # block closes the handle, so no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
AmSn18860101-V01-01-page3.txt: [('accordpreservation', 'accord preservation')] AmSn18860301-V01-03-page6.txt: [('indifferenumeration', 'in differ enumeration')] AmSn18860301-V01-03-page8.txt: [('whichhasforitsobjectaunionofchurchand', 'which has for its object a union of church and'), ('whichweresosuddenlyconvertedintoreligiopolitical', 'which were so suddenly converted into religio political')] AmSn18860501-V01-05-page1.txt: [('establishredemption', 'establish redemption')] AmSn18860601-V01-06-page1.txt: [('theresponsibilities', 'the responsibilities')] AmSn18860601-V01-06-page6.txt: [('legislaincorporation', 'leg is lain corporation')] AmSn18860901-V01-09-page6.txt: [('Presbyterianbrother', 'Presbyterian brother')] AmSn18861001-V01-10-page7.txt: [('elementaryprinciples', 'elementary principles')] AmSn18861201-V01-12-page8.txt: [('WEhaveaselectstockofthisbeautifulandinstructive', 'WE have a select stock of this beautiful and instructive'), ('Thebookisneatlygottenup', 'The book is neatly gotten up'), ('printedongoodpaper', 'printed on good paper')] AmSn18870101-V02-01-page3.txt: [('practicepersecutionfor', 'practice persecution for')] AmSn18870301-V02-03-page2.txt: [('heartyacknowledgnaont', 'hearty a c know led g n a o n t')] AmSn18870501-V02-05-page8.txt: [('willeverbeuncompromisingly', 'will ever be uncompromisingly')] AmSn18870701-V02-07-page4.txt: [('Christianinstitutions', 'Christian institutions')] AmSn18870801-V02-08-page2.txt: [('determinationforesaid', 'determination foresaid')] AmSn18871001-V02-10-page8.txt: [('sanctimoniouspolitical', 'sanctimonious political')] AmSn18880401-V03-04-page5.txt: [('followingpropositions', 'following propositions')] AmSn18880601-V03-06-page7.txt: [('Stevensonandhisconstituents', 'Stevenson and his constituents')] AmSn18881001-V03-10-page7.txt: [('havebeennoneofourbusiness', 'have been none of our business')] AmSn18881015-V03-10a-page7.txt: [('beingtaughtinourschools', 'being taught in our schools'), 
('andevenherecognizesinthe', 'and even he recognizes in the')] AmSn18881201-V03-12-page4.txt: [('ProhibitionBlasphemy', 'Prohibition Blasphemy')] AmSn18890213-V04-04-page3.txt: [('brieflycomprehended', 'briefly comprehended')] AmSn18890320-V04-09-page7.txt: [('recuperatingqualities', 'recuperating qualities')] AmSn18890327-V04-10-page7.txt: [('LOVEROFCIVILANDRELIGIOUSLIBERTY', 'LOVER OF CIVIL AND RELIGIOUS LIBERTY')] AmSn18890417-V04-13-page7.txt: [('Avigorousandstirringad', 'A vigorous and stirring ad')] AmSn18890424-V04-14-page7.txt: [('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')] AmSn18890515-V04-16-page7.txt: [('sufferingswhichtimelyattentionmighteasilyhaveprevented', 'sufferings which timely attention might easily have prevented')] AmSn18890522-V04-17-page7.txt: [('Theissueswhichthispaperdiscussesarethe', 'The issues which this paper discusses are the')] AmSn18890522-V04-17-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18890605-V04-19-page7.txt: [('ButaGoodOneisaJoyForever', 'But a Good One is a Joy Forever'), ('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')] AmSn18890612-V04-20-page7.txt: [('LEADEROFTHEACEINPRACTICALIMPROVEMENTS', 'LEADER OF THE ACE IN PRACTICAL IMPROVEMENTS')] AmSn18890703-V04-23-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')] AmSn18890710-V04-24-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION')] AmSn18890724-V04-26-page7.txt: [('PACIFICBainesCollege', 'PACIFIC B a ines College'), ('yousupposethatthegovernment', 'you suppose that the government')] AmSn18890807-V04-28-page2.txt: [('brieflycomprehended', 'briefly comprehended')] AmSn18890807-V04-28-page3.txt: [('Earthlygovernments', 'Earthly governments')] AmSn18890807-V04-28-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18890821-V04-30-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18890828-V04-31-page2.txt: [('temporalexpediency', 'temporal expediency')] 
AmSn18890828-V04-31-page8.txt: [('Allthereformswhichtheassociationregard', 'All the reforms which the association regard')] AmSn18890905-V04-32-page1.txt: [('afterconsideration', 'after consideration')] AmSn18890905-V04-32-page2.txt: [('theUnitedStatesSenate', 'the United States Senate')] AmSn18890918-V04-34-page4.txt: [('ChristianStatesman', 'Christian Statesman')] AmSn18890918-V04-34-page8.txt: [('profitableemployment', 'profitable employment')] AmSn18890925-V04-35-page7.txt: [('THEMACHINEHASNEVERBEENUSED', 'THE MACHINE HAS NEVER BEEN USED'), ('THEWEEKLYWISCONSIN', 'THE WEEKLY WISCONSIN'), ('THEWEEKLYWISCONSIN', 'THE WEEKLY WISCONSIN')] AmSn18891002-V04-36-page7.txt: [('thedifferentStates', 'the different States'), ('THEMACHINEHASNEVERBEENUSED', 'THE MACHINE HAS NEVER BEEN USED')] AmSn18891016-V04-38-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18891023-V04-39-page6.txt: [('counterattractions', 'counter attractions')] AmSn18891030-V04-40-page7.txt: [('EMPLOYMENTandWEESE', 'EMPLOYMENT and WEE S E')] AmSn18891106-V04-41-page7.txt: [('toTouristsandallclassesofInvalids', 'to Tourists and all classes of Invalids')] AmSn18891113-V04-42-page7.txt: [('ThePicturesqueRouteforBusinessand', 'The Picturesque Route for Business and')] AmSn18891113-V04-42-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18891120-V04-43-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18891127-V04-44-page2.txt: [('ecclesiasticocivil', 'ecclesiastic o civil')] AmSn18891127-V04-44-page5.txt: [('thedowadelegationrand', 'the do wade legation rand')] AmSn18891127-V04-44-page7.txt: [('BESTandBIGGESTNEWSPAPER', 'BEST and BIGGEST NEWSPAPER')] AmSn18891204-V04-45-page5.txt: [('demonstradepriving', 'demons trade p riving')] AmSn18891204-V04-45-page7.txt: [('oftextsforeverydayintheyear', 'of texts for everyday in the year'), ('Abookforboysandgirls', 'A book for boys and girls'), ('willbesenttoanyaddressonre', 'will be sent to any 
address on r e')] AmSn18891211-V04-46-page7.txt: [('AcresLandintheaboveCounties', 'Acres Land in the above Counties'), ('Theauthorhassoughttomakethisbookone', 'The author has sought to make this book one'), ('HistoryofProtestantism', 'History of Protestantism')] AmSn18891218-V04-47-page8.txt: [('intentionallyignore', 'intentionally ignore')] AmSn18891225-V04-48-page7.txt: [('tainingtestimonials', 'tain ing testimonials')] AmSn18891225-V04-48-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18900116-V05-03-page7.txt: [('inanypropersenseofthe', 'in any proper sense of the')] AmSn18900130-V05-05-page3.txt: [('studiouslyssecular', 'studiously s secular')] AmSn18900130-V05-05-page7.txt: [('therecommendations', 'the recommendations')] AmSn18900206-V05-06-page7.txt: [('SendittoYourFriends', 'Send it to Your Friends'), ('Papacywasfullydeveloped', 'Papacy was fully developed')] AmSn18900206-V05-06-page8.txt: [('singlosubsoription', 'sing lo sub so rip t i o n')] AmSn18900213-V05-07-page4.txt: [('barefacedmisrepresentationandbytheidr', 'barefaced misrepresentation and by the i dr')] AmSn18900213-V05-07-page7.txt: [('furtherinformation', 'further information')] AmSn18900306-V05-10-page1.txt: [('THEAMERICANSENTINEL', 'THE AMERICAN SENTINEL')] AmSn18900320-V05-12-page6.txt: [('inestimableblessings', 'inestimable blessings')] AmSn18900320-V05-12-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')] AmSn18900327-V05-13-page7.txt: [('PresbyterianChurches', 'Presbyterian Churches')] AmSn18900403-V05-14-page5.txt: [('politicalreligious', 'political religious')] AmSn18900410-V05-15-page7.txt: [('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION'), ('pieceandSIXBEAUTIFULLYCOLOREDPLATES', 'piece and SIX BEAUTIFULLY COLORED PLATES'), ('containinghotiseholdand', 'containing hot is e h o l d a n d')] AmSn18900410-V05-15-page8.txt: [('beuncompromisinglyopposed', 'be uncompromisingly opposed')] AmSn18900417-V05-16-page7.txt: 
[('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION')] AmSn18900424-V05-17-page8.txt: [('singlesubscription', 'single subscription')] AmSn18900508-V05-19-page5.txt: [('nationalcharacteristic', 'national characteristic'), ('spiritualmindedness', 'spiritual mindedness')] AmSn18900515-V05-20-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')] AmSn18900522-V05-21-page2.txt: [('TOhimthatworkethnotbutbelieveth', 'TO him that worketh not but believeth'), ('Forbearingoneanotherandforgivingoneanon', 'For bearing one another and forgiving one an on')] AmSn18900522-V05-21-page6.txt: [('Congresstoopposetheadoptionofthejointresolution', 'Congress to oppose the adoption of the joint resolution')] AmSn18900529-V05-22-page7.txt: [('idsimpleconstruction', 'ids imp le construction')] AmSn18900605-V05-23-page7.txt: [('PACIFICPRESSMUSSINGCO', 'PACIFIC PRESS M U S S I N G C O'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY'), ('EstablishedonFourContinents', 'Established on Four Continents')] AmSn18900612-V05-24-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')] AmSn18900626-V05-26-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')] AmSn18900717-V05-28-page7.txt: [('EstablishedonFourContinents', 'Established on Four Continents')] AmSn18900731-V05-30-page7.txt: [('similarstylesformerlysoldandstiltofferedatmuch', 'similar styles formerly sold and stilt offered at much'), ('Verbatimreportofthespeechesof', 'Verbatim report of the speeches of')] AmSn18900807-V05-31-page1.txt: [('authoritativelywhat', 'authoritatively what')] AmSn18900807-V05-31-page7.txt: [('TheSalemWitchcraft', 'The Salem Witchcraft'), ('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION')] AmSn18900821-V05-33-page7.txt: [('BattleCreekBakeryCo', 'Battle Creek Bakery C o'), ('CIVILGOVERNMENTANDRELIGIONDUPLICATING', 'CIVIL GOVERNMENT AND RELIGION DUPLICATING')] 
AmSn18900828-V05-34-page7.txt: [('MERICANINSTITUTIONS', 'MER I CAN INSTITUTIONS'), ('CIVILGOVERNMENTANDRELIGIONDUPLICATING', 'CIVIL GOVERNMENT AND RELIGION DUPLICATING')] AmSn18900918-V05-37-page1.txt: [('compelleartaLligten', 'compel le art a L l i g t e n')] AmSn18900918-V05-37-page7.txt: [('Sentbymailonreceiptofprice', 'Sent by mail on receipt of price')] AmSn18900925-V05-38-page7.txt: [('HEALTHFOODCRACKERS', 'HEALTH FOOD CRACKERS')] AmSn18901002-V05-39-page1.txt: [('whateverappropriation', 'whatever appropriation')] AmSn18901002-V05-39-page7.txt: [('Constitutionsoldiery', 'Constitution soldiery')] AmSn18901009-V05-40-page8.txt: [('weasChristianworkersinthecauseofChristdeem', 'we as Christian workers in the cause of Christ deem'), ('UnitedStatesConstitutionasit', 'United States Constitution as it')] AmSn18901023-V05-42-page1.txt: [('correspondentadmits', 'correspondent admits')] AmSn18901023-V05-42-page4.txt: [('ionastotherightOfGovernmenttocorrect', 'ion as to the right Of Government to correct')] AmSn18901030-V05-43-page7.txt: [('Ioutlinesvividlytherelationthatexistedbetween', 'I outlines vividly the relation that existed between'), ('TheAmericanSabbathUnion', 'The American Sabbath Union')] AmSn18901113-V05-45-page6.txt: [('Thenriollownumerous', 'Then rio l low numerous')] AmSn18901113-V05-45-page7.txt: [('THENATIONALSUNDAYLAWbanned', 'THE NATIONAL SUNDAY LAW ban ned')] AmSn18901218-V05-50-page10.txt: [('revolutionaryresolution', 'revolutionary resolution')] AmSn18910108-V06-02-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('OfassortedHealthFoodCrackerssentpost', 'Of assorted Health Food Crackers sent post'), ('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')] AmSn18910115-V06-03-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')] AmSn18910212-V06-07-page3.txt: [('certainunalienable', 'certain unalienable')] AmSn18910212-V06-07-page7.txt: [('HEALTHFOODCRACKERS', 'HEALTH FOOD 
CRACKERS')] AmSn18910226-V06-09-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18910312-V06-11-page7.txt: [('furtherparticulars', 'further particulars')] AmSn18910319-V06-12-page7.txt: [('PEOPLINGOFTHEEARTH', 'PEOPLING OF THE EARTH'), ('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION')] AmSn18910319-V06-12-page8.txt: [('singlesubscription', 'single subscription')] AmSn18910402-V06-14-page7.txt: [('PEOPLINGOFTHEEARTH', 'PEOPLING OF THE EARTH')] AmSn18910409-V06-15-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18910409-V06-15-page8.txt: [('specialarrangement', 'special arrangement')] AmSn18910416-V06-16-page6.txt: [('inscriptionianitig', 'inscription ian it i g'), ('differentreligious', 'different religious')] AmSn18910416-V06-16-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18910430-V06-18-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18910507-V06-19-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18910521-V06-21-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18910723-V06-29-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')] AmSn18910730-V06-30-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')] AmSn18910827-V06-34-page6.txt: [('ReligiousLibertyAssociation', 'Religious Liberty Association')] AmSn18910903-V06-35-page7.txt: [('STHEPAPACYINPROPHECY', 'S THE PAPACY IN PROPHECY')] AmSn18910910-V06-36-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')] AmSn18910917-V06-37-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')] AmSn18910924-V06-38-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor 
of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18911008-V06-39-page7.txt: [('ThunderingCataract', 'Thundering Cat ar act'), ('TheHouseholdWalterofHealth', 'The Household Walter of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')] AmSn18911015-V06-40-page7.txt: [('BYWILLIAMJACKSONARMSTRONG', 'BY WILLIAM JACKSON ARMSTRONG')] AmSn18911029-V06-42-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY'), ('LateCommissionerof', 'Late Commissioner of')] AmSn18911224-V06-50-page7.txt: [('petitionsoftheLongIslandfishermen', 'petitions of the Long Island fishermen')] AmSn18920107-V07-01-page6.txt: [('denominationalists', 'denomination a lists')] AmSn18920204-V07-05-page7.txt: [('andYOUshouldreadit', 'and YOU should read it')] AmSn18920211-V07-06-page1.txt: [('spiritualmindedness', 'spiritual mindedness')] AmSn18920218-V07-07-page3.txt: [('politicalinstitutions', 'political institutions')] AmSn18920310-V07-10-page8.txt: [('disestablishmentmay', 'disestablishment may')] AmSn18920324-V07-12-page6.txt: [('Sundayconcertintheoperahouse', 'Sunday concert in the opera house'), ('theCatholicChurchcannotdoany', 'the Catholic Church cannot do any')] AmSn18920324-V07-12-page7.txt: [('SAMPLECOPIESMAILED', 'SAMPLE COPIES MAI L ED')] AmSn18920407-V07-14-page7.txt: [('everypersoninterestedinmissionaryworkathome', 'every person interested in missionary work at home')] AmSn18920428-V07-17-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')] AmSn18920526-V07-21-page7.txt: [('SAMPLECOPIESMAILED', 'SAMPLE COPIES MAI L ED')] AmSn18920623-V07-25-page6.txt: [('NATIONALRELIGIOUSLIBERTY', 'NATIONAL RELIGIOUS LIBERTY'), ('representativesfromdifferentpar', 'representatives from different par')] AmSn18920630-V07-26-page3.txt: [('protectionaccorded', 'protection accorded')] AmSn18920721-V07-28-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')] AmSn18920728-V07-29-page5.txt: [('represeneverlasting', 'rep res 
en everlasting')] AmSn18920811-V07-31-page4.txt: [('therepresentatives', 'the representatives')] AmSn18920811-V07-31-page7.txt: [('theultrawingofthechurchpeople', 'the ultra wing of the church people'), ('thegroundsandartgalleriesmightbeopen', 'the grounds and art galleries might be open')] AmSn18921006-V07-39-page7.txt: [('ITISACOMPLETEHISTORICALANDDESCRIPTIVESUMMARY', 'IT IS A COMPLETE HISTORICAL AND DESCRIPTIVE SUMMARY')] AmSn18921013-V07-40-page8.txt: [('receivingTHEAMERICANSENTINEL', 'receiving THE AMERICAN SENTINEL')] AmSn18921020-V07-41-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')] AmSn18921027-V07-42-page6.txt: [('nineteentwentieths', 'nineteen twentieth s')] AmSn18921027-V07-42-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')] AmSn18921117-V07-45-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')] AmSn18921124-V07-46-page6.txt: [('Thereisnosafetyforourcountry', 'There is no safety for our country'), ('andsuperiorgoodness', 'and superior goodness'), ('SecretaryAmericanSecuraUnion', 'Secretary American Sec ur a Union')] AmSn18921201-V07-47-page2.txt: [('disconnectedherself', 'disconnected herself')] AmSn18930126-V08-04-page7.txt: [('persecutionbothinRussiaandGermany', 'persecution both in Russia and Germany'), ('notbeopenedonSunday', 'not be opened on Sunday'), ('isunconstitutionaland', 'is unconstitutional and')] AmSn18930223-V08-08-page7.txt: [('thattheyareinerror', 'that they are in error'), ('andthatthelawsofthe', 'and that the laws of the')] AmSn18930309-V08-10-page6.txt: [('Associationssentafrom', 'Associations sent a from')] AmSn18930309-V08-10-page7.txt: [('pointofdisturbance', 'point of disturbance'), ('Itwasshowntohimandto', 'It was shown to him and to')] AmSn18930323-V08-12-page1.txt: [('Governmentappointed', 'Government appointed')] AmSn18930608-V08-23-page8.txt: [('JudgeSteinoverruledthepleathat', 'Judge Stein overruled the plea that'), ('Itmakesallthedifferencein', 'It 
makes all the difference in')] AmSn18930615-V08-24-page5.txt: [('recentlyinterviewed', 'recently interviewed')] AmSn18930622-V08-25-page7.txt: [('BibleObjectLessons', 'Bible Object Lessons'), ('SongsforLittleOnes', 'Songs for Little Ones')] AmSn18930629-V08-26-page7.txt: [('BATTLECREEKBAKERYCO', 'BATTLE CREEK BAKERY C O')] AmSn18930706-V08-27-page7.txt: [('AReportoftheHearingontheSundayClosingof', 'A Report of the Hearing on the Sunday Closing of'), ('ItEnablesEveryManandWoman', 'It Enables Every Man and Woman')] AmSn18930713-V08-28-page7.txt: [('BATTLECREEKBAKERYCO', 'BATTLE CREEK BAKERY C O')] AmSn18930713-V08-28-page8.txt: [('interestingthisweek', 'interesting this week')] AmSn18930720-V08-29-page6.txt: [('religiouspersecution', 'religious persecution')] AmSn18930727-V08-30-page6.txt: [('thejudgmentmayfallonanypartofasinfulnation', 'the judgment may fall on any part of a sinful nation')] AmSn18930803-V08-31-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods'), ('OurbabyisatestimonialtoSanitariumfood', 'Our baby is a testimonial to Sanitarium food'), ('andisasruddyandhealthya', 'and is as ruddy and healthy a')] AmSn18930810-V08-32-page7.txt: [('AppealandRemonstrance', 'Appeal and Remonstrance'), ('theFlorentineMartyr', 'the Florentine Martyr')] AmSn18930817-V08-33-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods')] AmSn18930907-V08-35-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods')] AmSn18930921-V08-37-page8.txt: [('feWoriiiiiresseTatthe', 'feW or iii i ir esse Tat the')] AmSn18931116-V08-45-page7.txt: [('adaptthelifeofChristtothe', 'adapt the life of Christ to the')] AmSn18931123-V08-46-page7.txt: [('SPECIALHOLIDAYOFFER', 'SPECIAL HOLIDAY OFFER')] AmSn18931221-V08-50-page7.txt: [('Bibleillustrationsandstoriesthatwehavebeen', 'Bible illustrations and stories that we have been')] AmSn18940125-V09-04-page1.txt: [('thechurchforAmerica', 'the church for America'), ('thenaturalallianceexisting', 'the natural alliance existing')] 
AmSn18940215-V09-07-page7.txt: [('andtobringdowndamnationandcursesuponevery', 'and to bring down damnation and curses upon every'), ('Couldfollybegreaterthanpraying', 'Could folly be greater than praying')] AmSn18940329-V09-13-page7.txt: [('withgeographicalstatisticalnotes', 'with geographical statistical notes')] AmSn18940412-V09-15-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18940419-V09-16-page5.txt: [('underacknowledgment', 'under acknowledgment')] AmSn18940524-V09-21-page4.txt: [('addireadytobreakandoverwhelmitinsocial', 'addi ready to break and overwhelm it in social')] AmSn18940621-V09-25-page7.txt: [('isthesameinallagesoftheworld', 'is the same in all ages of the world')] AmSn18940802-V09-31-page2.txt: [('toexerciseanycoercionwhatever', 'to exercise any coercion whatever')] AmSn18940823-V09-33-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')] AmSn18941004-V09-39-page5.txt: [('receivesappropriations', 'receives appropriations')] AmSn18941018-V09-41-page3.txt: [('anxietyoftheSundaypreachersisto', 'anxiety of the Sunday preachers is to'), ('nomorethanfollowing', 'no more than following')] AmSn18941018-V09-41-page4.txt: [('carriedoutinthenameofthewholeCatholiccommu', 'carried out in the name of the whole Catholic com m u')] AmSn18941101-V09-43-page8.txt: [('convertedintoaholiday', 'converted into a holiday'), ('Thelawswhichprotectitare', 'The laws which protect it are')] AmSn18941122-V09-46-page7.txt: [('ACRITICALHARMONYoftheGOSPELS', 'A CRITICAL HARMONY of the GOSPELS')] AmSn18941206-V09-48-page8.txt: [('fromtheAdventistprintingofficeistheSchutzenplatz', 'from the Adventist printing office is the S c hut z e n p l a t z')] AmSn18941213-V09-49-page7.txt: [('comingsolongastheservicesareheld', 'coming so long as the services are held'), ('wanttopraylongandprayearnestlyand', 'want to pray long and pray earnestly and'), ('comeoutopenlyinfavorofreligiousedu', 'come out openly in favor of religio use du'), 
('hassoclearlyspoken', 'has so clearly spoken'), ('andhebaselyintimates', 'and he basely intimates'), ('WillpolishastovebetterClumany', 'Will polish a stove better C l u m a n y')] AmSn18950117-V10-03-page1.txt: [('illustriousvisitor', 'illustrious visitor')] AmSn18950124-V10-04-page7.txt: [('tenderrestsupontendrive', 'tender rests upon ten drive')] AmSn18950131-V10-05-page3.txt: [('merieanStatePapers', 'me rie an State Papers')] AmSn18950307-V10-10-page2.txt: [('notthelawsregardingSundayobservanceaconcession', 'not the laws regarding Sunday observance a concession'), ('wouldbecomeuniversal', 'would become universal')] AmSn18950425-V10-17-page6.txt: [('GoodSubstantialHighGradeBICY', 'Good Substantial High Grade BIC Y'), ('includingtheologians', 'including theologians')] AmSn18950509-V10-19-page4.txt: [('whatabouttheindividualwho', 'what about the individual who'), ('Transvaalgovernment', 'Transvaal government')] AmSn18950620-V10-25-page1.txt: [('Wemustthereforeconcludethatthe', 'We must therefore conclude that the')] AmSn18950725-V10-30-page7.txt: [('HISGLORIOUSAPPEARING', 'HIS GLORIOUS APPEARING')] AmSn18950725-V10-30-page8.txt: [('anduncompromisingly', 'and uncompromisingly')] AmSn18950822-V10-33-page5.txt: [('lieutenantgovernor', 'lieutenant governor')] AmSn18950822-V10-33-page7.txt: [('dayAdventistsandthecourts', 'day Adventists and the courts')] AmSn18950926-V10-38-page5.txt: [('domiriatelegislation', 'dom iri ate legislation')] AmSn18951010-V10-40-page7.txt: [('muchtoitseducationalvalue', 'much to its educational value')] AmSn18951017-V10-41-page7.txt: [('andtheschemewentthrough', 'and the scheme went through'), ('andthesehavebeenturnedover', 'and these have been turned over'), ('anddraggedhimofftojail', 'and dragged him off to jail'), ('andtoforbideverythingwhichisnot', 'and to forbid everything which is not'), ('theonlydangerwhichthreatenstheliberties', 'the only danger which threatens the liberties')] AmSn18951031-V10-43-page7.txt: 
[('butwhetherprophecies', 'but whether prophecies'), ('theyshallbedoneaway', 'they shall be done away')] AmSn18951121-V10-46-page7.txt: [('andveryseldomevenin', 'and very seldom even in')] AmSn18951128-V10-47-page3.txt: [('fellowcommissioners', 'fellow commissioners')] AmSn18951128-V10-47-page8.txt: [('THERIGHTSofthePEOPLE', 'THE RIGHTS of the PEOPLE')] AmSn18951219-V10-50-page7.txt: [('waspresentatthetrialoutlinestheproceed', 'was present at the trial outlines the proceed')] AmSn18960109-V11-02-page7.txt: [('andalsooftheDominionof', 'and also of the Dominion of'), ('Othermakesmaybegood', 'Other makes may be good'), ('buttheWATERMANISTHEBEST', 'but the WATERMAN IS THE BEST')] AmSn18960319-V11-12-page7.txt: [('SweepingEverything', 'Sweeping Everything')] AmSn18960326-V11-13-page7.txt: [('SweepingEverything', 'Sweeping Everything')] AmSn18960402-V11-14-page3.txt: [('RELIGIOUSintolerance', 'RELIGIOUS intolerance')] AmSn18960409-V11-15-page8.txt: [('Itfollowsthatthisisnot', 'It follows that this is not')] AmSn18960416-V11-16-page8.txt: [('whichisperfectlyproperifthey', 'which is perfectly proper if they')] AmSn18960430-V11-18-page6.txt: [('Sabbathdesecration', 'Sabbath desecration')] AmSn18960430-V11-18-page7.txt: [('AttractiveBoardCover', 'Attractive Board Cover')] AmSn18960514-V11-20-page7.txt: [('andpronouncesthemwith', 'and pronounces them with'), ('tobemightyupontheearth', 'to be mighty upon the earth')] AmSn18960702-V11-26-page7.txt: [('enablesittopromotethehealth', 'enables it to promote the health')] AmSn18960730-V11-30-page7.txt: [('ButthecommandmentsofGodapplytothe', 'But the commandments of God apply to the'), ('FormtheGREATTHROUGHLINEtoall', 'Form the GREAT THROUGH LINE to all'), ('TWOCANNIBALARCHIPELAGOES', 'TWO CANNIBAL ARCHIPELAGOES')] AmSn18960813-V11-32-page4.txt: [('controversieswhich', 'controversies which')] AmSn18960903-V11-35-page2.txt: [('entitledtoallthebenefitsofcivilsocieyt', 'entitled to all the benefits of civil so c i e y t')] 
AmSn18961001-V11-39-page7.txt: [('GeorgeFredWilliams', 'George Fred Williams'), ('ALeaderoftheNewDemocracy', 'A Leader of the New Democracy'), ('Othermakesmaybegood', 'Other makes may be good'), ('buttheWATERMANISTHEBEST', 'but the WATERMAN IS THE BEST')] AmSn18961015-V11-41-page7.txt: [('SendforCircularandTerms', 'Send for Circular and Terms'), ('Stampswillbeacceptableforsmallremit', 'Stamps will be acceptable for small remit')] AmSn18961119-V11-46-page2.txt: [('closingdecadeofthenineteenthcentury', 'closing decade of the nineteenth century')] AmSn18961126-V11-47-page4.txt: [('Religiousestablishments', 'Religious establishments')] AmSn18961210-V11-49-page7.txt: [('withhandsomedesignincolors', 'with handsome design in colors')] AmSn18970121-V12-03-page10.txt: [('wouldbeunchristian', 'would be unchristian')] AmSn18970121-V12-03-page11.txt: [('inseparablerelation', 'inseparable relation')] AmSn18970121-V12-03-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')] AmSn18970128-V12-04-page16.txt: [('NationalConstitution', 'National Constitution')] AmSn18970204-V12-05-page13.txt: [('ecclesiasticalinstitution', 'ecclesiastical institution')] AmSn18970211-V12-06-page3.txt: [('theseextraordinary', 'these extraordinary')] AmSn18970311-V12-10-page15.txt: [('NationalConstitution', 'National Constitution')] AmSn18970311-V12-10-page4.txt: [('Congregationalchurch', 'Congregational church')] AmSn18970318-V12-11-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O'), ('ABookfortheChildren', 'A Book for the Children')] AmSn18970325-V12-12-page8.txt: [('unalterableprinciple', 'unalterable principle')] AmSn18970401-V12-13-page16.txt: [('thisbookreadingismadeeasy', 'this book reading is made easy'), ('MyBiblereceivedthis', 'My Bible received this')] AmSn18970408-V12-14-page5.txt: [('TheItaliangovernment', 'The Italian government')] AmSn18970415-V12-15-page15.txt: [('gasternPPaasssenger', 'gas tern P P a ass sen ger')] 
AmSn18970422-V12-16-page16.txt: [('PracticalReference', 'Practical Reference')] AmSn18970506-V12-18-page16.txt: [('thoroughlyintroduce', 'thoroughly introduce')] AmSn18970513-V12-19-page1.txt: [('fundamentalprinciples', 'fundamental principles')] AmSn18970520-V12-20-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')] AmSn18970520-V12-20-page16.txt: [('Ifyoupreferthehalf', 'If you prefer the half'), ('themonthlypaymentwillbe', 'the monthly payment will be')] AmSn18970603-V12-22-page12.txt: [('religiousinstruction', 'religious instruction')] AmSn18970610-V12-23-page5.txt: [('interdenominationalism', 'inter denominationalism')] AmSn18970624-V12-25-page10.txt: [('approvingconscience', 'approving conscience')] AmSn18970624-V12-25-page5.txt: [('strictlyeconomical', 'strictly economical')] AmSn18970701-V12-26-page14.txt: [('PACIFICPRESSPUBLISHINGCP', 'PACIFIC PRESS PUBLISHING C P')] AmSn18970707-V12-27-page15.txt: [('InvaluabletoBibleandHistoryStudents', 'Invaluable to Bible and History Students')] AmSn18970722-V12-29-page2.txt: [('advancingevolution', 'advancing evolution')] AmSn18970722-V12-29-page4.txt: [('endangeringAmerican', 'endangering American')] AmSn18970805-V12-31-page9.txt: [('gamblingfraternity', 'gambling fraternity')] AmSn18970819-V12-33-page3.txt: [('ilvyettoAnaximandercreditfortheinventionofathin', 'i lv yet to Anaximander credit for the invention of a thin')] AmSn18970916-V12-36-page16.txt: [('TheBiblegivenasapresentforsixNEWyearlysubscriptionsat', 'The Bible given as a present for six NEW yearly subscriptions at')] AmSn18971007-V12-39-page16.txt: [('AstothemeritsoftheBibleweoffer', 'As to the merits of the Bible we offer'), ('IhavereceivedmyBible', 'I have received my Bible'), ('andtosayIamwellpleasedwould', 'and to say I am well pleased would')] AmSn18971014-V12-40-page14.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')] AmSn18971014-V12-40-page16.txt: [('ItisjustwhatIhavelongwanted', 'It is just what I have 
long wanted'), ('IreceivedtheBibleingoodcondition', 'I received the Bible in good condition')] AmSn18971021-V12-41-page14.txt: [('expensesreasonable', 'expenses reasonable')] AmSn18971028-V12-42-page16.txt: [('ofpageitwillbeseenthattheRevisedsieornreads', 'of page it will be seen that the Revised s i e o r n r e a d s')] AmSn18971028-V12-42-page2.txt: [('dangerousnegligence', 'dangerous negligence')] AmSn18971028-V12-42-page9.txt: [('politicalcorruption', 'political corruption')] AmSn18971111-V12-44-page1.txt: [('democraticgovernment', 'democratic government')] AmSn18971111-V12-44-page7.txt: [('thegreatestreformer', 'the greatest reformer')] AmSn18971125-V12-46-page14.txt: [('especiallyforconvertingnutsintobutterfor', 'especially for converting nuts into butter for')] AmSn18971209-V12-48-page4.txt: [('probablyinfluenced', 'probably influenced')] AmSn18971230-V12-50-page13.txt: [('differencelbetween', 'difference l between')] AmSn18980106-V13-01-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')] AmSn18980113-V13-02-page15.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')] AmSn18980113-V13-02-page16.txt: [('togiveourpatronsabenefit', 'to give our patrons a benefit')] AmSn18980120-V13-03-page14.txt: [('DameFashionandHerSlaves', 'Dame Fashion and Her Slaves'), ('HealthandTemperanceMiscellany', 'Health and Temperance Miscellany')] AmSn18980120-V13-03-page3.txt: [('individualChristians', 'individual Christians')] AmSn18980120-V13-03-page6.txt: [('heavenlycitizenship', 'heavenly citizenship')] AmSn18980127-V13-04-page14.txt: [('oftheCYCLONECONTINUES', 'of the CYCLONE CONTINUES')] AmSn18980203-V13-05-page14.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')] AmSn18980210-V13-06-page8.txt: [('righteousnessright', 'righteousness right')] AmSn18980210-V13-06-page9.txt: [('Onstratethepropriety', 'On st rate the propriety')] AmSn18980217-V13-07-page15.txt: [('Anothervaluablefeature', 'Another valuable 
feature')] AmSn18980224-V13-08-page6.txt: [('ourChristiancharacter', 'our Christian character')] AmSn18980303-V13-09-page14.txt: [('bottomwithmetalandready', 'bottom with metal and ready')] AmSn18980303-V13-09-page7.txt: [('EstablishingReligious', 'Establishing Religious')] AmSn18980310-V13-10-page14.txt: [('Catalogueofallourstyles', 'Catalogue of all our styles'), ('CatarrhInhalerFree', 'Catarrh Inhaler Free')] AmSn18980331-V13-13-page14.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')] AmSn18980331-V13-13-page15.txt: [('Anothervaluablefeature', 'Another valuable feature')] AmSn18980407-V13-14-page12.txt: [('covetousdisposition', 'covetous disposition')] AmSn18980414-V13-15-page14.txt: [('Communicationsstrictly', 'Communications strictly')] AmSn18980512-V13-19-page6.txt: [('intermeddlehimself', 'intermeddle himself')] AmSn18980602-V13-22-page7.txt: [('Christiansunconsciously', 'Christians unconsciously'), ('Catholicsunderstand', 'Catholics understand')] AmSn18980609-V13-23-page14.txt: [('SolidVestibuledPullmanDiningandSleepingCarTrains', 'Solid Vestibuled Pullman Dining and Sleeping Car Trains')] AmSn18980609-V13-23-page15.txt: [('hethatspeakswithatongue', 'he that speaks with a tongue')] AmSn18980609-V13-23-page9.txt: [('civicrighteousness', 'civic righteousness')] AmSn18980616-V13-24-page14.txt: [('ManualofParliamentaryRules', 'Manual of Parliamentary Rules')] AmSn18980616-V13-24-page15.txt: [('weseethroughaglass', 'we see through a glass')] AmSn18980623-V13-25-page3.txt: [('correctlyrepresent', 'correctly represent')] AmSn18980623-V13-25-page5.txt: [('questiondestruction', 'question destruction')] AmSn18980630-V13-26-page14.txt: [('handsomelyillustrated', 'handsomely illustrated')] AmSn18980811-V13-31-page3.txt: [('PhilippineArchipelago', 'Philippine Archipelago')] AmSn18980818-V13-32-page8.txt: [('establishingAmerican', 'establishing American')] AmSn18980915-V13-36-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C 
O')] AmSn18980922-V13-37-page11.txt: [('smallconsideration', 'small consideration')] AmSn18980929-V13-38-page2.txt: [('voluntarilydescended', 'voluntarily descended')] AmSn18981006-V13-39-page14.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')] AmSn18981110-V13-44-page14.txt: [('reeFeoiredyeiaerrsefe', 'ree Fe o ire dye i a errs e f e')] AmSn18981110-V13-44-page3.txt: [('ecclesiasticalpowers', 'ecclesiastical powers')] AmSn18981208-V13-48-page15.txt: [('ScientificAmerican', 'Scientific American')] AmSn18981215-V13-49-page7.txt: [('otherinsurrectionists', 'other insurrection i sts')] AmSn18990105-V14-01-page15.txt: [('concerningthismuch', 'concerning this much')] AmSn18990112-V14-02-page14.txt: [('InterestingandInstructive', 'Interesting and Instructive')] AmSn18990112-V14-02-page15.txt: [('concerningthismuch', 'concerning this much')] AmSn18990126-V14-04-page12.txt: [('biblicalChristianity', 'biblical Christianity')] AmSn18990126-V14-04-page15.txt: [('concerningthismuch', 'concerning this much')] AmSn18990126-V14-04-page16.txt: [('Americanimperialism', 'American imperialism')] AmSn18990126-V14-04-page3.txt: [('certainiinalienable', 'certain i inalienable')] AmSn18990202-V14-05-page5.txt: [('Americandomination', 'American domination')] AmSn18990209-V14-06-page5.txt: [('thoroughlyfurnished', 'thoroughly furnished')] AmSn18990316-V14-11-page15.txt: [('concerningthismuch', 'concerning this much')] AmSn18990316-V14-11-page6.txt: [('earthlygovernments', 'earthly governments')] AmSn18990330-V14-13-page10.txt: [('injusticeinseparable', 'injustice inseparable')] AmSn18990330-V14-13-page15.txt: [('concerningthismuch', 'concerning this much')] AmSn18990511-V14-19-page15.txt: [('Postagestampsaccepted', 'Postage stamps accepted')] AmSn18990518-V14-20-page12.txt: [('appallingproportions', 'appalling proportions')] AmSn18990518-V14-20-page13.txt: [('Ordernowandgetthebenefitofasplendidhelpin', 'Order now and get the benefit of a splendid help in')] 
AmSn18990518-V14-20-page14.txt: [('InterestingandInstructive', 'Interesting and Instructive')] AmSn18990525-V14-21-page6.txt: [('indicateverymoderate', 'indicate very moderate')] AmSn18990608-V14-23-page13.txt: [('grosslyjdiscriminating', 'grossly j discriminating')] AmSn18990615-V14-24-page11.txt: [('employmentelsewhere', 'employment elsewhere')] AmSn18990713-V14-27-page13.txt: [('Oneofthemostinterestingvolumeseverpublished', 'One of the most interesting volumes ever published')] AmSn18990727-V14-29-page11.txt: [('temporalgovernments', 'temporal governments')] AmSn18990803-V14-30-page15.txt: [('concerningthismuch', 'concerning this much')] AmSn18990810-V14-31-page10.txt: [('followingresolution', 'following resolution')] AmSn18990824-V14-33-page7.txt: [('exercisethemselves', 'exercise themselves')] AmSn18990831-V14-34-page5.txt: [('foreignnationality', 'foreign nationality')] AmSn18990914-V14-36-page15.txt: [('IINothingBetterPublished', 'II Nothing Better Published')] AmSn18991005-V14-39-page10.txt: [('louderdemonstrations', 'louder demonstrations')] AmSn18991005-V14-39-page15.txt: [('Containingadditional', 'Containing additional')] AmSn18991019-V14-41-page3.txt: [('overworkingthemselves', 'over working themselves')] AmSn18991102-V14-43-page2.txt: [('secureirecognition', 'secure i recognition')] AmSn18991116-V14-45-page11.txt: [('alreadyestablished', 'already established')] AmSn18991130-V14-47-page3.txt: [('objectionableithing', 'objectionable i thing')] AmSn19000104-V15-01-page5.txt: [('notwithstandingconstitutional', 'notwithstanding constitutional'), ('GenuineChristianity', 'Genuine Christianity')] AmSn19000111-V15-02-page7.txt: [('ordinaryinstruments', 'ordinary instruments')] AmSn19000111-V15-02-page8.txt: [('advertisementcaptured', 'advertisement captured')] AmSn19000118-V15-03-page7.txt: [('independtrespassers', 'in depend trespassers')] AmSn19000118-V15-03-page8.txt: [('renderingallegiance', 'rendering allegiance')] AmSn19000125-V15-04-page12.txt: 
[('advocateindependence', 'advocate independence')] AmSn19000201-V15-05-page4.txt: [('civilandreligiousfreedom', 'civil and religious freedom')] AmSn19000208-V15-06-page14.txt: [('bindingforcontinuous', 'binding for continuous')] AmSn19000208-V15-06-page2.txt: [('developmenthasvaried', 'development has varied')] AmSn19000215-V15-07-page1.txt: [('principleunderlying', 'principle underlying')] AmSn19000215-V15-07-page14.txt: [('beautifullystamped', 'beautifully stamped')] AmSn19000301-V15-09-page3.txt: [('establishingtheRoman', 'establishing the Roman')] AmSn19000315-V15-11-page10.txt: [('accustomingthemselves', 'a c custom ing themselves')] AmSn19000315-V15-11-page13.txt: [('literaryentertainments', 'literary entertainments')] AmSn19000315-V15-11-page6.txt: [('isdiscriminationand', 'is discrimination and'), ('discriminationshall', 'discrimination shall')] AmSn19000405-V15-14-page3.txt: [('Politicsirepresents', 'Politics i represents')] AmSn19000405-V15-14-page6.txt: [('languageArchbishop', 'language Archbishop')] AmSn19000426-V15-17-page14.txt: [('StateDistinguished', 'State Dist ing u i s h e d')] AmSn19000426-V15-17-page3.txt: [('Sundaylawsclaimthat', 'Sunday laws claim that')] AmSn19000719-V15-28-page1.txt: [('Sabbathinstitution', 'Sabbath institution')] AmSn19000719-V15-28-page3.txt: [('eveirtowardiaristocracy', 'eve ir toward i aristocracy'), ('magistratebelieveth', 'magistrate believeth')] AmSn19000719-V15-28-page8.txt: [('conditionsprevailing', 'conditions prevailing')] AmSn19000809-V15-31-page12.txt: [('theireleeterafPrOeirtil', 'the ire lee ter a f P r O e i r t i l')] AmSn19000816-V15-32-page3.txt: [('considerablepolitical', 'considerable political')] AmSn19000830-V15-34-page10.txt: [('constitutionallimitations', 'constitutional limitations')] AmSn19000830-V15-34-page11.txt: [('prosecutingattorney', 'prosecuting attorney')] AmSn19000830-V15-34-page14.txt: [('interestinghistories', 'interesting histories')] AmSn19000913-V15-36-page14.txt: 
[('caYearforMedicalFees', 'c a Year for Medical Fees')] AmSn19000920-V15-37-page8.txt: [('incompetentreligious', 'incompetent religious')] AmSn19000928-V15-38-page5.txt: [('generacovetousness', 'genera covetousness')] AmSn19001018-V15-41-page11.txt: [('religiOuscharaeter', 'religiOus char aet er')] AmSn19001025-V15-42-page2.txt: [('everlastinginheritance', 'everlasting inheritance')] AmSn19001129-V15-47-page11.txt: [('Christiancitizenship', 'Christian citizenship')] AmSn19001129-V15-47-page5.txt: [('accomplishstatement', 'accomplish statement')] AmSn19001206-V15-48-page10.txt: [('betfererigagerfents', 'bet fere rig a ger fe n t s')] AmSn19001206-V15-48-page6.txt: [('appropriationonSunday', 'appropriation on Sunday')]
# %load shared_elements/summary.py
# Run the overview report for the directory stored under the 'cycle' key of
# `directories`, passing along the spelling dictionary and the title.
# The returned value is kept in `summary` for the error-summary step.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction9 Average verified rate: 0.9830732455807398 Average of error rates: 0.018591465234258434 Total token count: 8364805
# %load shared_elements/top_errors.py
# Reduce the overview report held in `summary` to a summary of its errors.
errors_summary = reports.get_errors_summary(summary)
# Display at most the first 50 entries returned by top_errors. The second
# argument (10) is presumably a minimum-count cutoff -- TODO confirm against
# reports.top_errors. Left as a bare expression so the notebook renders it.
reports.top_errors(errors_summary, 10)[:50]
[("'", 8053), ('t', 4433), ('e', 3990), ('d', 3954), ('w', 3754), ('co', 3408), ('m', 3174), ('n', 3056), ('f', 2037), ('r', 2033), ('th', 1645), ('g', 1375), ('mo', 1160), ('u', 930), ('x', 864), ('ex', 521), ('pa', 410), ('q', 399), ('sunday-law', 334), ('k', 315), ("the'", 304), ('pp', 299), ('tion', 276), ("conscience'", 260), ('ch', 253), ('seventhday', 249), ('re', 224), ('ga', 220), ('oc', 218), ('z', 217), ('wm', 215), ('satolli', 210), ('employes', 209), ('munn', 207), ('ti', 200), ('id', 181), ('un', 173), ('ry', 170), ('al', 166), ('sunday-closing', 160), ('ca', 151), ('ment', 146), ('chain-gang', 136), ("to'", 134), ('nd', 130), ('ll', 128), ('lb', 125), ('il', 123), ('bateham', 122), ('cmsar', 121)]