AmSn-OCR-Evaluation-and-Correction

In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt", 
             "2016-12-07-SDA-place-names.txt", 
             "2016-12-08-SDA-Vocabulary.txt", 
             "2017-01-03-place-names.txt", 
             "2017-02-14-Base-Word-List-SCOWL&KJV.txt",
             "2017-02-14-Roman-Numerals.txt",
             "2017-03-01-Additional-Approved-Words.txt"
            ]
In [6]:
spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)
In [7]:
title = "AmSn"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)

Baseline

In [9]:
cycle = 'baseline'
In [10]:
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/baseline

Average verified rate: 0.9417475625771581

Average of error rates: 0.059656669650850494

Total token count: 8534424

In [11]:
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 100 )
Out[11]:
[('-', 12650),
 ('ñ', 11855),
 ("'", 8075),
 ('tion', 6722),
 ('con-', 5879),
 ('re-', 5789),
 ('¥', 5128),
 ('t', 4124),
 ('ment', 4081),
 ('in-', 3962),
 (')', 3929),
 ('d', 3884),
 ('e', 3716),
 ('w', 3705),
 ('co', 3389),
 ('m', 3097),
 ('de-', 2928),
 ('com-', 2715),
 ('n', 2658),
 ('be-', 2447),
 ('pro-', 2149),
 ('sun-', 1953),
 ('f', 1919),
 ('ex-', 1874),
 ('chris-', 1869),
 ('*', 1867),
 ('r', 1832),
 ('tions', 1677),
 ('th', 1673),
 ('en-', 1576),
 ('dis-', 1563),
 ('govern-', 1495),
 ('(', 1448),
 ('gov-', 1339),
 ('g', 1311),
 ('per-', 1282),
 ('sab-', 1252),
 ('tian', 1181),
 ('mo', 1146),
 ('un-', 1141),
 ('na-', 1133),
 ('ernment', 1060),
 ('reli-', 1056),
 ('ance', 1023),
 ('ob-', 1011),
 ('pre-', 998),
 ('ments', 942),
 ('ad-', 937),
 ('ity', 935),
 ('sunday-law', 923),
 ('ac-', 911),
 ('tional', 911),
 ('ñthe', 905),
 ('u', 881),
 (']', 868),
 ('ligious', 836),
 ('im-', 815),
 ('ap-', 807),
 ('ple', 790),
 ('sub-', 738),
 ('x', 730),
 ('ence', 714),
 ('an-', 698),
 ('gious', 675),
 ('relig-', 665),
 ('ques-', 625),
 ('peo-', 623),
 ('ers', 621),
 ('at-', 601),
 ("'the", 596),
 ('al-', 586),
 ('as-', 576),
 ('inter-', 561),
 ('to-', 554),
 ('pub-', 546),
 ('them-', 544),
 ('fol-', 536),
 ('prin-', 522),
 ('constitu-', 520),
 ('ligion', 516),
 ('erty', 511),
 ('_', 511),
 ('/', 510),
 ('sup-', 507),
 ('for-', 498),
 ('tianity', 490),
 ('au-', 484),
 ('stitution', 475),
 ('coun-', 472),
 ('ious', 464),
 ('the-', 460),
 ('em-', 453),
 ('pur-', 447),
 ('observ-', 446),
 ('cath-', 441),
 ('any-', 439),
 ('amer-', 437),
 ('lib-', 430),
 ('gress', 425),
 ('there-', 414),
 ('sev-', 411),
 ('legisla-', 410),
 ('[the', 409),
 ('with-', 394),
 ('wor-', 386),
 ('legis-', 381),
 ('--', 381),
 ('sen-', 380),
 ('or-', 379),
 ('op-', 375),
 ('pa', 375),
 ('ican', 375),
 ('mis-', 373),
 ('ent', 372),
 ('consti-', 366),
 ('estab-', 365),
 ('ful', 355),
 ('servance', 354),
 ('man-', 353),
 ('under-', 352),
 ('cer-', 351),
 ('lished', 351),
 ('lation', 351),
 ('es-', 350),
 ('q', 350),
 ('chain-gang', 349),
 ('lic', 345),
 ('ble', 344),
 ('di-', 343),
 ('pres-', 342),
 ('tution', 342),
 ('ber', 341),
 ('gion', 339),
 ('can-', 338),
 ('ña', 338),
 ('%', 335),
 ('ar-', 334),
 ('princi-', 332),
 ('ture', 326),
 ('move-', 325),
 ('tive', 321),
 ('ous', 316),
 ('gen-', 316),
 ('olic', 315),
 ('sunday-closing', 314),
 ("the'", 312),
 ('thority', 312),
 ('prot-', 311),
 ('ject', 310),
 ('command-', 305),
 ('his-', 303),
 ('tians', 300),
 ('ical', 297),
 ('repre-', 295),
 ('su-', 295),
 ('eral', 295),
 ('se-', 293),
 ('so-', 292),
 ('ual', 291),
 ('=', 291),
 ('par-', 291),
 ('mat-', 290),
 ('ation', 290),
 ('k', 288),
 ('state-', 288),
 ('author-', 287),
 ('him-', 285),
 ('-the', 285),
 ('sunday-', 283),
 ('amend-', 282),
 ('sunday-rest', 281),
 ('pp', 281),
 ('cor-', 281),
 ('indi-', 281),
 ('¥¥', 281),
 ('ples', 280),
 ('ciples', 277),
 ('ñthat', 277),
 ('ex', 275),
 ('ameri-', 271),
 ('institu-', 268),
 ('mem-', 268),
 ('every-', 266),
 ('ch', 266),
 ('ists', 265),
 ('ma-', 262),
 ('neces-', 262),
 ('ñno', 260),
 ('ters', 260),
 ('of-', 259),
 ('senti-', 259),
 ('sec-', 258),
 ('pa-', 257),
 ('seventh-', 256),
 ('work-', 256),
 ('po-', 253),
 ('some-', 251),
 ('doc-', 247),
 ('ciple', 246),
 ('sim-', 243),
 ('pos-', 242),
 ('(the', 240),
 ('busi-', 238),
 ('ary', 237),
 ("'of", 236),
 ('char-', 235),
 ('evi-', 233),
 ('follow-', 233),
 ('perse-', 231),
 ('mo-', 230),
 ('insti-', 229),
 ('mand', 229),
 ('lieve', 228),
 ("conscience'", 228),
 ('ã', 226),
 ('recog-', 226),
 ('bers', 225),
 ('num-', 221),
 ('prac-', 221),
 ('leg-', 219),
 ('tained', 219),
 ('ñand', 218),
 ('mitted', 217),
 ('suc-', 216),
 ('what-', 216),
 ('**', 216),
 ('ga', 216),
 ('individ-', 216),
 ('out-', 214),
 ('oc', 214),
 ('sition', 213),
 ('free-', 213),
 ('mandment', 212),
 ('wm', 212),
 ('accord-', 211),
 ('how-', 210),
 ('prop-', 210),
 ('pel', 210),
 ('main-', 206),
 ('munn', 206),
 ('fellow-citizens', 204),
 ('satolli', 204),
 ('ten-', 203),
 ('rec-', 203),
 ('preme', 201),
 ('ab-', 201),
 ('king-', 200),
 ('***', 198),
 ('over-', 198),
 ('christian-', 197),
 ('hu-', 196),
 ('sented', 193),
 ('yo', 193),
 ('col-', 192),
 ('sions', 190),
 ('noth-', 190),
 ('illus-', 188),
 ('ure', 188),
 ('citi-', 187),
 ('fur-', 187),
 ('eng-', 186),
 ('right-', 185),
 ('min-', 184),
 ('tures', 184),
 ('z', 184),
 ('associa-', 184),
 ('stand-', 183),
 ('sur-', 183),
 ('exer-', 182),
 ('ry', 182),
 ('advent-', 182),
 ('tainly', 182),
 ('argu-', 182),
 ('sible', 182),
 ('tem-', 180),
 ('invari-', 179),
 ('employes', 178),
 ('catho-', 176),
 ('protest-', 176),
 ('thou-', 175),
 ('car-', 175),
 ('accom-', 175),
 ('dred', 174),
 ('ti', 173),
 ('judg-', 173),
 ('persecu-', 173),
 ("'a", 172),
 ('self-', 172),
 ('struction', 172),
 ('teach-', 171),
 ('ther', 170),
 ('inde-', 170),
 ('conse-', 170),
 ('ca', 170),
 ('dence', 170),
 ('dividual', 169),
 ('vio-', 168),
 ('posi-', 167),
 ('ñthomas', 167),
 ('co-', 167),
 ('ro-', 166),
 ('mittee', 166),
 ('`', 165),
 ('sary', 165),
 ('-of', 164),
 ('tinel', 164),
 ('ñi', 162),
 ('ñit', 162),
 ('af-', 161),
 ('hun-', 160),
 ('rep-', 160),
 ('(see', 159),
 ('attorney-general', 159),
 ('al', 159),
 ('organ-', 159),
 ('un-american', 156),
 ('trans-', 155),
 ('tary', 155),
 ('rest-day', 154),
 ('estant', 154),
 ('asso-', 153),
 ('(entered', 153),
 ('mony', 153),
 ('law-abiding', 152),
 ('rea-', 152),
 ('sys-', 152),
 ("'to", 152),
 ('des-', 152),
 ('uni-', 151),
 ('ference', 150),
 ('mc', 149),
 ("'and", 149),
 ('ceived', 148),
 ('enforce-', 148),
 ('bap-', 148),
 ('conven-', 148),
 ('har-', 147),
 ('presi-', 147),
 ('cen-', 147),
 ('law-', 146),
 ('differ-', 146),
 ('ñrev', 146),
 ('acter', 146),
 ('ciation', 146),
 ('sa-', 145),
 ('[from', 145),
 ('peti-', 145),
 ('states-', 145),
 ('vention', 145),
 ('scrip-', 144),
 ('fied', 144),
 ('jus-', 144),
 ('trol', 144),
 ('sabbath-breaking', 144),
 ('dif-', 143),
 ('clared', 142),
 ('religio-political', 142),
 ('cir-', 142),
 ('representa-', 141),
 ('protes-', 141),
 ('ish', 141),
 ('vidual', 141),
 ('ventists', 141),
 ('ular', 141),
 ('organiza-', 141),
 ('ist', 140),
 ('minis-', 140),
 ('tation', 140),
 ('duced', 139),
 ('(and', 139),
 ('re', 139),
 ('tains', 138),
 ('post-', 138),
 ('instruc-', 137),
 ('id', 137),
 ('dren', 137),
 ('cial', 137),
 ('terest', 136),
 ('influ-', 136),
 ('ished', 136),
 ('trated', 136),
 ('non-', 136),
 ('mands', 136),
 ('enth-day', 136),
 ("to'", 134),
 ('ered', 133),
 ('chil-', 133),
 ('cially', 133),
 ('ô', 132),
 ('educa-', 132),
 ('pros-', 132),
 ('intro-', 132),
 ('no-', 132),
 ('trary', 132),
 ('meet-', 131),
 ('fa-', 131),
 ('ica', 130),
 ('pun-', 130),
 ('cently', 130),
 ('mar-', 129),
 ('quired', 129),
 ('vest-pocket', 129),
 ('nessee', 128),
 ('denomina-', 128),
 ('ilar', 128),
 ('se', 128),
 ('jority', 128),
 ('manded', 127),
 ('ll', 127),
 ('-to', 127),
 ('tives', 127),
 ('pelled', 126),
 ('rian', 126),
 ('dition', 126),
 ('thatñ', 126),
 ('ated', 125),
 ('¡', 125),
 ('cise', 125),
 ('litical', 125),
 ('nal', 125),
 ('lish', 125),
 ('wash-', 124),
 ('cated', 124),
 ('mit', 124),
 ('parlia-', 124),
 ('consid-', 124),
 ('sug-', 123),
 ('olics', 123),
 ('tem', 122),
 ('ñnew', 122),
 ('ures', 122),
 ('secution', 122),
 ('cept', 121),
 ('news-', 121),
 ('hon-', 121),
 ('ized', 120),
 ('establish-', 120),
 ('eousness', 120),
 ('il', 119),
 ('nd', 119),
 ('oc-', 119),
 ('advo-', 118),
 ('cution', 118),
 ('edu-', 118),
 ('serv-', 118),
 ('islation', 118),
 ('counter-arguments', 117),
 ('rela-', 117),
 ('sabbath-', 116),
 ('mainte-', 116),
 ('cmsar', 116),
 ('condi-', 115),
 ('do-', 115),
 ('conscien-', 115),
 ('resolu-', 115),
 ('chi-', 115),
 ('ances', 115),
 ('sat-', 114),
 ('lb', 114),
 ('ingly', 114),
 ('sin-', 114),
 ('polit-', 114),
 ('stitutions', 113),
 ('ington', 113),
 ('sabbath-day', 113),
 ('refer-', 113),
 ('pression', 112),
 ('stat-', 112),
 ('nounced', 112),
 ('gos-', 112),
 ('fellow-', 112),
 ('aleck', 112),
 ('apos-', 112),
 ("'i", 111),
 ('spect', 111),
 ('cerning', 111),
 ('sunday-keeping', 110),
 ('circum-', 110),
 ('\\', 110),
 ('libertyñchristian', 109),
 ('ciety', 109),
 ("'in", 109),
 ('nel', 109),
 ('sus-', 109),
 ('non-sectarian', 108),
 ("and'", 108),
 ('bateham', 107),
 ('reform-', 107),
 ('appro-', 107),
 ('tant', 107),
 ('forcement', 107),
 ('%x', 107),
 ('ies', 107),
 ('deter-', 106),
 ('dan-', 106),
 ('politi-', 106),
 ('sentin', 106),
 ('mandments', 106),
 ('ñjesus', 106),
 ('pe-', 106),
 ('espe-', 105),
 ('suf-', 105),
 ('is-', 105),
 ('ñto', 105),
 ('ñin', 104),
 ('estants', 104),
 ('eration', 104),
 ('depart-', 104),
 ("'is", 104),
 ('prom-', 104),
 ('ning', 104),
 ('meth-', 103),
 ('well-', 103),
 ('cc', 103),
 ('ne-', 103),
 ('funda-', 103),
 ('cussion', 103),
 ('ñnot', 103),
 ('•', 102),
 ('milly', 102),
 ('ern', 102),
 ('tered', 102),
 ('neigh-', 102),
 ('righteous-', 102),
 ('pol-', 102),
 ('sity', 102),
 ('pendence', 102),
 ('sanc-', 101),
 ('sionary', 101),
 ('cago', 101),
 ('possi-', 101),
 ('nity', 101)]

Correction 1 -- Special Characters

The first common error appears to be dangling line endings. However, in order to best capture and fix those endings, I am first normalizing the line ending characters and address special characters. Before removing, I will check for regular non-English language use to see if there are particular characters that should be preserved.

In [12]:
reports.tokens_with_special_characters(errors_summary)
Out[12]:
[('ñ', 11855),
 ('¥', 5128),
 (')', 3929),
 ('*', 1867),
 ('(', 1448),
 ('ñthe', 905),
 (']', 868),
 ('_', 511),
 ('/', 510),
 ('[the', 409),
 ('ña', 338),
 ('%', 335),
 ('=', 291),
 ('¥¥', 281),
 ('ñthat', 277),
 ('ñno', 260),
 ('(the', 240),
 ('ã', 226),
 ('ñand', 218),
 ('**', 216),
 ('***', 198),
 ('ñthomas', 167),
 ('`', 165),
 ('ñi', 162),
 ('ñit', 162),
 ('(see', 159),
 ('(entered', 153),
 ('ñrev', 146),
 ('[from', 145),
 ('(and', 139),
 ('ô', 132),
 ('thatñ', 126),
 ('¡', 125),
 ('ñnew', 122),
 ('\\', 110),
 ('libertyñchristian', 109),
 ('%x', 107),
 ('ñjesus', 106),
 ('ñto', 105),
 ('ñin', 104),
 ('ñnot', 103),
 ('•', 102),
 ('`the', 98),
 ('ñchristian', 96),
 ('-¥', 94),
 ('ñany', 90),
 ('(a', 85),
 ('(which', 80),
 ('[', 80),
 ('(or', 78),
 ('(as', 78),
 ('ñbut', 73),
 ('¤', 72),
 ('ñby', 71),
 ('(new', 68),
 ('(for', 68),
 ('(in', 67),
 ('[of', 67),
 ('ñis', 66),
 ('[sunday]', 66),
 ('ñas', 66),
 ('ñwhich', 65),
 ('libertyñchris-', 65),
 ('<', 64),
 ('ñif', 64),
 ('the¥', 64),
 ('+', 63),
 ('ñan', 62),
 ('ñthis', 62),
 ('ñyes', 61),
 ('(sunday)', 60),
 ('[in', 60),
 ('¥¥¥', 59),
 ('—', 58),
 ('(n', 58),
 ('(i', 57),
 ('ñfor', 56),
 ('¥the', 55),
 ('(rev', 54),
 ('(to', 52),
 ('ñwhy', 51),
 ('(baptist)', 50),
 ('ñwe', 50),
 ('ñharvard', 50),
 ('ñall', 49),
 ('ñc', 49),
 ('¥-', 49),
 ('ñn', 49),
 ('ñhow', 49),
 ('(not', 49),
 ('(mich', 48),
 ('ñwhen', 48),
 ('ñwhat', 48),
 ('sentinel_', 46),
 ('day)', 46),
 ('ñhe', 46),
 ('excepted)', 46),
 ('(rom', 45),
 ('ñthey', 45),
 ('ñsigns', 45),
 ('(concluded', 45),
 ('(size', 45),
 ('(italics', 43),
 ('~~', 42),
 ('(john', 42),
 ('[mr', 42),
 ('(second', 42),
 ('#', 42),
 ('£', 42),
 ('ó', 41),
 ('(matt', 40),
 ('ñthere', 40),
 ('ñfrom', 39),
 ('[new', 39),
 ('[this', 39),
 ('inches)', 38),
 ('(acts', 38),
 ('\ufeff', 37),
 ('ñbecause', 37),
 ('¦', 37),
 ('to¥', 37),
 ('ñcatholic', 36),
 ('(with', 36),
 ('(if', 36),
 ('[by', 36),
 ('[or', 36),
 ('(page', 35),
 ('(works', 35),
 ('ñyou', 35),
 ('sabbathñthe', 35),
 ('ñoñ', 35),
 ('¥-¥', 35),
 ('ñid', 34),
 ('(p', 34),
 ('ñone', 34),
 ('york)', 33),
 ('ñthen', 33),
 ('[not', 33),
 ('ñwho', 33),
 ('(dyspeptic)', 32),
 ('worldñto', 32),
 ('~', 32),
 ('c)', 32),
 ('ñpage', 32),
 ('ñreligious', 31),
 ('notes)', 31),
 ('ñcertainly', 31),
 ('¥of', 31),
 ('ñor', 31),
 ('catholic)', 30),
 ('(saturday)', 30),
 ('ñpresent', 30),
 ('ñof', 30),
 ('ñpresbyterian', 29),
 ('(continuing', 29),
 ('}', 29),
 ('ñmr', 29),
 ('ñon', 29),
 ('(tenn', 29),
 ('(of', 29),
 ('(roman', 29),
 ('ñwith', 29),
 ('(r', 29),
 ('lawñthe', 29),
 ('[we', 29),
 ('church)', 28),
 ('ñjottings', 28),
 ('(mass', 28),
 ('sunday)', 28),
 ('(pa', 28),
 ('(except', 28),
 ('(a)', 27),
 ('>', 27),
 ('(no', 27),
 ('bookñ', 27),
 ('(exact', 27),
 ('catholics]', 26),
 ('ñst', 26),
 ('¥¥¥¥', 25),
 ('and¥', 25),
 ('(ps', 25),
 ('ñsabbath', 25),
 ('(that', 25),
 ('split)', 25),
 ('(who', 25),
 ('church]', 25),
 ('bibleñ', 25),
 ('(from', 25),
 ('ñ]', 24),
 ('(mr', 24),
 ('(capillary', 24),
 ('[a', 24),
 ('^', 23),
 ('(b)', 23),
 ('thisñourñour', 23),
 ('(this', 23),
 ('(though', 23),
 ('(cal', 23),
 ('a¥', 23),
 ('the_', 23),
 ('specimen)', 23),
 ('[that', 23),
 ('(luke', 23),
 ('ñjohn', 23),
 ('attraction)', 23),
 ('ñwill', 22),
 ('of¥', 22),
 ('(late', 22),
 ('refused)', 22),
 ('churchñ', 22),
 ('ñdo', 22),
 ('a_', 22),
 ('[italics', 22),
 ('(heb', 22),
 ('pages)', 22),
 ('[to', 22),
 ('(by', 22),
 ('(once', 22),
 ('(col', 21),
 ('a)', 21),
 ('[roman', 21),
 ('ñsimply', 21),
 ('(london', 21),
 ('self=pronouncing', 21),
 ('(dan', 21),
 ('ñsuch', 21),
 ('(isa', 21),
 ('`we', 21),
 ('mealñ(an)', 21),
 ('godñthe', 21),
 ('ñpeoplesñthat', 21),
 ('_the', 21),
 ('missionaryñby', 20),
 ('(c)', 20),
 ('ñwell', 20),
 ('(catholic)', 20),
 ('(over', 20),
 ('(gen', 20),
 ('[applause', 20),
 ('recipesñpost-paid', 20),
 ('it)', 19),
 ('ñjames', 19),
 ('itñ', 19),
 ('coã', 19),
 ('plorerñby', 19),
 ('ñdr', 19),
 ('¥a', 19),
 ('`and', 19),
 ('ñare', 19),
 ('sentinel¥', 19),
 ('ñhas', 19),
 ('ñw', 19),
 ('[christ]', 19),
 ('ñsan', 19),
 ('ñonly', 19),
 ('guineañby', 18),
 ('[sunday', 18),
 ('ñwould', 18),
 ("'¥", 18),
 ('_american', 18),
 ('[and', 18),
 ('(verse', 18),
 ('ñeditor', 18),
 ('ñthese', 18),
 ('ñthose', 18),
 ('`i', 18),
 ('(puritan)', 18),
 ('(we', 18),
 ('ñnellie', 18),
 ('christian(', 18),
 ('ñat', 18),
 ('statesñthe', 18),
 ('[mention', 18),
 ('chairmanñ', 17),
 ('is¥', 17),
 ('churchñthe', 17),
 ('(having)', 17),
 ('ñchicago', 17),
 ('city)', 17),
 ('(based', 17),
 ('sentinel)', 17),
 ('i)', 17),
 ('congoñby', 17),
 ('ñspringfield', 17),
 ('ñmen', 17),
 ("cushing's)", 17),
 ('¥and', 17),
 ('(chicago)', 17),
 ('ñmrs', 17),
 ('¥in', 17),
 ('ñshe', 17),
 ('ñhistory', 17),
 ('ñen', 17),
 ('û', 17),
 ('edition)', 17),
 ('sunday]', 17),
 ('(i)', 16),
 ('ñso', 16),
 ("ñman's", 16),
 ('godñ', 16),
 ('ñgreat', 16),
 ('ñhis', 16),
 ('`it', 16),
 ('(illustrated)', 16),
 ('-*', 16),
 ('(eph', 16),
 ('ñwas', 16),
 ('ñoh', 16),
 ('ñlet', 16),
 ('(applause', 16),
 ('ñjust', 16),
 ('~~¥', 16),
 ('nigerñ', 16),
 ('numbers)', 16),
 ('melanesiañby', 16),
 ('ñbaptist', 15),
 ('t)', 15),
 ('♦', 15),
 ('missionsñby', 15),
 ('in¥', 15),
 ('a¥nd', 15),
 ('statesñ', 15),
 ('ñp', 15),
 ('ñking', 15),
 ('day]', 15),
 ('religionñthe', 15),
 ('*-', 15),
 ('(mark', 15),
 ('state]', 15),
 ('(continued', 15),
 ('law]', 15),
 ('(including', 15),
 ('a_n', 15),
 ('isñ', 15),
 ('governmentñthe', 15),
 ('mangañby', 14),
 ('(water', 14),
 ('sundayñ', 14),
 ('(minn', 14),
 ("'ñ", 14),
 ('ñsunday', 14),
 ('independenceñthe', 14),
 ('(ind', 14),
 ('page)', 14),
 ('ñu', 14),
 ('ñfood', 14),
 ('ñandñ', 14),
 ('l¥', 14),
 ('¥*', 14),
 ('nineteenthñwill', 14),
 ('¥to', 14),
 ('washington)', 14),
 ('¥that', 14),
 ('andñ', 14),
 ('in_', 14),
 ('ñsunday-law', 14),
 ('concordanceña', 14),
 ('villageñone', 14),
 ('ñart', 14),
 ('ñh', 14),
 ('()', 14),
 ('ñgod', 14),
 ('*from', 14),
 ('ñevangel', 14),
 ('catholic]', 14),
 ('for¥', 14),
 ('sundayñthe', 14),
 ('ñaddressñ', 14),
 ('helpsñembracing', 13),
 ('ñboston', 13),
 ('powerñthe', 13),
 ('(sunday', 13),
 ('manñthe', 13),
 ('`to', 13),
 ('truthñthe', 13),
 ('law)', 13),
 ('ñeven', 13),
 ('(london)', 13),
 ('le/vites', 13),
 ('landsñby', 13),
 ('r¡', 13),
 ('[christian', 13),
 ('extras)', 13),
 ("ñ'", 13),
 ('[catholic]', 13),
 ('appealñnational', 13),
 ('ñwhether', 13),
 ('`¥', 13),
 ('beñhow', 13),
 ('invadedñthe', 13),
 ('dayñthe', 13),
 ('dayñand', 13),
 ('(ex', 13),
 ('(fleming', 13),
 ('ñmay', 13),
 ('[laughter', 13),
 ('government)', 13),
 ('ñbible', 12),
 ('(but', 12),
 ('ãã', 12),
 ('(about', 12),
 ('(s', 12),
 ('empireñwhat', 12),
 ('_a', 12),
 ('(gal', 12),
 ('_of', 12),
 ('ñgeorge', 12),
 ('<at', 12),
 ('christñthe', 12),
 ('/dominion', 12),
 ('as¥', 12),
 ('worldñthe', 12),
 ('politicalñthomas', 12),
 ('`for', 12),
 ('helpsñnumerous', 12),
 ('(he', 12),
 ('(neb', 12),
 ('stateñthe', 12),
 ('illustrationsñsplendid', 12),
 ('ñdid', 12),
 ('i¥', 12),
 ('uaryña', 12),
 ('allñthe', 11),
 ('mapsñconcordanceñsubject', 11),
 ('{', 11),
 ('(presbyterian)', 11),
 ('*the', 11),
 ('states]', 11),
 ('ñsamuel', 11),
 ('ñmoral', 11),
 ('historyñby', 11),
 ('state)', 11),
 ('`this', 11),
 ("¥'", 11),
 ('(it', 11),
 ('or¥', 11),
 ('them)', 11),
 ('lawñ', 11),
 ('ñnamely', 11),
 ('re_', 11),
 ('[for', 11),
 ('¥i', 11),
 ('thisñthe', 11),
 ('peopleñthe', 11),
 ('(ill', 11),
 ('be)', 11),
 ('sabbath)', 11),
 ('ñbishop', 11),
 ('(delivered', 11),
 ('ñde', 11),
 ('therein)', 11),
 ('worldñ', 11),
 ('ñnow', 11),
 ('chinañby', 11),
 ("(gentleman's", 11),
 ('cover)', 11),
 ('ñtheñ', 11),
 ('(methodist)', 11),
 ('size)', 11),
 ('indexñvocabu-', 10),
 ('(d)', 10),
 ('/and', 10),
 ('sentinel]', 10),
 ('(eze', 10),
 ('ñfaith', 10),
 ('ñindependent', 10),
 ('ñwhere', 10),
 ('ñwere', 10),
 ('la*', 10),
 ('states)', 10),
 ('/ah', 10),
 ('ñalsoñ', 10),
 ('(specimen', 10),
 ('ñreligion', 10),
 ('`an', 10),
 ('ha/rim', 10),
 ('(an', 10),
 ('more)', 10),
 ('(pp', 10),
 ('churchesñas', 10),
 ('ñj', 10),
 ('__', 10),
 ('(the)ñby', 10),
 ('religionñand', 10),
 ('``', 10),
 ('ñreview', 10),
 ('bookñthe', 10),
 ('(alexander', 10),
 ('[present', 10),
 ('nomñany', 10),
 ('this¥', 10),
 ('postñoffice', 10),
 ('ç', 10),
 ('con_', 10),
 ('`if', 10),
 ('(such', 10),
 ('themñ', 10),
 ('peoplesñ', 10),
 ('bible]', 10),
 ('%c', 10),
 ('others)', 10),
 ('governmentñ', 10),
 ("')", 10),
 ('ñs', 10),
 ('`a', 10),
 ('him)', 10),
 ('godñand', 10),
 ('`that', 10),
 ('ñsome', 10),
 ('[is]', 10),
 ('(two-thirds', 10),
 ('(st', 10),
 ('morality]', 10),
 ('and_', 10),
 ('campbell)', 10),
 ('*this', 9),
 ('¥be', 9),
 ('adventist)', 9),
 ('manñ', 9),
 ('union)', 9),
 ('`no', 9),
 ('(so', 9),
 ('ñamerican', 9),
 ('itñand', 9),
 ('[see', 9),
 ('e¥', 9),
 ('ñliberty', 9),
 ('(at', 9),
 ('[his', 9),
 ('`almost', 9),
 ('ñfrances', 9),
 ('cents)', 9),
 ('peopleña', 9),
 ('¥by', 9),
 ('ñkatherine', 9),
 ('ñchurch', 9),
 ('ñyour', 9),
 ('ñnothing', 9),
 ('(one', 9),
 ('ñsee', 9),
 ('(art', 9),
 ('stateñ', 9),
 ('ñdoes', 9),
 ('baptist)', 9),
 ('(iowa)', 9),
 ('earthñthe', 9),
 ('be¥', 9),
 ('(poetry', 9),
 ('ñsir', 9),
 ('itñthe', 9),
 ('was¥', 9),
 ('ñtheir', 9),
 ('papacy]', 9),
 ('ñlondon', 9),
 ('ñselected', 9),
 ('pope]', 9),
 ('[as', 9),
 ('ñupon', 9),
 ('ñcan', 9),
 ('libertyñthe', 9),
 ('homeñ', 9),
 ('of_', 9),
 ('constitution)', 8),
 ('(signed)', 8),
 ('gospelñthe', 8),
 ('ñofficial', 8),
 ('beñ', 8),
 ('do)', 8),
 ('their¥', 8),
 ('ñjudge', 8),
 ('oneñthe', 8),
 ('peopleñ', 8),
 ('[small]', 8),
 ('laws)', 8),
 ('`yes', 8),
 ('large]', 8),
 ('watchwordñthe', 8),
 ('weekñthe', 8),
 ('=the', 8),
 ('weightñwithout', 8),
 ('speedñwonderful', 8),
 ('keysñthirty', 8),
 ('sentinelñdear', 8),
 ('societyñmrs', 8),
 ('(some', 8),
 ('wasñ', 8),
 ('(their', 8),
 ('offerñby', 8),
 ('menñ', 8),
 ('governmentñthat', 8),
 ('(v', 8),
 ('(james', 8),
 ('touchñlight', 8),
 ('[here', 8),
 ('(micr', 8),
 ('constitutionalñfrom', 8),
 ('keyboardñuniversal', 8),
 ('lawñthat', 8),
 ('alvierica_n', 8),
 ('ñloyal', 8),
 ('¥¥-¥', 8),
 ('ñgold', 8),
 ('ñthough', 8),
 ('\\ttin', 8),
 ('ñchris-', 8),
 ('ñcolorado', 8),
 ('that_', 8),
 ('*as', 8),
 ('ñpublius', 8),
 ('[large]', 8),
 ('is)', 8),
 ('ñour', 8),
 ('churches)', 8),
 ('_to', 8),
 ('ñmelbourne', 8),
 ('siredñmedium', 8),
 ('re¥', 8),
 ('case)', 8),
 ('cut)', 8),
 ('°', 8),
 ('(seventh-day', 8),
 ('sayñ', 8),
 ('¥¥¥¥¥', 8),
 ('copyrighted)', 8),
 ('constitution]', 8),
 ('(papacy)', 8),
 ('alignmentñperfect', 8),
 ('ñsilver', 8),
 ('(brown)', 8),
 ('(without', 8),
 ('languagesñseven', 8),
 ('(h', 8),
 ('sabbath]', 8),
 ('course)', 8),
 ('(still', 8),
 ('(february', 8),
 ('`one', 8),
 ('patenteeñto', 8),
 ('ñevery', 8),
 ('(bourgeois)', 8),
 ('godñin', 8),
 ('\\t', 8),
 ('week)', 8),
 ('¥-¥-¥', 8),
 ('wheelñsteel', 8),
 ('(eng', 8),
 ('ñsel', 8),
 ('sabbathñis', 8),
 ('christians)', 8),
 ('governmentña', 8),
 ('romeñthe', 8),
 ('[loud', 8),
 ('[cheers', 8),
 ('(unless', 8),
 ('ours)', 7),
 ('semiteñand', 7),
 ('ñfree', 7),
 ('(laughter', 7),
 ('people)', 7),
 ('¥-¥¥', 7),
 ('(wis', 7),
 ('(according', 7),
 ('nationñthe', 7),
 ('dayñ', 7),
 ('christianityñthe', 7),
 ('i/', 7),
 ('country)', 7),
 ('to_', 7),
 ('©', 7),
 ('labor]', 7),
 ('ñdecline', 7),
 ('(conclusion', 7),
 ('god)', 7),
 ('(boston)', 7),
 ('we¥', 7),
 ('beast]', 7),
 ('ñthree', 7),
 ('man)', 7),
 ('(unitarian)', 7),
 ('¥but', 7),
 ('ñmy', 7),
 ('say)', 7),
 ('[very', 7),
 ('(hear', 7),
 ('(-', 7),
 ('wordsñ', 7),
 ('all)', 7),
 ('ñgibbon', 7),
 ('(working', 7),
 ('(when', 7),
 ('_that', 7),
 ('(although', 7),
 ('world)', 7),
 ('self=cleaning', 7),
 ('historyñthemes', 7),
 ('libertyñ', 7),
 ('be_', 7),
 ('sabbathñ', 7),
 ('but]', 7),
 ('ñshall', 7),
 ('(lev', 7),
 ('[meaning', 7),
 ('religionñ', 7),
 ('(it)', 7),
 ('(on', 7),
 ('-_', 7),
 ('[n', 7),
 ('*i', 7),
 ('ñafter', 7),
 ('nationñwhat', 7),
 ('iã', 7),
 ('-)', 7),
 ('constitutionñthe', 7),
 ('ñjewish', 7),
 ('peopleñand', 7),
 ('theñ', 7),
 ('(phil', 7),
 ('have¥', 7),
 ('(civil', 7),
 ('bathñthe', 7),
 ('matterñas', 7),
 ('ñdetroit', 7),
 ('ñcol', 7),
 ('(they', 7),
 ('*a', 7),
 ('godña', 7),
 ('„', 7),
 ('(chap', 7),
 ('moral]', 7),
 ('mcallisterñno', 7),
 ('ca_n', 7),
 ('so)', 7),
 ('ñlessons', 7),
 ('[which', 7),
 ('ñpublishers)', 7),
 ('tionñthe', 7),
 ('people]', 7),
 ("te(ichelis'", 7),
 ('-(', 7),
 ('stateña', 7),
 ('lawñto', 7),
 ('(section', 7),
 ('themñand', 7),
 ('ñhave', 7),
 ('[hear', 7),
 ('that¥', 7),
 ('ñhad', 7),
 ('m¥', 7),
 ('[special', 7),
 ('papacyñthe', 7),
 ('toast)', 7),
 ('tionñwhat', 7),
 ('ö', 7),
 ('[made', 7),
 ('(t', 7),
 ('`you', 7),
 ('`my', 7),
 ('menñthe', 7),
 ('_and', 7),
 ('menña', 7),
 ('johnsonñ', 7),
 ('thisñthat', 7),
 ('[god]', 7),
 ('✓', 7),
 ('ñsince', 7),
 ('organ)', 7),
 ('dayña', 7),
 ('(now', 7),
 ('[cries', 7),
 ('no*', 7),
 ('[it', 7),
 ('(all', 6),
 ('ñaddress', 6),
 ('america_n', 6),
 ('way)', 6),
 ('ñprice', 6),
 ('[worship]', 6),
 ('themñthat', 6),
 ('christianityñ', 6),
 ('ñadvent', 6),
 ('not)', 6),
 ('badñ(having)', 6),
 ('g)', 6),
 ('ñsurroundings', 6),
 ('ñindeed', 6),
 ('sundayña', 6),
 ('(two', 6),
 ('a\\', 6),
 ('`our', 6),
 ('(mo', 6),
 ('¥or', 6),
 ('ñcloth', 6),
 ('stateñby', 6),
 ('ci)', 6),
 ('ñpaul', 6),
 ('born)', 6),
 ('%/', 6),
 ('society)', 6),
 ('done)', 6),
 ('¥for', 6),
 ('(vol', 6),
 ('ñgalen', 6),
 ('o)', 6),
 ('tian(', 6),
 ('e)', 6),
 ('ñstate', 6),
 ('ñyea', 6),
 ('ñshould', 6),
 ('[sun-', 6),
 ('stã', 6),
 ("jonesñ'", 6),
 ('¥`', 6),
 ('ñspain', 6),
 ('[i', 6),
 ('ñminnie', 6),
 ('saysñ', 6),
 ('peopleñthat', 6),
 ('ñwhile', 6),
 ('congressñthe', 6),
 ('`sunday', 6),
 ('`in', 6),
 ('[if', 6),
 ('faithñ', 6),
 ('(whether', 6),
 ('/-', 6),
 ('ñjoaquin', 6),
 ('rica_n', 6),
 ('(like', 6),
 ('margin]', 6),
 ('ñgood', 6),
 ('¥is', 6),
 ('laws]', 6),
 ('`is', 6),
 ('ñofñ', 6),
 ('countryñthe', 6),
 ('statesñreligious', 6),
 ('adventists)', 6),
 ('faithñneeds', 6),
 ('[st', 6),
 ('safe]', 6),
 ('a_ivierican', 6),
 ('-¥-¥', 6),
 ('~¥', 6),
 ('power]', 6),
 ('countryñ', 6),
 ('excellentñchristian', 6),
 ('thingsñthe', 6),
 ('butñ', 6),
 ('ñibid', 6),
 ('(texas)', 6),
 ('i*', 6),
 ('(code', 6),
 ('(may', 6),
 ('dayñare', 6),
 ('<<', 6),
 ('wordsña', 6),
 ('ñprof', 6),
 ('lawñis', 6),
 ('ñdear', 6),
 ('[under', 6),
 ('statesñand', 6),
 ('s\x8eance', 6),
 ('societyñl', 6),
 ('ñnever', 6),
 ('ñunited', 6),
 ('revealedñthat', 6),
 ('ñmost', 6),
 ('libraryñ', 6),
 ('ñpp', 6),
 ('r¥', 6),
 ('worship)', 6),
 ('crime(', 6),
 ('government]', 6),
 ('(sun-', 6),
 ('ñblind', 6),
 ('¥with', 6),
 ('journal==', 6),
 ('sentinelñ', 6),
 ('¥¥¥¥¥¥', 6),
 ('it¥', 6),
 ('/i', 6),
 ('caesarñthe', 6),
 ('churchñto', 6),
 ('ñcharles', 6),
 ('ñed', 6),
 ('ñdayton', 6),
 ('(even', 6),
 ('(verses', 6),
 ('¥are', 6),
 ('margin)', 6),
 ('kindñwithin', 6),
 ('anierica_n', 6),
 ('peopleñis', 6),
 ('authorityñthe', 6),
 ('congress)', 6),
 ('`god', 6),
 ('religion)', 6),
 ("ñd'aubigne", 6),
 ('addressñ', 6),
 ('rome]', 6),
 ('(catholic', 6),
 ('god]', 6),
 ('-¥¥', 6),
 (')-', 6),
 ('religion]', 6),
 ("o'neil]", 6),
 ('(md', 6),
 ('useñexcellent', 6),
 ('itñto', 6),
 ('ours]', 6),
 ('ò', 6),
 ('b¥', 6),
 ('beña', 6),
 (')f', 6),
 ('`remember', 6),
 ('¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥', 6),
 ('ñfair', 6),
 ('ha¥s', 6),
 ('(under', 6),
 ('hear)', 6),
 ('(ohio)', 6),
 ('(december', 6),
 ('raiira¥', 6),
 ('¥on', 6),
 ('(kansas)', 6),
 ('¨', 6),
 ('ñd', 5),
 ('(showing', 5),
 ('[living]', 5),
 ('de¥', 5),
 ('adventists]', 5),
 ('ôc', 5),
 ('papacyñthat', 5),
 ('on¥', 5),
 ('manualñexpert', 5),
 ('churches]', 5),
 ('days)', 5),
 ('calculatorñliterary', 5),
 ('christian)', 5),
 ('crafts)', 5),
 ('(jer', 5),
 ('ho)', 5),
 ('should¥', 5),
 ('geary]', 5),
 ('i(', 5),
 ('christñ', 5),
 ('/s', 5),
 ('man¥', 5),
 ('nuisanceñthe', 5),
 ('reform]', 5),
 ('asñ', 5),
 ('father]', 5),
 ('especiallyñwriters', 5),
 ('ñeither', 5),
 ('*¥', 5),
 ('ñhere', 5),
 ('ñabout', 5),
 ('bibleñand', 5),
 ('ment)', 5),
 ('addressñpacific', 5),
 ("(milman's", 5),
 ('ñsalesmen', 5),
 ("'ñthe", 5),
 ('¥as', 5),
 ('ñpresident', 5),
 ('`t', 5),
 ('ñgo', 5),
 ('keyñthe', 5),
 ('[at', 5),
 ('ñharvest', 5),
 ('(copyrighted)', 5),
 ('priceñattractive', 5),
 ('ñmargaret', 5),
 ('liberty]', 5),
 ('f¥', 5),
 ("'`", 5),
 ('himñ', 5),
 ('principles¥', 5),
 ('(order', 5),
 ('ñdean', 5),
 ('power)', 5),
 ('[these', 5),
 ('societyña', 5),
 ('principlesñthe', 5),
 ('ha/nan', 5),
 ('beastñthe', 5),
 ('sab_', 5),
 ('ñmail', 5),
 ('`as', 5),
 ('[accounted', 5),
 ('ñhalf', 5),
 ('iñ', 5),
 ('-¥-', 5),
 ('christ)', 5),
 ('powerñ', 5),
 ('his¥', 5),
 ('(companion', 5),
 ('firstñthe', 5),
 ('are¥', 5),
 ("¡'", 5),
 ...]

No obvious foreign language character use. "ñ" appears attached to words as an OCR noise element.

In [13]:
# %load shared_elements/normalize_characters.py
prev = "baseline"
cycle = "correction1"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Substitute for all other dashes
    content = re.sub(r"—-—–‑", r"-", content)

    # Substitute formatted apostrophe
    content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
    
    # Replace all special characters with a space (as these tend to occur at the end of lines)
    content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [14]:
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction1

Average verified rate: 0.9493256707400606

Average of error rates: 0.051667561921814396

Total token count: 8518613

In [15]:
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 500 )
Out[15]:
[('-', 13223),
 ("'", 8384),
 ('tion', 6765),
 ('con-', 5889),
 ('re-', 5797),
 ('t', 4325),
 ('ment', 4118),
 ('in-', 3972),
 ('d', 3947),
 ('e', 3885),
 ('w', 3760),
 ('co', 3427),
 ('m', 3164),
 ('n', 2998),
 ('de-', 2931),
 ('com-', 2716),
 ('be-', 2455),
 ('pro-', 2155),
 ('f', 2010),
 ('r', 1978),
 ('sun-', 1967),
 ('chris-', 1947),
 ('ex-', 1880),
 ('th', 1703),
 ('tions', 1683),
 ('en-', 1577),
 ('dis-', 1566),
 ('govern-', 1495),
 ('g', 1356),
 ('gov-', 1340),
 ('per-', 1284),
 ('sab-', 1254),
 ('tian', 1190),
 ('mo', 1156),
 ('un-', 1145),
 ('na-', 1136),
 ('ernment', 1070),
 ('reli-', 1058),
 ('ance', 1036),
 ('ob-', 1011),
 ('pre-', 999),
 ('ments', 947),
 ('ity', 944),
 ('sunday-law', 940),
 ('ad-', 939),
 ('tional', 914),
 ('u', 913),
 ('ac-', 913),
 ('x', 860),
 ('ligious', 838),
 ('im-', 816),
 ('ap-', 811),
 ('ple', 798),
 ('sub-', 740),
 ('ence', 722),
 ('an-', 700),
 ('gious', 675),
 ('relig-', 666),
 ('ers', 628),
 ('ques-', 625),
 ('peo-', 625),
 ('at-', 603),
 ("'the", 600),
 ('al-', 588),
 ('as-', 578),
 ('inter-', 562),
 ('to-', 556),
 ('pub-', 550),
 ('them-', 545),
 ('fol-', 536),
 ('ligion', 523),
 ('prin-', 523),
 ('constitu-', 521),
 ('erty', 518),
 ('sup-', 508)]

Correction 2 -- Correct line endings

Reconnect words that were split due to line-endings.

In [16]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [17]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction2

Average verified rate: 0.9777065723174374

Average of error rates: 0.024393763055804242

Total token count: 8357037

In [18]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 250 )
Out[18]:
[('-', 13127),
 ("'", 8384),
 ('t', 4309),
 ('d', 3941),
 ('e', 3874),
 ('w', 3758),
 ('co', 3424),
 ('m', 3155),
 ('n', 2990),
 ('f', 1989),
 ('r', 1971),
 ('th', 1703),
 ('g', 1339),
 ('mo', 1156),
 ('sunday-law', 976),
 ('u', 911),
 ('x', 860),
 ("'the", 600),
 ('--', 418),
 ('pa', 415),
 ('q', 373),
 ('chain-gang', 353),
 ('sunday-closing', 335),
 ('tion', 327),
 ("the'", 313),
 ('k', 304),
 ('ex', 304),
 ('sunday-rest', 299),
 ('pp', 298),
 ('-the', 289),
 ("conscience'", 265),
 ('ch', 257)]

Correction 3 -- Remove extra dashes

In [19]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    for token in tokens:
        if token[0] is "-":
            replacements.append((token, token[1:]))
            
        elif token[-1] is "-":
            replacements.append((token, token[:-1]))
        else:
            pass
        
    if len(replacements) > 0:
#         print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [20]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction3

Average verified rate: 0.9814938307988949

Average of error rates: 0.020385108922709644

Total token count: 8367534

In [21]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[21]:
[("'", 8554),
 ('t', 4413),
 ('e', 4034),
 ('d', 3964),
 ('w', 3794),
 ('co', 3624),
 ('m', 3204),
 ('n', 3085),
 ('r', 2060),
 ('f', 2026),
 ('th', 1718),
 ('g', 1378),
 ('mo', 1162),
 ('u', 928),
 ('x', 864),
 ("'the", 601),
 ('ex', 539),
 ('re', 505),
 ('pa', 423),
 ('q', 399),
 ('sunday-law', 334),
 ('tion', 328),
 ('k', 319),
 ("the'", 313),
 ('pp', 299),
 ("conscience'", 265),
 ('ch', 260),
 ('seventhday', 249),
 ("'of", 238),
 ('ti', 228),
 ('ga', 227),
 ('oc', 219),
 ('z', 215),
 ('wm', 215),
 ('un', 210),
 ('satolli', 210),
 ('employes', 209),
 ('munn', 206),
 ('ca', 205),
 ('al', 204),
 ('yo', 202),
 ('mc', 191),
 ('ry', 185),
 ('id', 185),
 ('ment', 183),
 ("'a", 175),
 ('sunday-closing', 159),
 ("'to", 154),
 ('se', 153),
 ('nd', 152)]

Correction 4 -- Remove extra quotation marks

In [22]:
# %load shared_elements/remove_extra_quotation_marks.py
prev = cycle
cycle = "correction4"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    corrections = []
    for token in tokens:
        token_list = list(token)
        last_char = token_list[-1]

        if last_char is "'":
            if len(token) > 1:
                if token_list[-2] is 's' or 'S':
                    pass
                else:
                    corrections.append((token, re.sub(r"'", r"", token)))
            else:
                pass
        elif token[0] is "'":
            corrections.append((token, re.sub(r"'", r"", token)))   
        else:
            pass
    
    if len(corrections) > 0:
#         print('{}: {}'.format(filename, corrections))

        for correction in corrections:
            content = clean.replace_pair(correction, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [23]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction4

Average verified rate: 0.9824621432313876

Average of error rates: 0.01930259623992838

Total token count: 8367328

In [24]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[("'", 8053),
 ('t', 4487),
 ('e', 4070),
 ('d', 3979),
 ('w', 3797),
 ('co', 3626),
 ('m', 3209),
 ('n', 3104),
 ('r', 2078),
 ('f', 2046),
 ('th', 1721),
 ('g', 1386),
 ('mo', 1165),
 ('u', 930),
 ('x', 865),
 ('ex', 539),
 ('re', 508),
 ('pa', 427),
 ('q', 401),
 ('sunday-law', 334),
 ('tion', 329),
 ('k', 320),
 ("the'", 304),
 ('pp', 299),
 ("conscience'", 261),
 ('ch', 260),
 ('seventhday', 249),
 ('ti', 230),
 ('ga', 228),
 ('oc', 219),
 ('z', 217),
 ('wm', 215),
 ('un', 210),
 ('satolli', 210),
 ('employes', 209),
 ('al', 208),
 ('munn', 207),
 ('ca', 206),
 ('yo', 203),
 ('mc', 191),
 ('id', 189),
 ('ry', 186),
 ('ment', 183),
 ('sunday-closing', 160),
 ('nd', 159),
 ('se', 157),
 ('tinel', 147),
 ('ll', 141),
 ('il', 137),
 ('chain-gang', 136)]

Correction 5 -- Rejoin Burst Words

Check errors to see if they form verified tokens when joined with the preceding token.

In [25]:
# %load shared_elements/rejoin_burst_words.py
prev = cycle
cycle = "correction5"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    pattern = re.compile("(\s(\w{1,2}\s){5,})")
    
    replacements = []
    clean.check_splits(pattern, spelling_dictionary, content, replacements)
    
    if len(replacements) > 0:
#         print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [26]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction5

Average verified rate: 0.9824669426623706

Average of error rates: 0.01929677708146822

Total token count: 8367280

In [27]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[27]:
[("'", 8053),
 ('t', 4483),
 ('e', 4047),
 ('d', 3973),
 ('w', 3794),
 ('co', 3626),
 ('m', 3202),
 ('n', 3097),
 ('r', 2062),
 ('f', 2043),
 ('th', 1721),
 ('g', 1386),
 ('mo', 1165),
 ('u', 930),
 ('x', 865),
 ('ex', 539),
 ('re', 508),
 ('pa', 427),
 ('q', 401),
 ('sunday-law', 334),
 ('tion', 329),
 ('k', 320),
 ("the'", 304),
 ('pp', 299),
 ("conscience'", 261),
 ('ch', 260),
 ('seventhday', 249),
 ('ti', 230),
 ('ga', 228),
 ('oc', 219),
 ('z', 217),
 ('wm', 215),
 ('un', 210),
 ('satolli', 210),
 ('employes', 209),
 ('al', 208),
 ('munn', 207),
 ('ca', 206),
 ('yo', 203),
 ('mc', 191),
 ('id', 189),
 ('ry', 186),
 ('ment', 183),
 ('sunday-closing', 160),
 ('nd', 159),
 ('se', 157),
 ('tinel', 147),
 ('ll', 141),
 ('il', 137),
 ('chain-gang', 136)]

Correction 6 -- Rejoin Split Words

In [28]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction6"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)
    
    if len(replacements) > 0:
#         print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [29]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction6

Average verified rate: 0.9828407009325851

Average of error rates: 0.018864070426738287

Total token count: 8365027

In [30]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )
Out[30]:
[("'", 8053),
 ('t', 4443),
 ('e', 3991),
 ('d', 3960),
 ('w', 3791),
 ('co', 3415),
 ('m', 3195),
 ('n', 3057),
 ('r', 2043),
 ('f', 2042),
 ('th', 1658),
 ('g', 1377),
 ('mo', 1161),
 ('u', 927),
 ('x', 864),
 ('ex', 523),
 ('pa', 410),
 ('q', 399),
 ('sunday-law', 334),
 ('k', 318),
 ("the'", 304),
 ('pp', 299),
 ('tion', 281),
 ('re', 279),
 ("conscience'", 260),
 ('ch', 254),
 ('seventhday', 249),
 ('ga', 221),
 ('oc', 218),
 ('z', 215),
 ('wm', 215),
 ('satolli', 210),
 ('employes', 209),
 ('munn', 207),
 ('ti', 203),
 ('id', 186),
 ('ry', 183),
 ('al', 175),
 ('un', 174),
 ('ment', 173),
 ('ca', 170),
 ('sunday-closing', 160),
 ('nd', 158),
 ('tinel', 147),
 ('se', 143),
 ('ll', 141),
 ('chain-gang', 136),
 ("to'", 134),
 ('il', 127),
 ('lb', 125),
 ('bateham', 122),
 ('cmsar', 121),
 ('aleck', 112),
 ("and'", 109),
 ('socalled', 106),
 ('te', 105),
 ('sunday-rest', 104),
 ('cc', 104),
 ('sentin', 104),
 ('milly', 103),
 ('erican', 102),
 ("of'", 98),
 ('va', 97),
 ('cd', 94),
 ('nt', 93),
 ('fellow-citizens', 92),
 ('vt', 92),
 ('ican', 90),
 ('op', 90),
 ('tt', 89),
 ('ft', 89),
 ('aa', 89),
 ('-', 88),
 ("a'", 86),
 ('eze', 84),
 ('attorney-general', 83),
 ('ma', 83),
 ('csar', 81),
 ('stundists', 80),
 ('cereola', 79),
 ('lc', 79),
 ('ay', 78),
 ('neander', 78),
 ('ne', 77),
 ('li', 77),
 ('religio-political', 76),
 ('tions', 75),
 ('mc', 74),
 ('law-abiding', 74),
 ('sundaylaw', 74),
 ('ni', 73),
 ('rican', 73),
 ('ra', 73),
 ('ia', 72),
 ('edmunds', 71),
 ('rd', 71),
 ("crafts's", 70),
 ('es', 70),
 ("is'", 69),
 ('si', 69),
 ('sr', 69),
 ('freethought', 68),
 ("in'", 67),
 ("crafts'", 66),
 ("folks'", 66),
 ("cmsar's", 66),
 ('un-american', 64),
 ('sabbath-day', 63),
 ('rest-day', 62),
 ('ity', 61),
 ('ie', 61),
 ('ic', 60),
 ('sabbath-breaking', 60),
 ("''", 59),
 ('mt', 59),
 ("'s", 59),
 ('pr', 58),
 ('na', 57),
 ('ac', 57),
 ('ernment', 56),
 ('candidus', 56),
 ("barbers'", 56),
 ('paeifie', 56),
 ('ments', 55),
 ('tregelles', 54),
 ('geikie', 54),
 ("that'", 54),
 ('ri', 53),
 ("an'", 53),
 ('ce', 53),
 ('dred', 53),
 ('employe', 52),
 ("it'", 52),
 ('ky', 51),
 ('assoeiation', 51),
 ('ob', 51),
 ('litt', 51),
 ('wellknown', 50),
 ('tional', 50),
 ('erty', 50),
 ('coxey', 50),
 ('ci', 50),
 ('ofthe', 49),
 ('ary', 49),
 ('vo', 48),
 ('tian', 48),
 ('aro', 48),
 ('ph', 48),
 ('ow', 47),
 ('pre', 47),
 ('ple', 47),
 ('ent', 47),
 ('fa', 47),
 ('cathedra', 46),
 ("jones'", 46),
 ('ro', 46),
 ('leiper', 45),
 ('mi', 45),
 ('forit', 45),
 ("citizens'", 44),
 ('sh', 43),
 ('sundayclosing', 43),
 ('judefind', 42),
 ('wo', 42),
 ('ei', 42),
 ('times-herald', 42),
 ('chaingang', 42),
 ('ct', 42),
 ('tischendorf', 42),
 ('ers', 41),
 ('bo', 41),
 ("o'keefe", 41),
 ('hiberty', 41),
 ('em', 41),
 ('cr', 41),
 ("be'", 41),
 ('merican', 40),
 ("law'", 40),
 ('ica', 40),
 ('sunday-keeping', 40),
 ('ea', 40),
 ('yo', 40),
 ('chapelle', 40),
 ('ip', 40),
 ('ance', 39),
 ("i'", 39),
 ('oi', 39),
 ('keane', 39),
 ('tv', 39),
 ('copygraph', 39),
 ("waterman's", 38),
 ('lachmann', 38),
 ('kai', 38),
 ("cruden's", 38),
 ('oa', 37),
 ('non-sectarian', 37),
 ('adress', 37),
 ("csar's", 37),
 ('saye', 37),
 ('ly', 37),
 ('ther', 37),
 ('ta', 37),
 ('io', 37),
 ('church-and-state', 37),
 ("american'", 37),
 ('terest', 36),
 ('tr', 36),
 ('coxe', 36),
 ('dwyer', 36),
 ("for'", 35),
 ("not'", 35),
 ('da', 35),
 ('pf', 35),
 ('tir', 35),
 ('td', 35),
 ('mass-meeting', 35),
 ('swiggart', 35),
 ("this'", 35),
 ('stitution', 35),
 ('ba', 35),
 ('fi', 35),
 ('az', 35),
 ('law-making', 34),
 ('first-day', 34),
 ('jagoe', 34),
 ('inthe', 34),
 ('godgiven', 34),
 ('ns', 34),
 ("church'", 34),
 ('entinel', 34),
 ('sa', 33),
 ('oo', 33),
 ('brunot', 33),
 ('ork', 33),
 ('ful', 33),
 ('cwsar', 33),
 ('ts', 33),
 ('gious', 33),
 ('eh', 33),
 ('cl', 33),
 ('ss', 32),
 ("as'", 32),
 ('vice-presidents', 32),
 ('base-ball', 32),
 ('ap', 32),
 ("infants'", 32),
 ('rt', 32),
 ('saloon-keepers', 32),
 ('lt', 31),
 ('sun-worship', 31),
 ("liberty'", 31),
 ('rs', 31),
 ('one-seventh', 31),
 ('prayer-meeting', 31),
 ('slattery', 31),
 ('colitical', 30),
 ('efical', 30),
 ('mn', 30),
 ('vox', 30),
 ('fr', 30),
 ('ith', 30),
 ('ao', 29),
 ('os', 29),
 ('mg', 29),
 ("are'", 29),
 ('oe', 29),
 ("roberts'", 29),
 ('ge', 29),
 ('tc', 29),
 ('rn', 29),
 ('kauffman', 29),
 ('fo', 29),
 ('crowther', 29),
 ("workingmen's", 29),
 ('puplishing', 29),
 ("all'", 29),
 ('holidayism', 28),
 ('eral', 28),
 ('self-preservation', 28),
 ('street-cars', 28),
 ("parkhurst's", 28),
 ('charta', 28),
 ('dei', 28),
 ('newyork', 28),
 ('durborow', 28),
 ('liberty-loving', 28),
 ('om', 28),
 ('zwiebach', 28),
 ("fathers'", 28),
 ('non-catholics', 28),
 ('tl', 28),
 ('fair-minded', 27),
 ("krug's", 27),
 ("sabbath'", 27),
 ('non-observance', 27),
 ('iu', 27),
 ('schurman', 27),
 ("cushing's", 27),
 ('ve', 27),
 ('mccauley', 27),
 ('ou', 27),
 ('self-defense', 27),
 ('theo', 27),
 ('fellow-man', 27),
 ("gibbons'", 27),
 ("or'", 26),
 ('counter-arguments', 26),
 ('sabbathkeeping', 26),
 ('ence', 26),
 ('ablegate', 26),
 ('fora', 26),
 ('fide', 26),
 ('platt', 26),
 ('thon', 26),
 ("with'", 26),
 ('pany', 26),
 ('itis', 26),
 ('hto', 26),
 ('bula', 26),
 ("god'", 26),
 ('ig', 26),
 ("which'", 26),
 ('pecci', 26),
 ('divinely-appointed', 26),
 ('ae', 26),
 ('non-religious', 26),
 ("by'", 26),
 ('ous', 26),
 ('selfgovernment', 26),
 ('loth', 25),
 ("lions'", 25),
 ('weakley', 25),
 ('eferson', 25),
 ('ab', 25),
 ('nethinim', 25),
 ('weyler', 25),
 ('feligious', 25),
 ('fah', 25),
 ("d'aubigne", 25),
 ('martinelli', 25),
 ('tn', 25),
 ("at'", 25),
 ('sundayschool', 25),
 ('ws', 25),
 ('ridpath', 25),
 ('tne', 25),
 ('publishinc', 25),
 ('ut', 24),
 ('krug', 24),
 ('ceesar', 24),
 ('stuttle', 24),
 ('mehan', 24),
 ('tothe', 24),
 ('tiie', 24),
 ('ligion', 24),
 ('ee', 24),
 ("preachers'", 24),
 ('ber', 24),
 ("if'", 24),
 ('phelan', 24),
 ('ib', 23),
 ('humbert', 23),
 ('ious', 23),
 ('lawabiding', 23),
 ('twentyfive', 23),
 ('atterbury', 23),
 ('sunday-sabbath', 23),
 ('nn', 23),
 ('arierican', 23),
 ('ble', 23),
 ("saints'", 23),
 ('tbe', 22),
 ('anb', 22),
 ('reli', 22),
 ('ili', 22),
 ('ef', 22),
 ('od', 22),
 ('bt', 22),
 ('tb', 22),
 ('ligious', 22),
 ("have'", 22),
 ('sas', 22),
 ('scudder', 22),
 ('sel', 22),
 ('wi', 22),
 ('gi', 22),
 ('anti-christian', 22),
 ("we'", 22),
 ('jeferson', 22),
 ('xact', 21),
 ("pub'rs", 21),
 ("grocers'", 21),
 ("e'", 21),
 ('comegys', 21),
 ('scovel', 21),
 ('sevent', 21),
 ('tianity', 21),
 ('tivity', 21),
 ('po', 21),
 ('ject', 21),
 ('mee', 21),
 ('ress', 21),
 ('witham', 21),
 ('thos', 21),
 ('ng', 21),
 ('yefferson', 21),
 ('ive', 21),
 ("sunday'", 21),
 ('notgive', 21),
 ('eousness', 21),
 ('postmaster-general', 20),
 ("hutchings'", 20),
 ('washburne', 20),
 ("he'", 20),
 ('religiopolitical', 20),
 ('kellog', 20),
 ('romer', 20),
 ('healthgiving', 20),
 ("soldiers'", 20),
 ('rr', 20),
 ("satolli's", 20),
 ('mcglynn', 20),
 ('sien', 20),
 ("from'", 20),
 ("hours'", 20),
 ('pt', 20),
 ('anierican', 20),
 ('rosemond', 20),
 ("vick's", 20),
 ('parens', 20),
 ('bythe', 20),
 ("on'", 20),
 ('eign', 20),
 ('longnecker', 20),
 ("was'", 20),
 ("printers'", 19),
 ('anierica', 19),
 ('pressense', 19),
 ('fbr', 19),
 ('micr', 19),
 ('everts', 19),
 ('rorabacher', 19),
 ("pastors'", 19),
 ("gov't", 19),
 ('tp', 19),
 ('iti', 19),
 ('ies', 19),
 ('ible', 19),
 ('seelye', 19),
 ('arther', 19),
 ('wishart', 19),
 ("people'", 19),
 ('cosgrove', 19),
 ('gt', 19),
 ('det', 19),
 ('lery', 19),
 ('abbe', 19),
 ('ey', 19),
 ('ation', 19),
 ("day'", 19),
 ('hagans', 19),
 ('montefiore', 19),
 ("will'", 19),
 ('governor-general', 19),
 ('chain-gangs', 19),
 ('law-makers', 19),
 ('stundist', 19),
 ('sundaykeeping', 18),
 ('dc', 18),
 ('tae', 18),
 ('enright', 18),
 ('anti-catholic', 18),
 ('non-interference', 18),
 ('tht', 18),
 ('oz', 18),
 ('efferson', 18),
 ("th'", 18),
 ('ol', 18),
 ('tarawera', 18),
 ('curlett', 18),
 ('tii', 18),
 ('tolstoi', 18),
 ('self-styled', 18),
 ('--', 18),
 ("would'", 18),
 ('nel', 18),
 ('um', 18),
 ('ccesar', 18),
 ('oity', 18),
 ('wa', 18),
 ('etteer', 18),
 ('avery-stuttle', 18),
 ('nnw', 17),
 ('mal', 17),
 ('bf', 17),
 ('prin', 17),
 ("righteousness'", 17),
 ('jt', 17),
 ('clingman', 17),
 ('cedarquist', 17),
 ('newyorkcity', 17),
 ('tra', 17),
 ('ical', 17),
 ('ricans', 17),
 ('saloon-keeper', 17),
 ('rubiana', 17),
 ('prisot', 17),
 ('post-offices', 17),
 ('theunited', 17),
 ('nu', 17),
 ("no'", 17),
 ('fl', 17),
 ('sabbathbreaking', 17),
 ("a'nan", 17),
 ('sommerville', 17),
 ('church-going', 17),
 ('vernment', 17),
 ('cood', 17),
 ('mullally', 17),
 ('self-governing', 17),
 ('ist', 17),
 ('bondst', 17),
 ('philpott', 17),
 ('law-breaker', 17),
 ('ik', 17),
 ('senti', 17),
 ('ul', 17),
 ('ame', 17),
 ('leivites', 17),
 ('pel', 17),
 ('vites', 17),
 ("apostles'", 17),
 ('hy', 17),
 ("schaff's", 16),
 ('dieu', 16),
 ('selfevident', 16),
 ('ag', 16),
 ('dayto', 16),
 ('ioo', 16),
 ('tf', 16),
 ('prepartion', 16),
 ('cp', 16),
 ("enright's", 16),
 ("his'", 16),
 ('mit', 16),
 ('oth', 16),
 ('relig', 16),
 ('thepeople', 16),
 ('sie', 16),
 ('alfaro', 16),
 ('symmachus', 16),
 ('xl', 16),
 ('su', 16),
 ('bi', 16),
 ('facto', 16),
 ('ples', 16),
 ('erromanga', 16),
 ('sunday-keepers', 16),
 ('dividual', 16),
 ('peryear', 16),
 ('peffer', 16),
 ('re-enact', 16),
 ('ish', 16),
 ('socialpurity', 16),
 ('tains', 16),
 ('cs', 16),
 ('wilkie', 16),
 ("l'", 16),
 ('hodgson', 16),
 ('basle', 16),
 ('bas', 16),
 ('zi', 15),
 ('tem', 15),
 ("who'", 15),
 ('ite', 15),
 ('sabbath-breakers', 15),
 ('americansentinel', 15),
 ('rhe', 15),
 ('nonsuch', 15),
 ('lieve', 15),
 ('pepsia', 15),
 ('co-workers', 15),
 ('gallinger', 15),
 ('labberton', 15),
 ('thatthe', 15),
 ('intrust', 15),
 ('lttra', 15),
 ('aw', 15),
 ('law-breakers', 15),
 ('milman', 15),
 ('plete', 15),
 ('lished', 15),
 ('rampolla', 15),
 ("christian'", 15),
 ('wellbeing', 15),
 ("milman's", 15),
 ('klip', 15),
 ('ons', 15),
 ('ctesar', 15),
 ("their'", 15),
 ('re-enacted', 15),
 ('populi', 15),
 ('governinent', 15),
 ('wor', 15),
 ('hach', 15),
 ('sc', 15),
 ("miles'", 15),
 ("ginn's", 15),
 ('ih', 15),
 ('janes', 15),
 ('ov', 15),
 ('sulus', 15),
 ('stinday', 15),
 ('xo', 15),
 ('ectarian', 15),
 ("o'gorman", 15),
 ('tkt', 15),
 ('ddress', 15),
 ("they'", 15),
 ('alvierica', 15),
 ('gress', 15),
 ('je', 15),
 ('birney', 15),
 ('ny', 15),
 ("religion'", 15),
 ('avery-stiittle', 15),
 ('sf', 15),
 ('tians', 15),
 ('np', 14),
 ('thb', 14),
 ('ver', 14),
 ('olic', 14),
 ('qa', 14),
 ("pres'ts", 14),
 ('secker', 14),
 ('intelligeneer', 14),
 ("'the", 14),
 ('yeferson', 14),
 ('ual', 14),
 ('self-exaltation', 14),
 ("tourists'", 14),
 ('chiniquy', 14),
 ('rittenhouse', 14),
 ('ormore', 14),
 ("moses'", 14),
 ('peo', 14),
 ('goverment', 14),
 ('thp', 14),
 ("sup'ts", 14),
 ("gault's", 14),
 ('cortlandt', 14),
 ('non-union', 14),
 ('br', 14),
 ("ccesar's", 14),
 ('anglo-saxons', 14),
 ('christain', 14),
 ('sp', 14),
 ('restday', 14),
 ('nr', 14),
 ('rv', 14),
 ('eemperance', 14),
 ('sabbath-breaker', 14),
 ('gb', 14),
 ("mcallister's", 14),
 ('rian', 14),
 ('malum', 14),
 ("williams'", 14),
 ("neander's", 14),
 ("adventists'", 14),
 ('lexow', 14),
 ('confreres', 14),
 ('thr', 14),
 ('ncluding', 14),
 ('af', 14),
 ('sient', 14),
 ('tution', 14),
 ('gl', 14),
 ('tennesseeans', 14),
 ('mu', 14),
 ("but'", 13),
 ('fon', 13),
 ('christ-like', 13),
 ('aivierican', 13),
 ('leaguers', 13),
 ('wu', 13),
 ("mf'g", 13),
 ('hoc', 13),
 ('dibbs', 13),
 ('anti-religious', 13),
 ('themies', 13),
 ('dont', 13),
 ('ex-president', 13),
 ('gr', 13),
 ("one'", 13),
 ('ine', 13),
 ('two-horned', 13),
 ('rp', 13),
 ("coxey's", 13),
 ('higinbotham', 13),
 ("t'", 13),
 ("protestants'", 13),
 ('pilman', 13),
 ('froni', 13),
 ('foi', 13),
 ('meeting-house', 13),
 ('mccourt', 13),
 ('thd', 13),
 ('waupon', 13),
 ("f'", 13),
 ("has'", 13),
 ('itt', 13),
 ('hiscock', 13),
 ('self-contradictory', 13),
 ("torry's", 13),
 ('cif', 13),
 ('gainst', 13),
 ("its'", 13),
 ('dn', 13),
 ('princi', 13),
 ('cer', 13),
 ('thi', 13),
 ('ec', 13),
 ('hee', 13),
 ('sabbathkeepers', 13),
 ('lelvites', 13),
 ('one-man', 13),
 ('tms', 13),
 ('rundschau', 13),
 ('tlie', 13),
 ('tax-payers', 13),
 ('non-christian', 13),
 ('self-appointed', 13),
 ("breeders'", 13),
 ('kossean', 13),
 ('olesen', 13),
 ('botkine', 13),
 ('ntinel', 13),
 ('volksraad', 13),
 ("whaley's", 12),
 ('constitu', 12),
 ('ke', 12),
 ('ets', 12),
 ('pm', 12),
 ('ess', 12),
 ('froin', 12),
 ('robb', 12),
 ('theire', 12),
 ('thein', 12),
 ('ors', 12),
 ('chappelle', 12),
 ("churches'", 12),
 ('self-constituted', 12),
 ('wouldbe', 12),
 ('entin', 12),
 ('week-day', 12),
 ('thority', 12),
 ('fast-day', 12),
 ("were'", 12),
 ('sk', 12),
 ('ex-mayor', 12),
 ('fortynine', 12),
 ('ture', 12),
 ('bok', 12),
 ('whitall', 12),
 ("cents'", 12),
 ('sition', 12),
 ('tte', 12),
 ('self-interest', 12),
 ('croker', 12),
 ("d'aubigne's", 12),
 ('merous', 12),
 ('cai', 12),
 ('combatting', 12),
 ('vention', 12),
 ('observa', 12),
 ('fp', 12),
 ('yonx', 12),
 ("such'", 12),
 ('ht', 12),
 ('ds', 12),
 ('masse', 12),
 ('self-respecting', 12),
 ('pc', 12),
 ('ivierican', 12),
 ('tobe', 12),
 ("do'", 12),
 ("christ'", 12),
 ('ki', 12),
 ("neat's", 12),
 ('twenty-fifth', 12),
 ('ttin', 12),
 ('maurer', 12),
 ('bondstreet', 12),
 ('inter-state', 12),
 ('lation', 12),
 ('ang', 12),
 ("any'", 12),
 ('rk', 12),
 ('gx', 12),
 ('sunday-observance', 12),
 ('havergal', 11),
 ("james'", 11),
 ('thechurch', 11),
 ('sm', 11),
 ('cz', 11),
 ('df', 11),
 ("dealers'", 11),
 ('ex-senator', 11),
 ('erson', 11),
 ('uncompromis', 11),
 ('mm', 11),
 ('ernments', 11),
 ('sherk', 11),
 ('fifty-second', 11),
 ('selfpreservation', 11),
 ('derstanding', 11),
 ('nished', 11),
 ('naw', 11),
 ('tre', 11),
 ("states'", 11),
 ('theni', 11),
 ("state'", 11),
 ('communica', 11),
 ('ular', 11),
 ('rose-wood', 11),
 ('androscoggin', 11),
 ("bakers'", 11),
 ('mis', 11),
 ('dition', 11),
 ('ure', 11),
 ('forthe', 11),
 ('taschereau', 11),
 ('qt', 11),
 ('tm', 11),
 ('griffitts', 11),
 ('fellow-workers', 11),
 ('kw', 11),
 ('bradfield', 11),
 ('houk', 11),
 ('fot', 11),
 ("so'", 11),
 ("'a", 11),
 ('amyot', 11),
 ('muskoka', 11),
 ('pl', 11),
 ('aivierica', 11),
 ('erties', 11),
 ('qf', 11),
 ('bl', 11),
 ('merica', 11),
 ('haye', 11),
 ('ost', 11),
 ('ev', 11),
 ('isthepapacyinprophecy', 11),
 ('sev', 11),
 ('mits', 11),
 ('notre', 11),
 ('key-note', 11),
 ('hirsch', 11),
 ('sealings', 11),
 ('rin', 11),
 ('evil-doers', 11),
 ('nott', 11),
 ("civil'", 11),
 ('ml', 11),
 ('ete', 11),
 ('kee', 11),
 ('rc', 11),
 ('yr', 11),
 ('ex-governor', 11),
 ('cramer', 11),
 ('lr', 11),
 ('fs', 11),
 ('informations', 11),
 ('paoipio', 11),
 ('twentyfour', 11),
 ('ridgetown', 11),
 ('axact', 11),
 ('times-democrat', 11)]

Correction 7 -- Rejoin Split Words II

In [31]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction7"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)
    
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
AmSn18860101-V01-01-page3.txt: [('to', 're')]
AmSn18860301-V01-03-page1.txt: [('SEN', 'TINEL')]
AmSn18860301-V01-03-page3.txt: [('in', 'stitutions')]
AmSn18860301-V01-03-page8.txt: [('SEN', 'TINEL')]
AmSn18860401-V01-04-page1.txt: [('am', 'endmentists')]
AmSn18860601-V01-06-page1.txt: [('he', 're'), ('in', 'stitute')]
AmSn18860601-V01-06-page7.txt: [('PAR', 'LIAMENTARY'), ('AMER', 'ICAN')]
AmSn18860701-V01-07-page3.txt: [('fruit', 'ful')]
AmSn18860701-V01-07-page5.txt: [('a', 'ment')]
AmSn18860701-V01-07-page6.txt: [('Ch', 'aldea')]
AmSn18860801-V01-08-page1.txt: [('amen', 'dmentists')]
AmSn18860801-V01-08-page5.txt: [('the', 'Re')]
AmSn18860801-V01-08-page6.txt: [('to', 're')]
AmSn18860901-V01-09-page3.txt: [('Nation', 'al'), ('a', 'Na')]
AmSn18860901-V01-09-page4.txt: [('Amen', 'dmentists'), ('in', 'sistency')]
AmSn18860901-V01-09-page7.txt: [('and', 're')]
AmSn18861001-V01-10-page3.txt: [('when', 'ce')]
AmSn18861001-V01-10-page5.txt: [('be', 'ment')]
AmSn18861101-V01-11-page6.txt: [('the', 're')]
AmSn18861101-V01-11-page8.txt: [('Nation', 'al')]
AmSn18861201-V01-12-page1.txt: [('a', 're')]
AmSn18861201-V01-12-page2.txt: [('do', 'ers'), ('the', 'mis')]
AmSn18861201-V01-12-page6.txt: [('to', 're')]
AmSn18861201-V01-12-page7.txt: [('DE', 'STRUCTION'), ('ap', 'preciated')]
AmSn18870101-V02-01-page1.txt: [('fa', 'vorable')]
AmSn18870101-V02-01-page2.txt: [('Reform', 'ers'), ('SEN', 'TINEL'), ('re', 'sults'), ('in', 'dicated'), ('the', 're')]
AmSn18870101-V02-01-page3.txt: [('SEN', 'TINEL'), ('rep', 'robation')]
AmSn18870101-V02-01-page7.txt: [('sev', 'enty'), ('Constitution', 'al')]
AmSn18870301-V02-03-page2.txt: [('Social', 'istic'), ('in', 'scription')]
AmSn18870301-V02-03-page3.txt: [('la', 'Ws')]
AmSn18870301-V02-03-page4.txt: [('decal', 'ogue'), ('to', 'co'), ('Associ', 'ation')]
AmSn18870301-V02-03-page5.txt: [('f', 'orce')]
AmSn18870301-V02-03-page6.txt: [('Pres', 'byterian')]
AmSn18870301-V02-03-page8.txt: [('AMER', 'ICAN')]
AmSn18870401-V02-04-page6.txt: [('to', 'es'), ('amen', 'dment'), ('es', 'tablish')]
AmSn18870401-V02-04-page8.txt: [('SEN', 'TINEL')]
AmSn18870501-V02-05-page3.txt: [('au', 'thority')]
AmSn18870501-V02-05-page7.txt: [('COM', 'PLETE')]
AmSn18870601-V02-06-page8.txt: [('dimin', 'ished')]
AmSn18870701-V02-07-page3.txt: [('Gov', 'ernment')]
AmSn18870701-V02-07-page8.txt: [('b', 'id')]
AmSn18870801-V02-08-page8.txt: [('a', 'Te')]
AmSn18870901-V02-09-page6.txt: [('in', 'terest')]
AmSn18871001-V02-10-page1.txt: [('SEN', 'TINEL'), ('condi', 'tions')]
AmSn18871101-V02-11-page8.txt: [('Chris', 'tianity')]
AmSn18871201-V02-12-page1.txt: [('Associ', 'ation')]
AmSn18871201-V02-12-page3.txt: [('SEN', 'TINEL')]
AmSn18871201-V02-12-page5.txt: [('De', 'ception')]
AmSn18871201-V02-12-page8.txt: [('SEN', 'TINEL'), ('Con', 'Tention')]
AmSn18880101-V03-01-page1.txt: [('ex', 'actly')]
AmSn18880101-V03-01-page7.txt: [('r', 'oo'), ('sol', 'dier')]
AmSn18880101-V03-01-page8.txt: [('SEN', 'TINEL')]
AmSn18880201-V03-02-page1.txt: [('Sta', 'te')]
AmSn18880301-V03-03-page2.txt: [('conse', 'quently')]
AmSn18880301-V03-03-page8.txt: [('SEN', 'TINEL')]
AmSn18880401-V03-04-page1.txt: [('legis', 'lative')]
AmSn18880501-V03-05-page3.txt: [('a', 'nd'), ('hypo', 'Crites')]
AmSn18880501-V03-05-page7.txt: [('Prot', 'estant')]
AmSn18880601-V03-06-page2.txt: [('lib', 'erty')]
AmSn18880601-V03-06-page7.txt: [('you', 'th'), ('PRES', 'ERVATION')]
AmSn18880701-V03-07-page6.txt: [('ADDI', 'TIONAL')]
AmSn18880701-V03-07-page7.txt: [('ad', 'Vocate'), ('the', 're')]
AmSn18880801-V03-08-page5.txt: [('indus', 'trious')]
AmSn18880801-V03-08-page7.txt: [('biblic', 'al')]
AmSn18880901-V03-09-page4.txt: [('ar', 'rayed')]
AmSn18880901-V03-09-page5.txt: [('perse', 'cuted'), ('ten', 'ets'), ('we', 're')]
AmSn18880901-V03-09-page7.txt: [('con', 'tinuance')]
AmSn18881001-V03-10-page1.txt: [('SEN', 'TINEL')]
AmSn18881001-V03-10-page2.txt: [('fa', 'vor')]
AmSn18881001-V03-10-page6.txt: [('swe', 'eping')]
AmSn18881001-V03-10-page8.txt: [('amend', 'Ment')]
AmSn18881015-V03-10a-page5.txt: [('re', 'ligious'), ('SEN', 'TINEL')]
AmSn18881015-V03-10a-page7.txt: [('PRES', 'ERVATION')]
AmSn18881101-V03-11-page6.txt: [('genera', 'tions')]
AmSn18881201-V03-12-page3.txt: [('no', 'ma')]
AmSn18881201-V03-12-page4.txt: [('or', 'ganization')]
AmSn18881201-V03-12-page5.txt: [('temper', 'ance')]
AmSn18881201-V03-12-page6.txt: [('SEN', 'TINEL')]
AmSn18881201-V03-12-page8.txt: [('SEN', 'TINELS'), ('AMER', 'ICAN'), ('s', 'chool')]
AmSn18881201-V03-12-page9.txt: [('Go', 'dless'), ('Go', 'od'), ('Go', 'vernmental'), ('N', 'ational'), ('Go', 'vernment'), ('Go', 'spel')]
AmSn18890101-V04-01-page8.txt: [('SEN', 'TINEL')]
AmSn18890130-V04-02-page7.txt: [('car', 'ried'), ('C', 'ANVASSERS')]
AmSn18890130-V04-02-page8.txt: [('to', 'ro')]
AmSn18890206-V04-03-page2.txt: [('re', 'spect')]
AmSn18890206-V04-03-page7.txt: [('S', 'ABBATH')]
AmSn18890213-V04-04-page5.txt: [('SEN', 'TINEL'), ('par', 'ies')]
AmSn18890213-V04-04-page6.txt: [('j', 'ust'), ('ref', 'erence')]
AmSn18890213-V04-04-page7.txt: [('s', 'ABBATH')]
AmSn18890213-V04-04-page8.txt: [('insti', 'tuted'), ('AMER', 'ICAN')]
AmSn18890220-V04-05-page4.txt: [('i', 'ndorsement')]
AmSn18890220-V04-05-page7.txt: [('S', 'HORTHAND'), ('a', 'reli'), ('DIS', 'COVERIES')]
AmSn18890220-V04-05-page8.txt: [('SEN', 'TINEL')]
AmSn18890227-V04-06-page2.txt: [('the', 'Ca')]
AmSn18890227-V04-06-page7.txt: [('DIS', 'COVERIES')]
AmSn18890306-V04-07-page1.txt: [('e', 'th')]
AmSn18890306-V04-07-page3.txt: [('SEN', 'TINEL'), ('AMUSE', 'MENTS')]
AmSn18890306-V04-07-page5.txt: [('a', 'nd')]
AmSn18890306-V04-07-page7.txt: [('DIS', 'COVERIES')]
AmSn18890313-V04-08-page2.txt: [('h', 'alf')]
AmSn18890313-V04-08-page3.txt: [('so', 're')]
AmSn18890313-V04-08-page7.txt: [('an', 'Ab')]
AmSn18890320-V04-09-page1.txt: [('SEN', 'TINEL')]
AmSn18890320-V04-09-page7.txt: [('Ab', 'surdity'), ('an', 'Ab')]
AmSn18890327-V04-10-page7.txt: [('Or', 'ders')]
AmSn18890403-V04-11-page8.txt: [('Ber', 'th'), ('at', 'onement')]
AmSn18890410-V04-12-page7.txt: [('to', 'Ca'), ('p', 'OP'), ('Ca', 'sar')]
AmSn18890410-V04-12-page8.txt: [('SEN', 'TINEL'), ('la', 've'), ('AMER', 'ICAN')]
AmSn18890417-V04-13-page1.txt: [('AMER', 'ICAN')]
AmSn18890417-V04-13-page7.txt: [('S', 'OUTH'), ('S', 'HORTHAND'), ('T', 'ATTLE'), ('F', 'AMILY')]
AmSn18890417-V04-13-page8.txt: [('CALIF', 'ORNIA')]
AmSn18890424-V04-14-page7.txt: [('to', 'ri'), ('DIS', 'COVERIES'), ('e', 'CO')]
AmSn18890501-V04-15-page1.txt: [('as', 'sembly')]
AmSn18890501-V04-15-page7.txt: [('LIBRA', 'RY'), ('DIS', 'COVERIES'), ('con', 'tains')]
AmSn18890501-V04-15-page8.txt: [('con', 'tains')]
AmSn18890515-V04-16-page7.txt: [('LIBRA', 'RY')]
AmSn18890529-V04-18-page6.txt: [('AMER', 'ICAN')]
AmSn18890529-V04-18-page7.txt: [('LIBRA', 'RY'), ('A', 'RCHITECTS')]
AmSn18890529-V04-18-page8.txt: [('AMER', 'ICAN')]
AmSn18890605-V04-19-page1.txt: [('Ber', 'th')]
AmSn18890605-V04-19-page3.txt: [('toot', 'hbrushes')]
AmSn18890605-V04-19-page7.txt: [('L', 'ife')]
AmSn18890612-V04-20-page7.txt: [('s', 'ecs')]
AmSn18890612-V04-20-page8.txt: [('SEN', 'TINEL')]
AmSn18890619-V04-21-page1.txt: [('j', 'udicial')]
AmSn18890619-V04-21-page7.txt: [('a', 'nd')]
AmSn18890626-V04-22-page4.txt: [('lib', 'erty')]
AmSn18890626-V04-22-page7.txt: [('o', 'ct'), ('P', 'ENCIL')]
AmSn18890626-V04-22-page8.txt: [('AMER', 'ICAN')]
AmSn18890703-V04-23-page6.txt: [('immo', 'rality')]
AmSn18890703-V04-23-page7.txt: [('DIS', 'COVERIES'), ('to', 'rr')]
AmSn18890710-V04-24-page7.txt: [('DIS', 'COVERIES'), ('P', 'HOTOGRAPH')]
AmSn18890717-V04-25-page5.txt: [('ef', 'fectually')]
AmSn18890717-V04-25-page7.txt: [('L', 'ife'), ('H', 'appy')]
AmSn18890724-V04-26-page5.txt: [('or', 'iginators')]
AmSn18890724-V04-26-page7.txt: [('P', "UBLISHERS'"), ('A', 'lso')]
AmSn18890731-V04-27-page3.txt: [('har', 'mony')]
AmSn18890731-V04-27-page8.txt: [('SEN', 'TINEL')]
AmSn18890807-V04-28-page3.txt: [('an', 'sWerable')]
AmSn18890807-V04-28-page6.txt: [('of', 'fice')]
AmSn18890814-V04-29-page3.txt: [('SE', 'NTINEL'), ('on', 'ly')]
AmSn18890814-V04-29-page7.txt: [('A', 'RCHITECTS')]
AmSn18890821-V04-30-page7.txt: [('R', 'eform'), ('D', 'ress'), ('L', "adies'"), ('P', 'ACIFIC')]
AmSn18890828-V04-31-page2.txt: [('amuse', 'ments'), ('char', 'acter')]
AmSn18890828-V04-31-page4.txt: [('j', 'udiciary')]
AmSn18890828-V04-31-page7.txt: [('T', 'IA'), ('S', 'AFETY')]
AmSn18890905-V04-32-page3.txt: [('a', 'cre')]
AmSn18890911-V04-33-page2.txt: [('follow', 'ers')]
AmSn18890911-V04-33-page4.txt: [('discov', 'ered')]
AmSn18890911-V04-33-page7.txt: [('A', 'RE')]
AmSn18890918-V04-34-page5.txt: [('ac', 'tions'), ('con', 'trary')]
AmSn18890918-V04-34-page7.txt: [('i', 'ns')]
AmSn18890925-V04-35-page1.txt: [('per', 'secutes')]
AmSn18890925-V04-35-page2.txt: [('prov', 'ided')]
AmSn18890925-V04-35-page7.txt: [('cele', 'brated'), ('WIS', 'CONSIN')]
AmSn18891002-V04-36-page1.txt: [('re', 'spect')]
AmSn18891002-V04-36-page4.txt: [('bro', 'ught')]
AmSn18891002-V04-36-page7.txt: [('C', 'ivil'), ('P', 'ACIFIC'), ('W', 'aggoner')]
AmSn18891002-V04-36-page8.txt: [('re', 'spects'), ('who', 're'), ('persist', 'ent')]
AmSn18891009-V04-37-page1.txt: [('leg', 'islatures')]
AmSn18891009-V04-37-page2.txt: [('SEN', 'TINEL')]
AmSn18891009-V04-37-page6.txt: [('ab', 'sence')]
AmSn18891009-V04-37-page8.txt: [('SEN', 'TINEL')]
AmSn18891016-V04-38-page7.txt: [('WIS', 'CONSIN'), ('t', 'ie')]
AmSn18891016-V04-38-page8.txt: [('AMER', 'ICAN')]
AmSn18891023-V04-39-page4.txt: [('to', 'co')]
AmSn18891023-V04-39-page7.txt: [('WIS', 'CONSIN'), ('DIS', 'COVERIES')]
AmSn18891030-V04-40-page3.txt: [('dis', 'tinction')]
AmSn18891030-V04-40-page7.txt: [('WIS', 'CONSIN'), ('DIS', 'COVERIES')]
AmSn18891106-V04-41-page2.txt: [('state', 'ment')]
AmSn18891106-V04-41-page3.txt: [('a', 'll')]
AmSn18891106-V04-41-page8.txt: [('e', 'rr')]
AmSn18891113-V04-42-page1.txt: [('af', 'fections')]
AmSn18891113-V04-42-page2.txt: [('to', 'es')]
AmSn18891113-V04-42-page3.txt: [('a', 'ppointed'), ('a', 'nd'), ('a', 'll')]
AmSn18891113-V04-42-page5.txt: [('Gov', 'ernment')]
AmSn18891113-V04-42-page6.txt: [('re', 'ligious'), ('a', 're')]
AmSn18891113-V04-42-page7.txt: [('with', 'Ee'), ('S', 'cholarship')]
AmSn18891120-V04-43-page3.txt: [('fes', 'ses'), ('fail', 'ure')]
AmSn18891120-V04-43-page6.txt: [('B', 'IC'), ('an', 'noyance')]
AmSn18891120-V04-43-page8.txt: [('be', 'lieve')]
AmSn18891127-V04-44-page2.txt: [('persecu', 'tion')]
AmSn18891127-V04-44-page7.txt: [('to', 'NI'), ('inf', 'orm')]
AmSn18891204-V04-45-page3.txt: [('Massa', 'chusetts')]
AmSn18891204-V04-45-page7.txt: [('polit', 'ical'), ('comp', 'ete'), ('l', 'ino')]
AmSn18891211-V04-46-page8.txt: [('SEN', 'TINEL')]
AmSn18891218-V04-47-page1.txt: [('SEN', 'TINEL'), ('the', 'orize')]
AmSn18891218-V04-47-page5.txt: [('re', 'ligious')]
AmSn18891218-V04-47-page8.txt: [('SEN', 'TINEL'), ('AMER', 'ICAN')]
AmSn18891225-V04-48-page1.txt: [('SEN', 'TINEL'), ('pre', 'vailing')]
AmSn18891225-V04-48-page2.txt: [('Chris', 'tian')]
AmSn18891225-V04-48-page5.txt: [('we', 're')]
AmSn18891225-V04-48-page6.txt: [('danger', 'Ous')]
AmSn18891225-V04-48-page7.txt: [('ER', 'ICA')]
AmSn18891225-V04-48-page8.txt: [('ab', 'ut')]
AmSn18891225-V04-48-page9.txt: [('amend', 'ment')]
AmSn18900102-V05-01-page1.txt: [('SEN', 'TINEL')]
AmSn18900109-V05-02-page3.txt: [('SEN', 'TINEL')]
AmSn18900109-V05-02-page7.txt: [('P', 'IE')]
AmSn18900116-V05-03-page3.txt: [('AMER', 'ICAN')]
AmSn18900116-V05-03-page4.txt: [('Chris', 'tians')]
AmSn18900116-V05-03-page7.txt: [('houseke', 'epers'), ('S', 'ID')]
AmSn18900123-V05-04-page1.txt: [('fo', 'rce')]
AmSn18900123-V05-04-page2.txt: [('per', 'tains')]
AmSn18900123-V05-04-page4.txt: [('Con', 'gress')]
AmSn18900123-V05-04-page6.txt: [('a', 'nd'), ('a', 'rc')]
AmSn18900123-V05-04-page7.txt: [('ma', 'terial')]
AmSn18900123-V05-04-page8.txt: [('mi', 'Ssionary')]
AmSn18900130-V05-05-page1.txt: [('re', 'ceive'), ('to', 're')]
AmSn18900130-V05-05-page4.txt: [('addi', 'tion')]
AmSn18900130-V05-05-page5.txt: [('SEN', 'TINEL')]
AmSn18900130-V05-05-page7.txt: [('a', 'da'), ('the', 're')]
AmSn18900130-V05-05-page8.txt: [('treas', 'ury')]
AmSn18900206-V05-06-page3.txt: [('threat', 'ening')]
AmSn18900206-V05-06-page5.txt: [('fur', 'ther')]
AmSn18900206-V05-06-page7.txt: [('M', 'edicine'), ('P', 'IE'), ('D', 'om')]
AmSn18900206-V05-06-page8.txt: [('Legis', 'lature')]
AmSn18900213-V05-07-page7.txt: [('SIG', 'NS')]
AmSn18900220-V05-08-page1.txt: [('the', 'se')]
AmSn18900220-V05-08-page2.txt: [('utilita', 'rian')]
AmSn18900220-V05-08-page6.txt: [('sic', 'kness')]
AmSn18900227-V05-09-page1.txt: [('origi', 'nated')]
AmSn18900227-V05-09-page2.txt: [('Ken', 'tucky'), ('AMER', 'ICAN'), ('Com', 'mittee')]
AmSn18900227-V05-09-page3.txt: [('SEC', 'TION')]
AmSn18900227-V05-09-page7.txt: [('a', 'nd')]
AmSn18900306-V05-10-page1.txt: [('Pa', 'se')]
AmSn18900306-V05-10-page2.txt: [('Do', 'uay'), ('Wis', 'consin'), ('Roman', 'Ce')]
AmSn18900306-V05-10-page6.txt: [('man', 'IC')]
AmSn18900306-V05-10-page7.txt: [('E', 'RI')]
AmSn18900313-V05-11-page4.txt: [('pa', 'rt')]
AmSn18900313-V05-11-page6.txt: [('evi', 'dently')]
AmSn18900313-V05-11-page7.txt: [('AMER', 'ICAN')]
AmSn18900320-V05-12-page6.txt: [('SEN', 'TINEL')]
AmSn18900327-V05-13-page2.txt: [('o', 'wn')]
AmSn18900327-V05-13-page7.txt: [('Y', 'es')]
AmSn18900403-V05-14-page2.txt: [('syS', 'tem')]
AmSn18900410-V05-15-page3.txt: [('neces', 'sArily')]
AmSn18900410-V05-15-page4.txt: [('par', 'se')]
AmSn18900410-V05-15-page6.txt: [('ques', 'tion')]
AmSn18900410-V05-15-page7.txt: [('M', 'ILLIONS')]
AmSn18900417-V05-16-page1.txt: [('sup', 'pression'), ('V', 'OLUME')]
AmSn18900417-V05-16-page6.txt: [('s', 'chool')]
AmSn18900417-V05-16-page7.txt: [('M', 'ILLIONS')]
AmSn18900417-V05-16-page8.txt: [('Cath', 'olic')]
AmSn18900424-V05-17-page7.txt: [('eve', 'ryone'), ('W', 'orld')]
AmSn18900501-V05-18-page1.txt: [('til', 'th')]
AmSn18900501-V05-18-page2.txt: [('on', 'ly'), ('Chris', 'tians')]
AmSn18900501-V05-18-page6.txt: [('objec', 'tionable')]
AmSn18900508-V05-19-page3.txt: [('S', 'ENTINEL')]
AmSn18900515-V05-20-page1.txt: [('minis', 'ters')]
AmSn18900515-V05-20-page5.txt: [('i', 'mportant')]
AmSn18900515-V05-20-page7.txt: [('eve', 'ryone')]
AmSn18900522-V05-21-page2.txt: [('Chr', 'ist'), ('right', 'eousness')]
AmSn18900522-V05-21-page3.txt: [('con', 'sume')]
AmSn18900529-V05-22-page2.txt: [('speak', 'easies')]
AmSn18900529-V05-22-page7.txt: [('W', 'onderful')]
AmSn18900605-V05-23-page7.txt: [('W', 'orld')]
AmSn18900612-V05-24-page3.txt: [('govern', 'Ment')]
AmSn18900612-V05-24-page7.txt: [('at', 'tached')]
AmSn18900612-V05-24-page8.txt: [('state', 'ment')]
AmSn18900619-V05-25-page2.txt: [('spe', 'cifically')]
AmSn18900619-V05-25-page7.txt: [('M', 'IGHTY')]
AmSn18900626-V05-26-page7.txt: [('hea', 'th'), ('M', 'IGHTY')]
AmSn18900717-V05-28-page6.txt: [('protec', 'tion')]
AmSn18900724-V05-29-page1.txt: [('govern', 'ments')]
AmSn18900724-V05-29-page7.txt: [('the', 'ba')]
AmSn18900731-V05-30-page1.txt: [('SEN', 'TINEL')]
AmSn18900731-V05-30-page2.txt: [('a', 'lso')]
AmSn18900731-V05-30-page6.txt: [('o', 'ne')]
AmSn18900814-V05-32-page4.txt: [('consta', 'bles'), ('a', 'boriginal')]
AmSn18900814-V05-32-page8.txt: [('G', 'overnment')]
AmSn18900821-V05-33-page5.txt: [('beau', 'tiful')]
AmSn18900821-V05-33-page7.txt: [('invest', 'ment'), ('sten', 'cil'), ('at', 'tached')]
AmSn18900828-V05-34-page7.txt: [('Ob', 'ject'), ('at', 'tached')]
AmSn18900904-V05-35-page7.txt: [('P', 'IE')]
AmSn18900911-V05-36-page2.txt: [('in', 'hibiting')]
AmSn18900911-V05-36-page3.txt: [('SEN', 'TINEL')]
AmSn18900918-V05-37-page1.txt: [('B', 'ib'), ('a', 'griC'), ('o', 'ft'), ('m', 'ay'), ('se', 'cy'), ('t', 'Ia')]
AmSn18900918-V05-37-page4.txt: [('i', 'asi'), ('s', 'IL'), ('P', 'EtIt'), ('A', 'te'), ('m', 'Ap'), ('t', 'oi'), ('b', 'ei')]
AmSn18900918-V05-37-page5.txt: [('lA', 'rd')]
AmSn18900918-V05-37-page6.txt: [('o', 'ut')]
AmSn18900918-V05-37-page8.txt: [('era', 'th'), ('n', 'ip'), ('e', 'ying'), ('d', 'id'), ('s', 'op')]
AmSn18900925-V05-38-page8.txt: [('SEN', 'TINEL'), ('over', 'whelmingly')]
AmSn18901009-V05-40-page6.txt: [('AMER', 'ICAN')]
AmSn18901016-V05-41-page4.txt: [('regula', 'tions'), ('legisla', 'tion')]
AmSn18901016-V05-41-page5.txt: [('amuse', 'ments')]
AmSn18901016-V05-41-page6.txt: [('CON', 'SERVATOR')]
AmSn18901016-V05-41-page7.txt: [('W', 'orkings')]
AmSn18901023-V05-42-page1.txt: [('BY', 'TES')]
AmSn18901023-V05-42-page2.txt: [('a', 'nd'), ('stud', 'ies'), ('to', 'tal')]
AmSn18901023-V05-42-page3.txt: [('to', 're')]
AmSn18901023-V05-42-page4.txt: [('a', 'bOVe'), ('a', 'nd')]
AmSn18901023-V05-42-page6.txt: [('cap', 'tured'), ('move', 'ment')]
AmSn18901030-V05-43-page1.txt: [('Living', 'ston')]
AmSn18901030-V05-43-page3.txt: [('Hollow', 'ay'), ('author', 'ities')]
AmSn18901030-V05-43-page5.txt: [('AMER', 'ICA')]
AmSn18901106-V05-44-page3.txt: [('SEN', 'TINEL'), ('in', 'Volves')]
AmSn18901106-V05-44-page7.txt: [('con', 'Stantly')]
AmSn18901113-V05-45-page2.txt: [('P', 'ress'), ('Govern', 'Ment')]
AmSn18901113-V05-45-page7.txt: [('A', 'ND')]
AmSn18901120-V05-46-page3.txt: [('ra', 'pidly')]
AmSn18901127-V05-47-page4.txt: [('me', 'morialize'), ('per', 'fect')]
AmSn18901127-V05-47-page7.txt: [('con', 'stantly'), ('T', 'ams')]
AmSn18901127-V05-47-page8.txt: [('rega', 'rds')]
AmSn18901204-V05-48-page8.txt: [('neces', 'sary')]
AmSn18901211-V05-49-page4.txt: [('repugna', 'nt')]
AmSn18901211-V05-49-page7.txt: [('con', 'stantly')]
AmSn18901218-V05-50-page3.txt: [('minor', 'ity'), ('theol', 'ogy'), ('rewa', 'rds'), ('the', 're')]
AmSn18901218-V05-50-page5.txt: [('the', 'refore')]
AmSn18901218-V05-50-page6.txt: [('d', 'ays')]
AmSn18901218-V05-50-page9.txt: [('con', 'vention'), ('B', 'ible')]
AmSn18910101-V06-01-page1.txt: [('SEN', 'TINEL')]
AmSn18910101-V06-01-page3.txt: [('w', 'ould')]
AmSn18910101-V06-01-page4.txt: [('sup', 'pression')]
AmSn18910101-V06-01-page5.txt: [('tor', 'tures')]
AmSn18910108-V06-02-page6.txt: [('legisla', 'tion')]
AmSn18910115-V06-03-page3.txt: [('SEN', 'TINEL'), ('pros', 'pered')]
AmSn18910115-V06-03-page4.txt: [('A', 'merican')]
AmSn18910115-V06-03-page7.txt: [('pe', 'ns'), ('l', 'eather')]
AmSn18910115-V06-03-page8.txt: [('a', 'nd'), ('AMER', 'ICAN')]
AmSn18910122-V06-04-page2.txt: [('t', 'iti'), ('pro', 'nounced')]
AmSn18910122-V06-04-page6.txt: [('prop', 'erty')]
AmSn18910122-V06-04-page8.txt: [('AMER', 'ICAN')]
AmSn18910129-V06-05-page2.txt: [('de', 'manded')]
AmSn18910129-V06-05-page3.txt: [('per', 'ception'), ('in', 'stinct')]
AmSn18910129-V06-05-page4.txt: [('Govern', 'Ment')]
AmSn18910129-V06-05-page7.txt: [('Y', 'ork'), ('devel', 'opment')]
AmSn18910212-V06-07-page3.txt: [('PE', 'TITIONED'), ('Leg', 'islature')]
AmSn18910212-V06-07-page4.txt: [('An', 'oa'), ('AMER', 'ICAN')]
AmSn18910212-V06-07-page8.txt: [('anniver', 'sary')]
AmSn18910219-V06-08-page3.txt: [('SEN', 'TINEL')]
AmSn18910219-V06-08-page7.txt: [('w', 'ork')]
AmSn18910226-V06-09-page7.txt: [('w', 'ork')]
AmSn18910226-V06-09-page8.txt: [('de', 'Cided')]
AmSn18910305-V06-10-page4.txt: [('num', 'ber')]
AmSn18910305-V06-10-page6.txt: [('mani', 'festation'), ('as', 'sured')]
AmSn18910305-V06-10-page8.txt: [('AMER', 'ICAN')]
AmSn18910319-V06-12-page6.txt: [('A', 'MERICAN')]
AmSn18910319-V06-12-page7.txt: [('W', 'ORSHIP')]
AmSn18910326-V06-13-page8.txt: [('worsh', 'ip')]
AmSn18910402-V06-14-page2.txt: [('SEN', 'TINEL')]
AmSn18910402-V06-14-page4.txt: [('a', 'bl')]
AmSn18910402-V06-14-page8.txt: [('A', 'MERICAN'), ('inter', 'ests')]
AmSn18910409-V06-15-page4.txt: [('the', 'se')]
AmSn18910409-V06-15-page7.txt: [('a', 'ges')]
AmSn18910416-V06-16-page1.txt: [('In', 'ti')]
AmSn18910416-V06-16-page2.txt: [('state', 'ment')]
AmSn18910416-V06-16-page3.txt: [('A', 'nd')]
AmSn18910416-V06-16-page8.txt: [('Y', 'ork')]
AmSn18910423-V06-17-page4.txt: [('infringe', 'ment')]
AmSn18910423-V06-17-page6.txt: [('a', 'nd')]
AmSn18910423-V06-17-page8.txt: [('the', 'se'), ('con', 'gregation')]
AmSn18910430-V06-18-page7.txt: [('THE', 'Ca')]
AmSn18910507-V06-19-page5.txt: [('AM', 'ERICAN')]
AmSn18910507-V06-19-page8.txt: [('A', 'MERICAN')]
AmSn18910514-V06-20-page3.txt: [('op', 'portunity')]
AmSn18910514-V06-20-page7.txt: [('at', 'tached')]
AmSn18910521-V06-21-page4.txt: [('infringe', 'ment')]
AmSn18910521-V06-21-page7.txt: [('F', 'iNe'), ('W', 'ith')]
AmSn18910604-V06-23-page6.txt: [('AMER', 'ICAN')]
AmSn18910604-V06-23-page8.txt: [('for', 'th')]
AmSn18910611-V06-24-page2.txt: [('SEN', 'TINEL'), ('A', 'ugustus')]
AmSn18910618-V06-25-page6.txt: [('Com', "mittee's"), ('AM', 'ERICAN')]
AmSn18910618-V06-25-page7.txt: [('Y', 'ORK')]
AmSn18910625-V06-26-page3.txt: [('pun', 'ished')]
AmSn18910625-V06-26-page4.txt: [('in', 'ti')]
AmSn18910625-V06-26-page5.txt: [('prac', 'tically')]
AmSn18910625-V06-26-page7.txt: [('for', 'te')]
AmSn18910709-V06-27-page3.txt: [('con', 'versant')]
AmSn18910709-V06-27-page6.txt: [('to', 'co'), ('thor', 'oughly')]
AmSn18910709-V06-27-page7.txt: [('t', 'iA')]
AmSn18910709-V06-27-page8.txt: [('frater', 'nity')]
AmSn18910716-V06-28-page1.txt: [('the', 're')]
AmSn18910716-V06-28-page6.txt: [('C', 'urt')]
AmSn18910716-V06-28-page8.txt: [('SEN', 'TINEL')]
AmSn18910806-V06-31-page6.txt: [('Amer', 'ican')]
AmSn18910827-V06-34-page5.txt: [('Chris', 'tian')]
AmSn18910827-V06-34-page7.txt: [('HEN', 'RY')]
AmSn18910827-V06-34-page8.txt: [('min', 'ister')]
AmSn18910903-V06-35-page1.txt: [('Govern', 'ment')]
AmSn18910903-V06-35-page2.txt: [('prin', 'ciple')]
AmSn18910903-V06-35-page5.txt: [('Ex', 'amples')]
AmSn18910903-V06-35-page6.txt: [('de', 'tectives'), ('AMER', 'ICAN')]
AmSn18910903-V06-35-page7.txt: [('Ca', 'Ns'), ('W', 'itchcraft'), ('to', 'ld')]
AmSn18910910-V06-36-page1.txt: [('legisla', 'tion')]
AmSn18910910-V06-36-page4.txt: [('quota', 'tions'), ('ma', 'jority')]
AmSn18910910-V06-36-page5.txt: [('invec', 'tives'), ('per', 'se'), ('char', 'acterizes'), ('con', 'dition'), ('condi', 'tion'), ('Chris', 'tian')]
AmSn18910910-V06-36-page6.txt: [('hun', 'dred')]
AmSn18910910-V06-36-page7.txt: [('P', 'ress')]
AmSn18910917-V06-37-page3.txt: [('be', 'lieves'), ('end', 'ureth'), ('man', 'agers')]
AmSn18910917-V06-37-page6.txt: [('pro', 'tects'), ('Gov', 'ernor'), ('sym', 'pathy'), ('Chris', 'tian')]
AmSn18910917-V06-37-page7.txt: [('Or', 'namental')]
AmSn18910924-V06-38-page2.txt: [('min', 'ister')]
AmSn18910924-V06-38-page5.txt: [('per', 'secution')]
AmSn18910924-V06-38-page7.txt: [('a', 'nd')]
AmSn18911022-V06-41-page1.txt: [('in', 'Sisted')]
AmSn18911022-V06-41-page3.txt: [('SEN', 'TINEL')]
AmSn18911022-V06-41-page4.txt: [('per', 'se')]
AmSn18911022-V06-41-page6.txt: [('agree', 'ment'), ('Camp', 'bellites')]
AmSn18911022-V06-41-page8.txt: [('state', 'ment'), ('a', 'llow')]
AmSn18911112-V06-44-page8.txt: [('T', 'IE')]
AmSn18911126-V06-46-page4.txt: [('expres', 'sed')]
AmSn18911126-V06-46-page8.txt: [('legal', 'ize')]
AmSn18911203-V06-47-page1.txt: [('SEN', 'TINEL')]
AmSn18911203-V06-47-page3.txt: [('IN', 'STITUTION')]
AmSn18911203-V06-47-page7.txt: [('the', 'ba'), ('me', 'chanics')]
AmSn18911210-V06-48-page6.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER')]
AmSn18911217-V06-49-page4.txt: [('christian', 'ized')]
AmSn18911217-V06-49-page5.txt: [('LIV', 'INGSTONE'), ('differ', 'ent')]
AmSn18911224-V06-50-page2.txt: [('persecu', 'tion')]
AmSn18911224-V06-50-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('LIV', 'INGSTONE')]
AmSn18920107-V07-01-page5.txt: [('reli', 'gious')]
AmSn18920107-V07-01-page6.txt: [('S', 'te')]
AmSn18920107-V07-01-page7.txt: [('Pi', 'oneer')]
AmSn18920107-V07-01-page8.txt: [('A', 'ny')]
AmSn18920114-V07-02-page5.txt: [('nefa', 'rious')]
AmSn18920114-V07-02-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('HEN', 'RY')]
AmSn18920121-V07-03-page3.txt: [('see', 'th'), ('elo', 'quently')]
AmSn18920128-V07-04-page5.txt: [('rec', 'ognize')]
AmSn18920128-V07-04-page7.txt: [('HEN', 'RY'), ('diction', 'ary'), ('FOR', 'EIGN')]
AmSn18920204-V07-05-page5.txt: [('P', 'ig'), ('a', 'cc'), ('A', 'CC'), ('o', 'ct'), ('b', 'id')]
AmSn18920204-V07-05-page6.txt: [('SEN', 'TINEL'), ('to', 're')]
AmSn18920204-V07-05-page7.txt: [('ORIGIN', 'AL'), ('HEN', 'RY'), ('GAZ', 'ETTEER')]
AmSn18920211-V07-06-page5.txt: [('tri', 'bunal')]
AmSn18920211-V07-06-page7.txt: [('e', 'LI'), ('HEN', 'RY'), ('diction', 'ary'), ('FOR', 'EIGN')]
AmSn18920218-V07-07-page2.txt: [('be', 'lial'), ('Chris', 'tianity'), ('indict', 'ment')]
AmSn18920218-V07-07-page3.txt: [('fr', 'om')]
AmSn18920218-V07-07-page6.txt: [('pub', 'lished')]
AmSn18920218-V07-07-page7.txt: [('FOR', 'EIGN')]
AmSn18920225-V07-08-page7.txt: [('GAZ', 'ETTEER')]
AmSn18920303-V07-09-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary'), ('miner', 'als'), ('Chris', 'tianity')]
AmSn18920310-V07-10-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18920317-V07-11-page7.txt: [('diction', 'ary')]
AmSn18920324-V07-12-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18920331-V07-13-page1.txt: [('indi', 'cates')]
AmSn18920331-V07-13-page3.txt: [('A', 'fter')]
AmSn18920331-V07-13-page5.txt: [('by', 're')]
AmSn18920407-V07-14-page7.txt: [('GEN', 'ERAL')]
AmSn18920414-V07-15-page2.txt: [('SEN', 'TINEL'), ('the', 'se')]
AmSn18920414-V07-15-page4.txt: [('com', 'mittee')]
AmSn18920414-V07-15-page7.txt: [('Am', 'erican')]
AmSn18920428-V07-17-page7.txt: [('l', 'imes'), ('t', 'ok')]
AmSn18920505-V07-18-page3.txt: [('f', 'oresaw')]
AmSn18920505-V07-18-page4.txt: [('in', 'corporating')]
AmSn18920505-V07-18-page5.txt: [('AME', 'RICAN')]
AmSn18920519-V07-20-page6.txt: [('w', 'hich')]
AmSn18920519-V07-20-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary')]
AmSn18920526-V07-21-page3.txt: [('in', 'asmuch')]
AmSn18920526-V07-21-page4.txt: [('author', 'ities')]
AmSn18920526-V07-21-page5.txt: [('CO', 'CO'), ('In', 'teresting'), ('CO', 'Co'), ('it', 'al'), ('Go', 'vernment')]
AmSn18920526-V07-21-page7.txt: [('FOR', 'EIGN')]
AmSn18920602-V07-22-page4.txt: [('the', 're')]
AmSn18920602-V07-22-page7.txt: [('in', 'hibition')]
AmSn18920609-V07-23-page1.txt: [('Con', 'gress')]
AmSn18920609-V07-23-page3.txt: [('T', 'iE')]
AmSn18920616-V07-24-page7.txt: [('H', 'Id'), ('H', 'UI')]
AmSn18920623-V07-25-page2.txt: [('dis', 'turbed')]
AmSn18920623-V07-25-page4.txt: [('gen', 'erally'), ('w', 'ith')]
AmSn18920630-V07-26-page5.txt: [('AMER', 'ICAN')]
AmSn18920630-V07-26-page8.txt: [('SEN', 'TINEL')]
AmSn18920714-V07-27-page6.txt: [('SEN', 'TINEL'), ('i', 'ncline')]
AmSn18920721-V07-28-page7.txt: [('In', 'ca'), ('FOR', 'EIGN')]
AmSn18920728-V07-29-page3.txt: [('Con', 'stitution')]
AmSn18920728-V07-29-page5.txt: [('to', 'te')]
AmSn18920728-V07-29-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('diction', 'ary')]
AmSn18920728-V07-29-page8.txt: [('men', 'pleasers')]
AmSn18920804-V07-30-page7.txt: [('DICTION', 'ARY'), ('miner', 'als')]
AmSn18920811-V07-31-page3.txt: [('a', 'gainst')]
AmSn18920811-V07-31-page4.txt: [('Expo', 'sition'), ('he', 're')]
AmSn18920811-V07-31-page7.txt: [('GAZ', 'ETTEER'), ('diction', 'ary')]
AmSn18920818-V07-32-page7.txt: [('FOR', 'EIGN')]
AmSn18920818-V07-32-page8.txt: [('win', 'ce')]
AmSn18920825-V07-33-page2.txt: [('to', 're')]
AmSn18920825-V07-33-page3.txt: [('per', 'se')]
AmSn18920825-V07-33-page7.txt: [('r', 'efer')]
AmSn18920825-V07-33-page8.txt: [('annoy', 'ance')]
AmSn18920901-V07-34-page1.txt: [('E', 'qual')]
AmSn18920901-V07-34-page3.txt: [('con', 'nection')]
AmSn18920901-V07-34-page4.txt: [('four', 'teenth'), ('and', 're')]
AmSn18920901-V07-34-page7.txt: [('GAZ', 'ETTEER'), ('miner', 'als')]
AmSn18920908-V07-35-page8.txt: [('in', 'delibly')]
AmSn18920915-V07-36-page7.txt: [('HEN', 'RY')]
AmSn18920915-V07-36-page8.txt: [('per', 'mitted')]
AmSn18920929-V07-38-page6.txt: [('and', 'es')]
AmSn18920929-V07-38-page7.txt: [('FOR', 'EIGN'), ('in', 'vited')]
AmSn18921006-V07-39-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary')]
AmSn18921006-V07-39-page8.txt: [('o', 'ne')]
AmSn18921013-V07-40-page1.txt: [('w', 'hich')]
AmSn18921020-V07-41-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18921027-V07-42-page1.txt: [('AMER', 'ICAN')]
AmSn18921027-V07-42-page7.txt: [('Christian', 'ity'), ('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18921103-V07-43-page4.txt: [('m', 'eeting')]
AmSn18921110-V07-44-page8.txt: [('S', 'tE')]
AmSn18921117-V07-45-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18921117-V07-45-page8.txt: [('per', 'centage')]
AmSn18921124-V07-46-page7.txt: [('leg', 'islation')]
AmSn18921201-V07-47-page7.txt: [('GAZ', 'ETTEER'), ('FOR', 'EIGN')]
AmSn18921208-V07-48-page7.txt: [('SEN', 'TINEL')]
AmSn18921208-V07-48-page8.txt: [('to', 'ft')]
AmSn18921215-V07-49-page2.txt: [('AMER', 'ICAN')]
AmSn18921215-V07-49-page7.txt: [('HEN', 'RY')]
AmSn18921215-V07-49-page8.txt: [('St', 'atesman'), ('St', 'evenson'), ('T', 'IE'), ('St', 'ates')]
AmSn18921222-V07-50-page1.txt: [('LIB', 'ERTY')]
AmSn18921222-V07-50-page8.txt: [('SEN', 'TINEL')]
AmSn18930112-V08-02-page2.txt: [('SEN', 'TINEL')]
AmSn18930112-V08-02-page7.txt: [('H', 'AL')]
AmSn18930112-V08-02-page8.txt: [('non', 'feasance'), ('Jan', 'uary'), ('add', 'ress')]
AmSn18930119-V08-03-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('DICTION', 'ARY')]
AmSn18930126-V08-04-page7.txt: [('C', 'reola'), ('t', 'ome')]
AmSn18930126-V08-04-page8.txt: [('add', 'ress')]
AmSn18930202-V08-05-page6.txt: [('w', 'orld')]
AmSn18930209-V08-06-page4.txt: [('A', 'lk')]
AmSn18930209-V08-06-page6.txt: [('Ines', 'timable')]
AmSn18930209-V08-06-page7.txt: [('hodge', 'podge')]
AmSn18930216-V08-07-page6.txt: [('C', 'hristian'), ('N', 'ation'), ('A', 're')]
AmSn18930216-V08-07-page7.txt: [('AMER', 'ICAN')]
AmSn18930309-V08-10-page7.txt: [('at', 'torney'), ('THE', 'CA')]
AmSn18930316-V08-11-page5.txt: [('per', 'se')]
AmSn18930316-V08-11-page7.txt: [('DICTION', 'ARY')]
AmSn18930406-V08-14-page3.txt: [('exemp', 'tion')]
AmSn18930406-V08-14-page7.txt: [('V', 'IC'), ('i', 'NT'), ('FUR', 'NISHED')]
AmSn18930406-V08-14-page8.txt: [('add', 'ress')]
AmSn18930413-V08-15-page5.txt: [('l', 'aw')]
AmSn18930420-V08-16-page7.txt: [('DICTION', 'ARY')]
AmSn18930427-V08-17-page2.txt: [('JOSE', 'PH')]
AmSn18930504-V08-18-page7.txt: [('DICTION', 'ARY'), ('FUR', 'NISHED')]
AmSn18930518-V08-20-page7.txt: [('DICTION', 'ARY')]
AmSn18930601-V08-22-page7.txt: [('GAZ', 'ETTEER'), ('gaz', 'etteer')]
AmSn18930608-V08-23-page4.txt: [('w', 'orship')]
AmSn18930608-V08-23-page6.txt: [('lib', 'erty')]
AmSn18930615-V08-24-page5.txt: [('con', 'stitutionality')]
AmSn18930615-V08-24-page8.txt: [('SEN', 'TINEL')]
AmSn18930629-V08-26-page8.txt: [('A', 'MI')]
AmSn18930706-V08-27-page2.txt: [('a', 'fferent')]
AmSn18930706-V08-27-page3.txt: [('per', 'secution')]
AmSn18930706-V08-27-page6.txt: [('h', 'eretics')]
AmSn18930706-V08-27-page7.txt: [('DICTION', 'ARY')]
AmSn18930720-V08-29-page4.txt: [('mission', 'ary')]
AmSn18930720-V08-29-page8.txt: [('rep', 'utation'), ('c', 'ato')]
AmSn18930727-V08-30-page2.txt: [('amend', 'ments'), ('AMER', 'ICAN')]
AmSn18930803-V08-31-page7.txt: [('W', 'afers'), ('diction', 'ary')]
AmSn18930810-V08-32-page3.txt: [('Wo', "rld's")]
AmSn18930810-V08-32-page5.txt: [('In', 'terior')]
AmSn18930810-V08-32-page8.txt: [('Haw', 'keye')]
AmSn18930817-V08-33-page6.txt: [('per', 'se')]
AmSn18930831-V08-34-page2.txt: [('van', 'ity')]
AmSn18930831-V08-34-page5.txt: [('per', 'se')]
AmSn18930831-V08-34-page8.txt: [('pro', 'hibited')]
AmSn18930907-V08-35-page2.txt: [('a', 'bstract')]
AmSn18930914-V08-36-page4.txt: [('hi', 'gher'), ('REESTABLISH', 'MENT')]
AmSn18930921-V08-37-page4.txt: [('the', 'Es')]
AmSn18930928-V08-38-page5.txt: [('SEN', 'TINEL')]
AmSn18930928-V08-38-page7.txt: [('r', 'esearches')]
AmSn18930928-V08-38-page8.txt: [('m', 'ade')]
AmSn18931012-V08-40-page2.txt: [('SEN', 'TINEL')]
AmSn18931012-V08-40-page3.txt: [('mod', 'ern'), ('PUN', 'ISHING')]
AmSn18931012-V08-40-page7.txt: [('DICTION', 'ARY'), ('sub', 'stantially')]
AmSn18931019-V08-41-page7.txt: [('P', 'UBLIC'), ('W', 'ork'), ('C', 'oncluding'), ('de', 'scription')]
AmSn18931026-V08-42-page2.txt: [('are', 'Ca'), ('Ca', 'sar')]
AmSn18931026-V08-42-page3.txt: [('dec', 'laration')]
AmSn18931026-V08-42-page6.txt: [('r', 'ather')]
AmSn18931026-V08-42-page7.txt: [('GEN', 'ERAL'), ('HEN', 'RY'), ('GAZ', 'ETTEER')]
AmSn18931026-V08-42-page8.txt: [('o', 'ne')]
AmSn18931102-V08-43-page4.txt: [('hob', 'nobbing')]
AmSn18931102-V08-43-page8.txt: [('to', 'co')]
AmSn18931109-V08-44-page2.txt: [('sub', 'jects')]
AmSn18931109-V08-44-page8.txt: [('j', 'udgment')]
AmSn18931116-V08-45-page1.txt: [('phys', 'ically')]
AmSn18931116-V08-45-page3.txt: [('NO', 'VEMBER')]
AmSn18931116-V08-45-page7.txt: [('p', 'es')]
AmSn18931116-V08-45-page8.txt: [('SEN', 'TINEL')]
AmSn18931123-V08-46-page1.txt: [('are', 'Ca')]
AmSn18931123-V08-46-page2.txt: [('SEN', 'TINEL')]
AmSn18931123-V08-46-page4.txt: [('light', 'heartedness'), ('per', 'se')]
AmSn18931130-V08-47-page1.txt: [('contempo', 'raneous')]
AmSn18931130-V08-47-page6.txt: [('threat', 'ened')]
AmSn18931207-V08-48-page1.txt: [('char', 'acter'), ('and', 're'), ('trans', 'gress')]
AmSn18931207-V08-48-page4.txt: [('pro', 'tected'), ('pro', 'Vides'), ('establish', 'Ment')]
AmSn18931214-V08-49-page5.txt: [('SEN', 'TINEL'), ('B', 'arabbas')]
AmSn18931221-V08-50-page3.txt: [('heart', 'ily')]
AmSn18931221-V08-50-page8.txt: [('ten', 'ts'), ('A', 'ccording')]
AmSn18940104-V09-01-page1.txt: [('SEN', 'TINEL')]
AmSn18940104-V09-01-page2.txt: [('con', 'tinent')]
AmSn18940104-V09-01-page8.txt: [('c', 'id')]
AmSn18940111-V09-02-page4.txt: [('o', 'rder')]
AmSn18940118-V09-03-page2.txt: [('abso', 'lutely')]
AmSn18940118-V09-03-page6.txt: [('Chris', 'tian')]
AmSn18940125-V09-04-page2.txt: [('enthu', 'siastic')]
AmSn18940125-V09-04-page7.txt: [('P', 'UBLIC')]
AmSn18940201-V09-05-page1.txt: [('Y', 'ork')]
AmSn18940201-V09-05-page2.txt: [('bus', 'es')]
AmSn18940201-V09-05-page6.txt: [('d', 'ay')]
AmSn18940208-V09-06-page2.txt: [('cha', 'mpion')]
AmSn18940308-V09-10-page2.txt: [('A', 'NT')]
AmSn18940315-V09-11-page1.txt: [('SEN', 'TINEL')]
AmSn18940315-V09-11-page2.txt: [('to', 'Ca')]
AmSn18940322-V09-12-page6.txt: [('Chris', 'tian')]
AmSn18940322-V09-12-page7.txt: [('diction', 'ary')]
AmSn18940329-V09-13-page3.txt: [('tes', 'tifies')]
AmSn18940329-V09-13-page4.txt: [('A', 'nd')]
AmSn18940329-V09-13-page7.txt: [('GAZ', 'ETTEER')]
AmSn18940405-V09-14-page2.txt: [('CA', 'NT')]
AmSn18940405-V09-14-page7.txt: [('de', 'claring'), ('GAZ', 'ETTEER')]
AmSn18940412-V09-15-page2.txt: [('a', 'll')]
AmSn18940412-V09-15-page6.txt: [('reli', 'gious')]
AmSn18940412-V09-15-page8.txt: [('w', 'ork')]
AmSn18940419-V09-16-page7.txt: [('GAZ', 'ETTEER')]
AmSn18940426-V09-17-page5.txt: [('Con', 'stitutional')]
AmSn18940503-V09-18-page4.txt: [('u', 'nited')]
AmSn18940503-V09-18-page5.txt: [('bay', 'onet')]
AmSn18940503-V09-18-page7.txt: [('Y', 'es')]
AmSn18940510-V09-19-page5.txt: [('SEN', 'TINEL'), ('author', 'ity')]
AmSn18940524-V09-21-page6.txt: [('u', 'pon')]
AmSn18940524-V09-21-page8.txt: [('to', 'co'), ('w', 'ith')]
AmSn18940531-V09-22-page3.txt: [('present', 'ation')]
AmSn18940607-V09-23-page6.txt: [('far', 'cical'), ('per', 'mitted')]
AmSn18940614-V09-24-page2.txt: [('CON', 'STITUTIONAL')]
AmSn18940614-V09-24-page3.txt: [('LIB', 'ERTY'), ('CON', 'TROL'), ('ins', 'tructions'), ('V', 'ulture')]
AmSn18940614-V09-24-page7.txt: [('sub', 'stantial')]
AmSn18940621-V09-25-page5.txt: [('SEN', 'TINEL')]
AmSn18940621-V09-25-page7.txt: [('sub', 'stantial'), ('diction', 'ary')]
AmSn18940628-V09-26-page3.txt: [('polit', 'ical')]
AmSn18940628-V09-26-page5.txt: [('in', 'jects'), ('The', 'osophic')]
AmSn18940712-V09-28-page1.txt: [('pro', 'duction'), ('govern', 'ment')]
AmSn18940719-V09-29-page5.txt: [('he', 're')]
AmSn18940726-V09-30-page1.txt: [('JON', 'ES')]
AmSn18940802-V09-31-page4.txt: [('com', 'plex')]
AmSn18940823-V09-33-page3.txt: [('SEN', 'TINEL')]
AmSn18940823-V09-33-page4.txt: [('Switz', 'erland')]
AmSn18940823-V09-33-page6.txt: [('Ch', 'ristian')]
AmSn18940830-V09-34-page2.txt: [('e', 'ther')]
AmSn18940830-V09-34-page5.txt: [('pro', 'duced')]
AmSn18940830-V09-34-page7.txt: [('PRO', 'PHECY')]
AmSn18940906-V09-35-page3.txt: [('shame', 'ful')]
AmSn18940906-V09-35-page7.txt: [('GEN', 'ERAL')]
AmSn18940913-V09-36-page3.txt: [('contra', 'dictions')]
AmSn18940913-V09-36-page6.txt: [('M', 'inor')]
AmSn18940920-V09-37-page2.txt: [('in', 'vite'), ('contra', 'ry')]
AmSn18940920-V09-37-page3.txt: [('con', 'trol')]
AmSn18940927-V09-38-page6.txt: [('H', 'AL'), ('A', 'NT')]
AmSn18941004-V09-39-page1.txt: [('a', 'reli'), ('reli', 'gious')]
AmSn18941004-V09-39-page4.txt: [('Depart', 'ment')]
AmSn18941004-V09-39-page5.txt: [('s', 'ap'), ('la', 'gs'), ('E', 'gg'), ('P', 'ct')]
AmSn18941004-V09-39-page7.txt: [('por', 'trayal'), ('A', 'tE'), ('congregation', 'al')]
AmSn18941004-V09-39-page8.txt: [('a', 'id')]
AmSn18941011-V09-40-page2.txt: [('bu', 'ilded')]
AmSn18941018-V09-41-page2.txt: [('foun', 'dation')]
AmSn18941025-V09-42-page6.txt: [('C', 'tr')]
AmSn18941025-V09-42-page7.txt: [('regis', 'tered')]
AmSn18941025-V09-42-page8.txt: [('AMER', 'ICAN')]
AmSn18941115-V09-45-page2.txt: [('pro', 'phets')]
AmSn18941115-V09-45-page4.txt: [('gov', 'ernment')]
AmSn18941115-V09-45-page7.txt: [('pro', 'fusely')]
AmSn18941115-V09-45-page8.txt: [('el', 'se'), ('to', 'co')]
AmSn18941122-V09-46-page5.txt: [('well', 'es'), ('com', 'munity')]
AmSn18941129-V09-47-page2.txt: [('enforce', 'th')]
AmSn18941129-V09-47-page7.txt: [('R', 'eligious'), ('W', 'eekly')]
AmSn18941213-V09-49-page3.txt: [('Pa', 'ternoster')]
AmSn18941213-V09-49-page4.txt: [('f', 'ainteth')]
AmSn18941220-V09-50-page10.txt: [('Bap', 'tists')]
AmSn18941220-V09-50-page6.txt: [('to', 'ning'), ('the', 're'), ('sew', 'erage')]
AmSn18941220-V09-50-page7.txt: [('stra', 'nger')]
AmSn18941220-V09-50-page8.txt: [('SEN', 'TINEL')]
AmSn18941220-V09-50-page9.txt: [('TO', 'TEM')]
AmSn18950103-V10-01-page1.txt: [('SEN', 'TINEL'), ('ac', 'complished')]
AmSn18950103-V10-01-page7.txt: [('H', 'ELPFUL')]
AmSn18950110-V10-02-page1.txt: [('contempora', 'ry'), ('e', 'ternally'), ('pun', 'ishable')]
AmSn18950110-V10-02-page2.txt: [('faith', 'ful')]
AmSn18950110-V10-02-page4.txt: [('amend', 'ments')]
AmSn18950117-V10-03-page7.txt: [('in', 'quirer')]
AmSn18950117-V10-03-page8.txt: [('Prot', 'estant'), ('AMER', 'ICAN')]
AmSn18950124-V10-04-page2.txt: [('Rom', 'anists')]
AmSn18950124-V10-04-page3.txt: [('ques', 'tions')]
AmSn18950124-V10-04-page7.txt: [('G', 'raphic'), ('W', 'ith')]
AmSn18950124-V10-04-page8.txt: [('AMER', 'ICAN')]
AmSn18950131-V10-05-page3.txt: [('M', 'KT')]
AmSn18950131-V10-05-page4.txt: [('Le', 'Iter')]
AmSn18950131-V10-05-page7.txt: [('SCRIP', 'TURAL'), ('Y', 'ork')]
AmSn18950214-V10-07-page1.txt: [('SEN', 'TINEL')]
AmSn18950214-V10-07-page4.txt: [('the', 're')]
AmSn18950214-V10-07-page6.txt: [('Govern', 'ment')]
AmSn18950221-V10-08-page1.txt: [('SEN', 'TINEL')]
AmSn18950221-V10-08-page3.txt: [('SEN', 'TINEL')]
AmSn18950221-V10-08-page4.txt: [('SEN', 'TINEL')]
AmSn18950228-V10-09-page2.txt: [('a', 'pplication')]
AmSn18950228-V10-09-page4.txt: [('ex', 'emptions')]
AmSn18950228-V10-09-page6.txt: [('Se', 'dalia')]
AmSn18950228-V10-09-page7.txt: [('I', 'CE')]
AmSn18950307-V10-10-page8.txt: [('SEN', 'TINEL')]
AmSn18950314-V10-11-page2.txt: [('Method', 'ist')]
AmSn18950321-V10-12-page5.txt: [('so', 'ciety')]
AmSn18950328-V10-13-page4.txt: [('improve', 'ments')]
AmSn18950328-V10-13-page6.txt: [('to', 'ke')]
AmSn18950328-V10-13-page7.txt: [('pro', 'fusely')]
AmSn18950404-V10-14-page5.txt: [('peo', 'ple')]
AmSn18950404-V10-14-page6.txt: [('in', 'quirer')]
AmSn18950404-V10-14-page8.txt: [('edit', 'orial')]
AmSn18950411-V10-15-page2.txt: [('de', 'nial')]
AmSn18950411-V10-15-page5.txt: [('et', 'tA')]
AmSn18950418-V10-16-page7.txt: [('pro', 'tem'), ('en', 'couraged')]
AmSn18950425-V10-17-page1.txt: [('minis', 'ters')]
AmSn18950425-V10-17-page3.txt: [('a', 'nd')]
AmSn18950425-V10-17-page6.txt: [('B', 'argain')]
AmSn18950425-V10-17-page7.txt: [('con', 'dition')]
AmSn18950502-V10-18-page1.txt: [('state', 'ments')]
AmSn18950502-V10-18-page5.txt: [('judi', 'cial')]
AmSn18950502-V10-18-page6.txt: [('De', 'mand')]
AmSn18950502-V10-18-page7.txt: [('to', 're')]
AmSn18950509-V10-19-page3.txt: [('AMER', 'ICAN')]
AmSn18950509-V10-19-page4.txt: [('ha', 've')]
AmSn18950509-V10-19-page6.txt: [('we', 'ek')]
AmSn18950516-V10-20-page4.txt: [('chris', 'tening')]
AmSn18950523-V10-21-page1.txt: [('de', 'feated')]
AmSn18950523-V10-21-page3.txt: [('esta', 'blished')]
AmSn18950523-V10-21-page5.txt: [('big', 'otry')]
AmSn18950523-V10-21-page7.txt: [('ToRI', 'ES'), ('diction', 'ary')]
AmSn18950523-V10-21-page8.txt: [('Prot', 'estants'), ('ma', 'jority')]
AmSn18950530-V10-22-page5.txt: [('SEN', 'TINEL')]
AmSn18950530-V10-22-page8.txt: [('con', 'viction')]
AmSn18950606-V10-23-page4.txt: [('LEG', 'ISLATION'), ('PUNISH', 'MENT'), ('PUN', 'ISHED'), ('CHRIS', 'TIAN')]
AmSn18950606-V10-23-page8.txt: [('SEN', 'TINEL')]
AmSn18950613-V10-24-page1.txt: [('Y', 'ork'), ('ques', 'tions')]
AmSn18950613-V10-24-page7.txt: [('N', 'inety')]
AmSn18950620-V10-25-page1.txt: [('an', 'cient')]
AmSn18950620-V10-25-page2.txt: [('for', 'merly')]
AmSn18950620-V10-25-page5.txt: [('in', 'sidious'), ('AMER', 'ICAN')]
AmSn18950620-V10-25-page7.txt: [('A', 'ttractive'), ('A', 'merican'), ('Y', 'ork'), ('V', 'ersion')]
AmSn18950627-V10-26-page3.txt: [('A', 'MI')]
AmSn18950627-V10-26-page6.txt: [('lib', 'erty')]
AmSn18950627-V10-26-page7.txt: [('Ad', 'ventists')]
AmSn18950718-V10-29-page7.txt: [('a', 're')]
AmSn18950718-V10-29-page8.txt: [('SEN', 'TINEL')]
AmSn18950725-V10-30-page5.txt: [('depre', 'cated')]
AmSn18950801-V10-31-page3.txt: [('Judge', 'th')]
AmSn18950801-V10-31-page5.txt: [('REA', 'VIS')]
AmSn18950815-V10-32-page8.txt: [('reli', 'gious')]
AmSn18950829-V10-34-page4.txt: [('MOVE', 'MENT')]
AmSn18950829-V10-34-page8.txt: [('SEN', 'TINEL')]
AmSn18950905-V10-35-page2.txt: [('SEN', 'TINEL')]
AmSn18950905-V10-35-page7.txt: [('B', 'IB')]
AmSn18950919-V10-37-page5.txt: [('Cath', 'olic')]
AmSn18950926-V10-38-page5.txt: [('in', 'fluence'), ('the', 're')]
AmSn18950926-V10-38-page6.txt: [('E', 'LI')]
AmSn18950926-V10-38-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER')]
AmSn18951017-V10-41-page8.txt: [('a', 'mis')]
AmSn18951024-V10-42-page7.txt: [('a', 'll'), ('A', 'Il'), ('all', 'ee'), ('T', 'ia')]
AmSn18951031-V10-43-page6.txt: [('Men', 'stealers')]
AmSn18951031-V10-43-page7.txt: [('i', 'ri'), ('n', 'orw'), ('endure', 'th'), ('e', 'lf')]
AmSn18951107-V10-44-page3.txt: [('sal', 'vation')]
AmSn18951107-V10-44-page7.txt: [('pro', 'fusely')]
AmSn18951121-V10-46-page5.txt: [('reg', 'ular'), ('or', 'ganization')]
AmSn18951121-V10-46-page7.txt: [('Y', 'ork')]
AmSn18951128-V10-47-page6.txt: [('S', 'weet')]
AmSn18951128-V10-47-page7.txt: [('o', 'ne')]
AmSn18951128-V10-47-page8.txt: [('W', 'ine'), ('GEN', 'ERAL')]
AmSn18960102-V11-01-page8.txt: [('SEN', 'TINEL')]
AmSn18960109-V11-02-page7.txt: [('St', 'ates'), ('V', 'ery')]
AmSn18960116-V11-03-page2.txt: [('gover', 'nment')]
AmSn18960116-V11-03-page7.txt: [('pro', 'fusely')]
AmSn18960130-V11-05-page6.txt: [('a', 'nd')]
AmSn18960213-V11-07-page1.txt: [('ED', 'ITORS')]
AmSn18960213-V11-07-page3.txt: [('dam', 'es')]
AmSn18960220-V11-08-page2.txt: [('IN', 'SP')]
AmSn18960220-V11-08-page4.txt: [('prop', 'erly')]
AmSn18960220-V11-08-page5.txt: [('au', 'th'), ('of', 'ttimes')]
AmSn18960220-V11-08-page6.txt: [('to', 'wns')]
AmSn18960220-V11-08-page7.txt: [('nan', 'na'), ('sim', 'ple'), ('W', 'ith')]
AmSn18960220-V11-08-page8.txt: [('SEN', 'TINEL'), ('SEN', "TINEL'S")]
AmSn18960227-V11-09-page3.txt: [('attend', 'ance')]
AmSn18960227-V11-09-page8.txt: [('AMER', 'ICAN')]
AmSn18960305-V11-10-page1.txt: [('the', 'Ca')]
AmSn18960305-V11-10-page6.txt: [('CA', 'NT')]
AmSn18960305-V11-10-page7.txt: [('A', 'GA')]
AmSn18960305-V11-10-page8.txt: [('b', 'Ra')]
AmSn18960312-V11-11-page5.txt: [('AMER', 'ICAN')]
AmSn18960312-V11-11-page7.txt: [('W', 'ith')]
AmSn18960319-V11-12-page1.txt: [('e', 'cu')]
AmSn18960319-V11-12-page3.txt: [('Chris', 'tianity')]
AmSn18960326-V11-13-page5.txt: [('in', 'vaded')]
AmSn18960326-V11-13-page7.txt: [('b', 'irders')]
AmSn18960409-V11-15-page3.txt: [('fur', 'therance')]
AmSn18960409-V11-15-page5.txt: [('ON', 'TARIO')]
AmSn18960507-V11-19-page5.txt: [('de', 'mand')]
AmSn18960514-V11-20-page3.txt: [('to', 'wn')]
AmSn18960514-V11-20-page7.txt: [('CLA', 'SS'), ('She', 'lah')]
AmSn18960521-V11-21-page3.txt: [('AD', 'VENTIST')]
AmSn18960528-V11-22-page3.txt: [('pro', 'hibited')]
AmSn18960604-V11-23-page1.txt: [('rec', 'eives')]
AmSn18960604-V11-23-page2.txt: [('See', 'th')]
AmSn18960604-V11-23-page4.txt: [('pro', 'hibition')]
AmSn18960604-V11-23-page7.txt: [('Me', 'shullam'), ('a', 'Ne'), ('t', 'Ex'), ('nah', 'Ma'), ('Le', 'vites'), ('DAn', 'iel')]
AmSn18960611-V11-24-page3.txt: [('En', 'deavor')]
AmSn18960618-V11-25-page7.txt: [('go', 'ld')]
AmSn18960702-V11-26-page1.txt: [('DEC', 'LARATION')]
AmSn18960702-V11-26-page2.txt: [('be', 'lieve')]
AmSn18960702-V11-26-page3.txt: [('Ref', 'orm')]
AmSn18960702-V11-26-page7.txt: [('Pro', 'hibition'), ('perfect', 'ly'), ('Limit', 'ations')]
AmSn18960716-V11-28-page2.txt: [('are', 'Ca')]
AmSn18960716-V11-28-page4.txt: [('the', 're')]
AmSn18960716-V11-28-page7.txt: [('Ha', 'shum'), ('V', 'al'), ('NEH', 'EMIAH'), ('r', 'Ex'), ('Ha', 'shub'), ('nah', 'Ma'), ('E', 'zra'), ('a', 'bi'), ('Le', 'vites')]
AmSn18960723-V11-29-page1.txt: [('J', 'ULY')]
AmSn18960723-V11-29-page5.txt: [('con', 'ventions')]
AmSn18960723-V11-29-page7.txt: [('A', 'lian')]
AmSn18960730-V11-30-page2.txt: [('des', 'tinies')]
AmSn18960730-V11-30-page4.txt: [('Script', 'UreS')]
AmSn18960806-V11-31-page2.txt: [('Dan', 'iel')]
AmSn18960813-V11-32-page7.txt: [('jah', 'Az'), ('Ha', 'shub'), ('a', 'NS')]
AmSn18960820-V11-33-page6.txt: [('now', 'adays')]
AmSn18960820-V11-33-page7.txt: [('Le', 'vites'), ('a', 'Ne')]
AmSn18960827-V11-34-page5.txt: [('T', 'wo')]
AmSn18960827-V11-34-page7.txt: [('A', 'merican')]
AmSn18960827-V11-34-page8.txt: [('a', 'ny')]
AmSn18960903-V11-35-page2.txt: [('to', 're')]
AmSn18960903-V11-35-page3.txt: [('a', 'mo')]
AmSn18960903-V11-35-page5.txt: [('inter', 'esting')]
AmSn18960903-V11-35-page7.txt: [('a', 'sa'), ('Le', 'vItes'), ('a', 'liah'), ('I', 'ra'), ('a', 'Ne'), ('He', 'zir')]
AmSn18960910-V11-36-page3.txt: [('A', 'll'), ('as', 'semblies')]
AmSn18960910-V11-36-page7.txt: [('E', 'zra')]
AmSn18960917-V11-37-page2.txt: [('in', 'dictment'), ('d', 'iscourse')]
AmSn18960917-V11-37-page3.txt: [('Demo', 'crat')]
AmSn18960917-V11-37-page7.txt: [('G', 'aG'), ('nah', 'MA')]
AmSn18960924-V11-38-page5.txt: [('Ruth', 'erford')]
AmSn18960924-V11-38-page7.txt: [('Y', 'ork'), ('Le', 'vites')]
AmSn18961015-V11-41-page1.txt: [('Cardin', 'al')]
AmSn18961015-V11-41-page7.txt: [('t', 'ow')]
AmSn18961015-V11-41-page8.txt: [('Aguas', 'Calientes')]
AmSn18961022-V11-42-page1.txt: [('de', 'manded'), ('dema', 'nd')]
AmSn18961022-V11-42-page3.txt: [('rec', 'ognized'), ('spir', 'itual')]
AmSn18961022-V11-42-page4.txt: [('ME', 'th')]
AmSn18961029-V11-43-page1.txt: [('per', 'se')]
AmSn18961029-V11-43-page3.txt: [('CHRIS', 'TIAN')]
AmSn18961029-V11-43-page7.txt: [('v', 'olumes'), ('W', 'ith')]
AmSn18961029-V11-43-page8.txt: [('of', 'ficio')]
AmSn18961105-V11-44-page2.txt: [('polit', 'ical')]
AmSn18961105-V11-44-page7.txt: [('de', 'scription')]
AmSn18961112-V11-45-page2.txt: [('SEN', 'TINEL'), ('A', 'dventists'), ('CIT', 'IZENSHIP')]
AmSn18961119-V11-46-page3.txt: [('per', 'mit')]
AmSn18961119-V11-46-page7.txt: [('Le', 'vites')]
AmSn18961126-V11-47-page4.txt: [('AMER', 'ICAN'), ('GOVERN', 'MENT')]
AmSn18961126-V11-47-page5.txt: [('SEN', 'TINEL')]
AmSn18961126-V11-47-page6.txt: [('Su', 'nday')]
AmSn18961126-V11-47-page8.txt: [('SEN', 'TINEL')]
AmSn18961203-V11-48-page5.txt: [('Chris', 'tian')]
AmSn18961203-V11-48-page6.txt: [('SEN', 'TINEL'), ('Christian', 'ity')]
AmSn18961203-V11-48-page7.txt: [('N', 'th'), ('a', 'NS')]
AmSn18961217-V11-50-page6.txt: [('e', 'arly')]
AmSn18961217-V11-50-page7.txt: [('K', 'ey'), ('a', 'Ne')]
AmSn18961224-V11-51-page1.txt: [('s', 'ubterfuge')]
AmSn18961224-V11-51-page3.txt: [('de', 'manding')]
AmSn18961224-V11-51-page4.txt: [('storekee', 'pers')]
AmSn18961224-V11-51-page6.txt: [('A', 'Mt')]
AmSn18961224-V11-51-page7.txt: [('import', 'ance'), ('Ba', 'ni'), ('i', 'll'), ('a', 'Ne'), ('HA', 'sh'), ('Ha', 'rIph'), ('Le', 'vites')]
AmSn18961224-V11-51-page8.txt: [('SEN', 'TINEL')]
AmSn18970107-V12-01-page10.txt: [('repudi', 'ated')]
AmSn18970107-V12-01-page16.txt: [('Le', 'vites'), ('A', 'MERICA')]
AmSn18970114-V12-02-page14.txt: [('A', 'MERICAN')]
AmSn18970114-V12-02-page4.txt: [('entertainmen', 'ts')]
AmSn18970121-V12-03-page1.txt: [('A', 'LONZO')]
AmSn18970121-V12-03-page13.txt: [('Govern', 'ment')]
AmSn18970121-V12-03-page7.txt: [('a', 'nd')]
AmSn18970121-V12-03-page8.txt: [('the', 'Ca'), ('Christia', 'nity')]
AmSn18970128-V12-04-page1.txt: [('a', 'nd')]
AmSn18970128-V12-04-page13.txt: [('Pry', "or's")]
AmSn18970128-V12-04-page6.txt: [('work', 'ers'), ('pres', 'ent')]
AmSn18970204-V12-05-page14.txt: [('sub', 'ject')]
AmSn18970204-V12-05-page4.txt: [('P', 'Al')]
AmSn18970204-V12-05-page6.txt: [('sus', 'tained')]
AmSn18970204-V12-05-page9.txt: [('forma', 'tion')]
AmSn18970211-V12-06-page14.txt: [('AMER', 'ICAN')]
AmSn18970211-V12-06-page2.txt: [('DE', 'CISIONS')]
AmSn18970211-V12-06-page6.txt: [('de', 'cided')]
AmSn18970211-V12-06-page8.txt: [('cir', 'cumspection')]
AmSn18970218-V12-07-page15.txt: [('in', 'terest')]
AmSn18970218-V12-07-page16.txt: [('nah', 'MA')]
AmSn18970225-V12-08-page1.txt: [('A', 'LONZO')]
AmSn18970225-V12-08-page11.txt: [('SEC', 'TIONS')]
AmSn18970225-V12-08-page15.txt: [('contin', 'ual')]
AmSn18970225-V12-08-page4.txt: [('End', 'eavorer')]
AmSn18970225-V12-08-page6.txt: [('sub', 'ject'), ('pla', 'ces')]
AmSn18970304-V12-09-page10.txt: [('A', 'MERICAN')]
AmSn18970311-V12-10-page14.txt: [('m', 'uch')]
AmSn18970311-V12-10-page2.txt: [('exam', 'ine')]
AmSn18970311-V12-10-page7.txt: [('E', 'RI')]
AmSn18970318-V12-11-page14.txt: [('Kan', 'sas')]
AmSn18970318-V12-11-page15.txt: [('P', 'ACIFIC'), ('in', 'terest')]
AmSn18970318-V12-11-page7.txt: [('con', 'demned')]
AmSn18970318-V12-11-page8.txt: [('J', 'oash')]
AmSn18970325-V12-12-page10.txt: [('dispe', 'nse')]
AmSn18970325-V12-12-page16.txt: [('W', 'ith'), ('sim', 'ple')]
AmSn18970325-V12-12-page3.txt: [('im', 'portance')]
AmSn18970401-V12-13-page15.txt: [('P', 'olitical')]
AmSn18970401-V12-13-page16.txt: [('W', 'ith')]
AmSn18970401-V12-13-page2.txt: [('con', 'cerning'), ('ChriS', 'tians')]
AmSn18970401-V12-13-page7.txt: [('r', 'um')]
AmSn18970401-V12-13-page8.txt: [('a', 'sk')]
AmSn18970408-V12-14-page10.txt: [('p', 'urely')]
AmSn18970408-V12-14-page11.txt: [('SEN', 'TINEL')]
AmSn18970408-V12-14-page15.txt: [('pro', 'greSSed')]
AmSn18970408-V12-14-page16.txt: [('W', 'ith'), ('M', 'RS')]
AmSn18970408-V12-14-page9.txt: [('fundament', 'al')]
AmSn18970415-V12-15-page2.txt: [('ma', 'jority')]
AmSn18970415-V12-15-page4.txt: [('appoint', 'ment')]
AmSn18970415-V12-15-page9.txt: [('a', 'nd')]
AmSn18970422-V12-16-page13.txt: [('spirit', 'ual')]
AmSn18970422-V12-16-page7.txt: [('t', 'wo')]
AmSn18970429-V12-17-page1.txt: [('a', 'reli')]
AmSn18970429-V12-17-page10.txt: [('Is', 'rael')]
AmSn18970429-V12-17-page11.txt: [('to', 'es')]
AmSn18970429-V12-17-page15.txt: [('in', 'terest')]
AmSn18970429-V12-17-page16.txt: [('N', 'th'), ('Le', 'vites')]
AmSn18970429-V12-17-page4.txt: [('d', 'ay')]
AmSn18970506-V12-18-page1.txt: [('sim', 'plicity')]
AmSn18970506-V12-18-page12.txt: [('en', 'forcement')]
AmSn18970506-V12-18-page13.txt: [('god', 'liness'), ('Chris', 'tian')]
AmSn18970506-V12-18-page4.txt: [('degrada', 'tion')]
AmSn18970513-V12-19-page12.txt: [('as', 'certaining')]
AmSn18970513-V12-19-page13.txt: [('inter', 'rupted')]
AmSn18970513-V12-19-page4.txt: [('pro', 'mulgation')]
AmSn18970513-V12-19-page9.txt: [('hun', 'dreds')]
AmSn18970520-V12-20-page1.txt: [('cor', 'ruptible')]
AmSn18970520-V12-20-page14.txt: [('w', 'ith'), ('m', 'etal')]
AmSn18970520-V12-20-page15.txt: [('in', 'terest'), ('M', 'atthew')]
AmSn18970520-V12-20-page9.txt: [('a', 'ngel'), ('sin', 'gle'), ('a', 'bsolutely')]
AmSn18970527-V12-21-page1.txt: [('k', 'OA')]
AmSn18970527-V12-21-page14.txt: [('f', 'ORK')]
AmSn18970527-V12-21-page15.txt: [('in', 'terest')]
AmSn18970603-V12-22-page11.txt: [('in', 'dulged')]
AmSn18970603-V12-22-page13.txt: [('a', 'nd'), ('con', 'cerned')]
AmSn18970603-V12-22-page14.txt: [('m', 'onths')]
AmSn18970603-V12-22-page16.txt: [('W', 'ith')]
AmSn18970603-V12-22-page6.txt: [('d', 'oors')]
AmSn18970603-V12-22-page7.txt: [('a', 'll')]
AmSn18970603-V12-22-page8.txt: [('In', 'asmuch'), ('m', 'em')]
AmSn18970610-V12-23-page13.txt: [('de', 'nominational')]
AmSn18970610-V12-23-page5.txt: [('in', 'struction')]
AmSn18970610-V12-23-page6.txt: [('pe', 'culiar')]
AmSn18970610-V12-23-page9.txt: [('per', 'se')]
AmSn18970617-V12-24-page3.txt: [('A', 'MERICAN')]
AmSn18970624-V12-25-page4.txt: [('right', 'eousness')]
AmSn18970624-V12-25-page6.txt: [('Fed', 'eral')]
AmSn18970701-V12-26-page11.txt: [('con', 'vention')]
AmSn18970701-V12-26-page14.txt: [('SEN', 'TINEL')]
AmSn18970701-V12-26-page2.txt: [('dis', 'tinctly')]
AmSn18970701-V12-26-page3.txt: [('a', 'nd')]
AmSn18970707-V12-27-page11.txt: [('Hin', 'doos')]
AmSn18970707-V12-27-page14.txt: [('in', 'terest'), ('cal', 'ender')]
AmSn18970707-V12-27-page15.txt: [('G', 'od')]
AmSn18970707-V12-27-page4.txt: [('SEN', 'TINEL')]
AmSn18970707-V12-27-page6.txt: [('govern', 'ments')]
AmSn18970715-V12-28-page13.txt: [('A', 'MERICAN')]
AmSn18970715-V12-28-page14.txt: [('in', 'terest')]
AmSn18970715-V12-28-page7.txt: [('ha', 'th'), ('h', 'ome')]
AmSn18970715-V12-28-page8.txt: [('sig', 'nificant')]
AmSn18970722-V12-29-page1.txt: [('ex', 'cept')]
AmSn18970722-V12-29-page10.txt: [('consti', 'tutional')]
AmSn18970722-V12-29-page14.txt: [('in', 'terest')]
AmSn18970722-V12-29-page5.txt: [('the', 'se')]
AmSn18970729-V12-30-page14.txt: [('in', 'terest')]
AmSn18970729-V12-30-page16.txt: [('W', 'ith'), ('sim', 'ple')]
AmSn18970805-V12-31-page1.txt: [('prof', 'itable')]
AmSn18970805-V12-31-page10.txt: [('a', 'nd')]
AmSn18970805-V12-31-page13.txt: [('SEN', 'TINEL')]
AmSn18970805-V12-31-page14.txt: [('in', 'terest')]
AmSn18970812-V12-32-page15.txt: [('Y', 'ork')]
AmSn18970812-V12-32-page6.txt: [('con', 'fidently')]
AmSn18970812-V12-32-page7.txt: [('SEN', 'TINEL')]
AmSn18970819-V12-33-page14.txt: [('con', 'nected'), ('Y', 'ork')]
AmSn18970819-V12-33-page16.txt: [('W', 'ith'), ('sim', 'ple')]
AmSn18970819-V12-33-page2.txt: [('a', 're'), ('Chris', 'tians')]
AmSn18970819-V12-33-page5.txt: [('Cir', 'cumcision')]
AmSn18970909-V12-35-page14.txt: [('Y', 'ork')]
AmSn18970909-V12-35-page16.txt: [('m', 'oth')]
AmSn18970916-V12-36-page4.txt: [('y', 'ou')]
AmSn18970916-V12-36-page6.txt: [('inte', 'nts')]
AmSn18970923-V12-37-page12.txt: [('disap', 'pointments')]
AmSn18970923-V12-37-page13.txt: [('Y', 'ork')]
AmSn18970923-V12-37-page14.txt: [('pub', 'lished')]
AmSn18970923-V12-37-page15.txt: [('T', 'ao'), ('a', 'ce')]
AmSn18970923-V12-37-page16.txt: [('n', 'ote'), ('n', 'otes')]
AmSn18970923-V12-37-page4.txt: [('be', 'ng'), ('in', 'terests')]
AmSn18970923-V12-37-page6.txt: [('a', 'nd')]
AmSn18970923-V12-37-page8.txt: [('real', 'ization')]
AmSn18970923-V12-37-page9.txt: [('polit', 'ical'), ('de', 'termined')]
AmSn18970930-V12-38-page13.txt: [('SEN', 'TINEL')]
AmSn18970930-V12-38-page16.txt: [('ra', 'ca'), ('ho', 'Ts'), ('g', 'rin')]
AmSn18970930-V12-38-page4.txt: [('con', 'vention'), ('in', 'toxicated')]
AmSn18970930-V12-38-page6.txt: [('A', 'MERICAN')]
AmSn18971007-V12-39-page13.txt: [('ad', 'vocate')]
AmSn18971007-V12-39-page9.txt: [('Amer', 'ican')]
AmSn18971014-V12-40-page16.txt: [('p', 'PM'), ('I', 'ce'), ('i', 'SL'), ('w', 'ei'), ('s', 'Om'), ('I', 're'), ('m', 'id')]
AmSn18971021-V12-41-page12.txt: [('won', 'dered')]
AmSn18971021-V12-41-page13.txt: [('j', 'oy')]
AmSn18971021-V12-41-page14.txt: [('Dic', 'tionary'), ('the', 're')]
AmSn18971021-V12-41-page16.txt: [('a', 'ka'), ('A', 'ddress'), ('R', 'cd'), ('a', 'ft'), ('I', 're')]
AmSn18971021-V12-41-page2.txt: [('attend', 'ance')]
AmSn18971021-V12-41-page5.txt: [('im', 'agined')]
AmSn18971021-V12-41-page6.txt: [('d', 'welleth')]
AmSn18971021-V12-41-page9.txt: [('A', 'VER')]
AmSn18971028-V12-42-page1.txt: [('SEN', 'TINEL')]
AmSn18971028-V12-42-page14.txt: [('in', 'terest')]
AmSn18971028-V12-42-page16.txt: [('a', 'Ne'), ('a', 'li')]
AmSn18971028-V12-42-page2.txt: [('SEN', 'TINEL')]
AmSn18971028-V12-42-page9.txt: [('a', 'nd')]
AmSn18971104-V12-43-page14.txt: [('in', 'terest')]
AmSn18971104-V12-43-page16.txt: [('a', 'Ne')]
AmSn18971104-V12-43-page3.txt: [('as', 'serted'), ('di', 'rection')]
AmSn18971104-V12-43-page9.txt: [('Insp', 'iration')]
AmSn18971111-V12-44-page15.txt: [('in', 'ca')]
AmSn18971111-V12-44-page16.txt: [('Me', 'shullam'), ('a', 'Ne')]
AmSn18971111-V12-44-page2.txt: [('a', 'nd')]
AmSn18971111-V12-44-page8.txt: [('a', 'bundantly')]
AmSn18971118-V12-45-page14.txt: [('DiCtion', 'ary')]
AmSn18971118-V12-45-page15.txt: [('in', 'ti'), ('Kan', 'sas'), ('Com', 'pany')]
AmSn18971118-V12-45-page16.txt: [('Som', 'ething'), ('a', 'Ne')]
AmSn18971125-V12-46-page10.txt: [('im', 'portant')]
AmSn18971125-V12-46-page11.txt: [('We', 'll'), ('We', 're')]
AmSn18971125-V12-46-page14.txt: [('A', 'pril')]
AmSn18971125-V12-46-page15.txt: [('fur', 'nished')]
AmSn18971125-V12-46-page16.txt: [('a', 'Ne')]
AmSn18971125-V12-46-page2.txt: [('En', 'deavor')]
AmSn18971125-V12-46-page3.txt: [('in', 'stitution')]
AmSn18971202-V12-47-page1.txt: [('Chris', 'tian')]
AmSn18971202-V12-47-page12.txt: [('alw', 'ays')]
AmSn18971202-V12-47-page7.txt: [('a', 'nd')]
AmSn18971209-V12-48-page14.txt: [('Qual', 'ity'), ('in', 'ca')]
AmSn18971209-V12-48-page16.txt: [('ah', 'Az'), ('a', 'Ne')]
AmSn18971209-V12-48-page6.txt: [('pro', 'fession')]
AmSn18971209-V12-48-page7.txt: [('P', 'rotestants')]
AmSn18971216-V12-49-page13.txt: [('Com', 'bination')]
AmSn18971216-V12-49-page14.txt: [('de', 'scriptive'), ('in', 'ca')]
AmSn18971216-V12-49-page16.txt: [('H', 'eadband')]
AmSn18971216-V12-49-page6.txt: [('the', 're')]
AmSn18971216-V12-49-page7.txt: [('per', 'se')]
AmSn18971216-V12-49-page9.txt: [('ex', 'pire')]
AmSn18971230-V12-50-page1.txt: [('A', 'licia')]
AmSn18971230-V12-50-page14.txt: [('in', 'terest')]
AmSn18971230-V12-50-page4.txt: [('occa', 'sions')]
AmSn18971230-V12-50-page9.txt: [('b', 'ast')]
AmSn18980106-V13-01-page2.txt: [('ex', 'pected'), ('be', 'lieve')]
AmSn18980113-V13-02-page1.txt: [('l', 'imn'), ('in', 'stincts')]
AmSn18980113-V13-02-page13.txt: [('SEN', 'TINEL')]
AmSn18980113-V13-02-page16.txt: [('a', 'NS')]
AmSn18980113-V13-02-page4.txt: [('Chri', 'stian')]
AmSn18980113-V13-02-page5.txt: [('pos', 'sible')]
AmSn18980113-V13-02-page7.txt: [('a', 'ttending')]
AmSn18980120-V13-03-page15.txt: [('in', 'terest')]
AmSn18980120-V13-03-page16.txt: [('Th', 'ey'), ('s', 'itz')]
AmSn18980120-V13-03-page6.txt: [('de', 'cided')]
AmSn18980127-V13-04-page14.txt: [('in', 'terest'), ('Cat', 'arrh')]
AmSn18980127-V13-04-page16.txt: [('ah', 'Az'), ('T', 'OW')]
AmSn18980127-V13-04-page2.txt: [('Congregation', 'alist')]
AmSn18980127-V13-04-page4.txt: [('per', 'suaded')]
AmSn18980203-V13-05-page14.txt: [('med', 'icine')]
AmSn18980203-V13-05-page5.txt: [('r', 'espect')]
AmSn18980210-V13-06-page12.txt: [('su', 're')]
AmSn18980210-V13-06-page14.txt: [('in', 'terest')]
AmSn18980210-V13-06-page16.txt: [('the', 'se'), ('Con', 'gress')]
AmSn18980210-V13-06-page5.txt: [('Us', 'urped')]
AmSn18980210-V13-06-page9.txt: [('lib', 'erty'), ('estab', 'lished'), ('re', 'gards')]
AmSn18980217-V13-07-page14.txt: [('in', 'terest'), ('in', 'ca')]
AmSn18980217-V13-07-page15.txt: [('a', 'rak')]
AmSn18980217-V13-07-page3.txt: [('pro', 'fession')]
AmSn18980217-V13-07-page7.txt: [('A', 'MERICAN')]
AmSn18980224-V13-08-page14.txt: [('in', 'terest'), ('Wagon', 'ettes'), ('W', 'ig')]
AmSn18980224-V13-08-page15.txt: [('f', 'orgo')]
AmSn18980224-V13-08-page8.txt: [('SEN', 'TINEL')]
AmSn18980224-V13-08-page9.txt: [('fa', 'tuus')]
AmSn18980303-V13-09-page14.txt: [('in', 'terest')]
AmSn18980303-V13-09-page15.txt: [('judg', 'ments')]
AmSn18980303-V13-09-page4.txt: [('mer', 'rily')]
AmSn18980303-V13-09-page6.txt: [('in', 'clined'), ('Gov', 'ernment')]
AmSn18980310-V13-10-page14.txt: [('A', 'ddress'), ('g', 'od')]
AmSn18980310-V13-10-page15.txt: [('a', 'Ne')]
AmSn18980310-V13-10-page16.txt: [('inf', 'ormed')]
AmSn18980310-V13-10-page2.txt: [('sev', 'eral')]
AmSn18980310-V13-10-page7.txt: [('J', 'erusalem'), ('appear', 'ance')]
AmSn18980310-V13-10-page8.txt: [('faith', 'ful')]
AmSn18980317-V13-11-page14.txt: [('A', 'loth')]
AmSn18980317-V13-11-page15.txt: [('for', 'ES'), ('a', 'Ne')]
AmSn18980317-V13-11-page8.txt: [('act', 'uated')]
AmSn18980324-V13-12-page12.txt: [('see', 'th')]
AmSn18980324-V13-12-page15.txt: [('a', 'Ne')]
AmSn18980324-V13-12-page3.txt: [('gov', 'ernment')]
AmSn18980324-V13-12-page8.txt: [('Chris', 'tian')]
AmSn18980331-V13-13-page14.txt: [('in', 'terest')]
AmSn18980331-V13-13-page15.txt: [('a', 'Ne'), ('ill', 'ustrations')]
AmSn18980331-V13-13-page16.txt: [('w', 'ould')]
AmSn18980407-V13-14-page14.txt: [('in', 'terest')]
AmSn18980407-V13-14-page15.txt: [('f', 'orgo')]
AmSn18980414-V13-15-page10.txt: [('like', 'th')]
AmSn18980414-V13-15-page14.txt: [('in', 'terest'), ('in', 'ca')]
AmSn18980414-V13-15-page15.txt: [('A', 'Mt'), ('Le', 'vites'), ('a', 'Ne')]
AmSn18980414-V13-15-page3.txt: [('st', 'atement')]
AmSn18980414-V13-15-page4.txt: [('sac', 'rificed')]
AmSn18980414-V13-15-page6.txt: [('fun', 'damental')]
AmSn18980421-V13-16-page14.txt: [('in', 'terest')]
AmSn18980428-V13-17-page12.txt: [('au', 'thorities')]
AmSn18980428-V13-17-page13.txt: [('a', 'reli'), ('r', 'om'), ('fur', 'nished')]
AmSn18980428-V13-17-page14.txt: [('Kan', 'sas'), ('Com', 'pany'), ('in', 'terest')]
AmSn18980428-V13-17-page16.txt: [('Amer', 'icans')]
AmSn18980428-V13-17-page3.txt: [('exam', 'ple')]
AmSn18980428-V13-17-page5.txt: [('A', 'MERICAN')]
AmSn18980505-V13-18-page13.txt: [('mod', 'ern')]
AmSn18980505-V13-18-page14.txt: [('in', 'terest')]
AmSn18980505-V13-18-page5.txt: [('a', 'nd')]
AmSn18980512-V13-19-page14.txt: [('DI', 'ES')]
AmSn18980519-V13-20-page1.txt: [('Chris', 'tian')]
AmSn18980519-V13-20-page14.txt: [('especial', 'ly')]
AmSn18980519-V13-20-page2.txt: [('a', 'id')]
AmSn18980519-V13-20-page6.txt: [('con', 'cern')]
AmSn18980519-V13-20-page7.txt: [('part', 'nership')]
AmSn18980526-V13-21-page14.txt: [('In', 'terest'), ('especial', 'ly')]
AmSn18980526-V13-21-page5.txt: [('meth', 'ods')]
AmSn18980602-V13-22-page14.txt: [('especial', 'ly')]
AmSn18980602-V13-22-page15.txt: [('Add', 'ress')]
AmSn18980602-V13-22-page3.txt: [('par', 'tial')]
AmSn18980602-V13-22-page7.txt: [('s', 'un')]
AmSn18980609-V13-23-page14.txt: [('Com', 'pany'), ('especial', 'ly'), ('HA', 'YS')]
AmSn18980609-V13-23-page15.txt: [('i', 'ri'), ('a', 're'), ('a', 'il'), ('e', 'gg')]
AmSn18980609-V13-23-page2.txt: [('perma', 'nent'), ('per', 'manent')]
AmSn18980609-V13-23-page3.txt: [('ques', 'tion')]
AmSn18980609-V13-23-page9.txt: [('Minis', "ters'")]
AmSn18980616-V13-24-page10.txt: [('Chris', 'tian')]
AmSn18980616-V13-24-page13.txt: [('w', 'ork'), ('fur', 'nished')]
AmSn18980616-V13-24-page14.txt: [('Com', 'pany'), ('especial', 'ly')]
AmSn18980616-V13-24-page15.txt: [('i', 'Cel'), ('Er', 'ie'), ('m', 'oi')]
AmSn18980616-V13-24-page2.txt: [('Colo', 'nel')]
AmSn18980616-V13-24-page7.txt: [('an', 'swer')]
AmSn18980623-V13-25-page14.txt: [('in', 'terest'), ('V', 'aluable'), ('especial', 'ly')]
AmSn18980623-V13-25-page15.txt: [('a', 'll'), ('b', 'ecome'), ('E', 'ra'), ('not', 'ch')]
AmSn18980623-V13-25-page16.txt: [('Bis', 'marck')]
AmSn18980630-V13-26-page11.txt: [('Eng', 'lish')]
AmSn18980630-V13-26-page14.txt: [('especial', 'ly')]
AmSn18980630-V13-26-page15.txt: [('to', 'Il'), ('H', 'ay'), ('i', 'va')]
AmSn18980630-V13-26-page6.txt: [('cor', 'ruptions')]
AmSn18980630-V13-26-page9.txt: [('a', 'lready')]
AmSn18980714-V13-27-page14.txt: [('especial', 'ly')]
AmSn18980714-V13-27-page15.txt: [('I', 're'), ('to', 'Co'), ('r', 'CD'), ('th', 'ese'), ('O', 'ra')]
AmSn18980714-V13-27-page2.txt: [('evan', 'gelical'), ('be', 'lieve')]
AmSn18980714-V13-27-page3.txt: [('so', 'ul')]
AmSn18980714-V13-27-page7.txt: [('IS', 'TH')]
AmSn18980721-V13-28-page10.txt: [('Preside', 'nt')]
AmSn18980721-V13-28-page17.txt: [('to', 'ry')]
AmSn18980721-V13-28-page19.txt: [('especial', 'ly')]
AmSn18980721-V13-28-page6.txt: [('f', 'ol')]
AmSn18980728-V13-29-page10.txt: [('sub', 'jects')]
AmSn18980728-V13-29-page14.txt: [('especial', 'ly')]
AmSn18980728-V13-29-page7.txt: [('symbol', 'ized')]
AmSn18980804-V13-30-page14.txt: [('especial', 'ly')]
AmSn18980804-V13-30-page4.txt: [('ecclesias', 'tically')]
AmSn18980804-V13-30-page8.txt: [('ac', 'cept')]
AmSn18980811-V13-31-page12.txt: [('belie', 'veth')]
AmSn18980811-V13-31-page14.txt: [('especial', 'ly')]
AmSn18980811-V13-31-page16.txt: [('en', 'gagement')]
AmSn18980811-V13-31-page7.txt: [('the', 'se')]
AmSn18980818-V13-32-page14.txt: [('especial', 'ly')]
AmSn18980818-V13-32-page6.txt: [('crim', 'inal')]
AmSn18980825-V13-33-page1.txt: [('as', 'cend')]
AmSn18980825-V13-33-page7.txt: [('do', 'th')]
AmSn18980825-V13-33-page8.txt: [('a', 're')]
AmSn18980901-V13-34-page4.txt: [('in', 'itiative')]
AmSn18980908-V13-35-page12.txt: [('right', 'ful')]
AmSn18980908-V13-35-page2.txt: [('be', 'seeching')]
AmSn18980908-V13-35-page7.txt: [('Equal', 'ity')]
AmSn18980915-V13-36-page15.txt: [('P', 'ress')]
AmSn18980915-V13-36-page2.txt: [('prop', 'erly')]
AmSn18980922-V13-37-page14.txt: [('especial', 'ly')]
AmSn18980922-V13-37-page15.txt: [('cap', 'tivity')]
AmSn18980922-V13-37-page16.txt: [('for', 'eign'), ('Gov', 'ernment')]
AmSn18980922-V13-37-page5.txt: [('ad', 'venturers')]
AmSn18980922-V13-37-page6.txt: [('Cath', 'olics')]
AmSn18980929-V13-38-page13.txt: [('It', "'s")]
AmSn18980929-V13-38-page14.txt: [('especial', 'ly')]
AmSn18980929-V13-38-page15.txt: [('cap', 'tivity')]
AmSn18980929-V13-38-page2.txt: [('the', 'Ca'), ('Ca', 'tholic')]
AmSn18980929-V13-38-page9.txt: [('SEN', 'TINEL')]
AmSn18981006-V13-39-page1.txt: [('GOVERN', 'MENT')]
AmSn18981006-V13-39-page14.txt: [('cap', 'tivity')]
AmSn18981006-V13-39-page15.txt: [('especial', 'ly')]
AmSn18981013-V13-40-page14.txt: [('M', 'OD')]
AmSn18981013-V13-40-page15.txt: [('cap', 'tivity')]
AmSn18981013-V13-40-page16.txt: [('ten', 'dency')]
AmSn18981020-V13-41-page11.txt: [('enforce', 'ment')]
AmSn18981020-V13-41-page14.txt: [('cap', 'tivity'), ('In', 'terpretation'), ('especial', 'ly')]
AmSn18981020-V13-41-page3.txt: [('familiar', 'ity')]
AmSn18981027-V13-42-page15.txt: [('cap', 'tivity'), ('r', 'om'), ('especial', 'ly')]
AmSn18981103-V13-43-page11.txt: [('con', 'flict')]
AmSn18981103-V13-43-page14.txt: [('cap', 'tivity'), ('Kan', 'sas'), ('especial', 'ly')]
AmSn18981110-V13-44-page15.txt: [('cap', 'tivity'), ('especial', 'ly')]
AmSn18981110-V13-44-page6.txt: [('politi', 'cian')]
AmSn18981110-V13-44-page9.txt: [('author', 'ity')]
AmSn18981117-V13-45-page1.txt: [('spirit', 'ual')]
AmSn18981117-V13-45-page14.txt: [('cap', 'tivity'), ('especial', 'ly')]
AmSn18981124-V13-46-page15.txt: [('cap', 'tivity'), ('in', 'terpretation'), ('especial', 'ly')]
AmSn18981201-V13-47-page14.txt: [('cap', 'tivity'), ('In', 'terpretation'), ('especial', 'ly')]
AmSn18981201-V13-47-page16.txt: [('the', 'es'), ('reg', 'ular')]
AmSn18981201-V13-47-page3.txt: [('Cath', 'olic'), ('dig', 'nitaries')]
AmSn18981201-V13-47-page5.txt: [('inter', 'ests')]
AmSn18981201-V13-47-page9.txt: [('in', 'genious')]
AmSn18981208-V13-48-page13.txt: [('engr', 'avings')]
AmSn18981208-V13-48-page14.txt: [('r', 'io')]
AmSn18981208-V13-48-page15.txt: [('cap', 'tivity')]
AmSn18981208-V13-48-page7.txt: [('hap', 'piness')]
AmSn18981215-V13-49-page1.txt: [('GOVERN', 'MENT'), ('gov', 'ernment')]
AmSn18981215-V13-49-page14.txt: [('cap', 'tivity'), ('especial', 'ly')]
AmSn18981215-V13-49-page4.txt: [('ques', 'tion')]
AmSn18981215-V13-49-page5.txt: [('dis', 'seminate')]
AmSn18981229-V13-50-page14.txt: [('cap', 'tivity'), ('c', 'onn'), ('In', 'terpretation'), ('especial', 'ly')]
AmSn18981229-V13-50-page15.txt: [('C', 'itation')]
AmSn18981229-V13-50-page16.txt: [('ha', 'ndier')]
AmSn18981229-V13-50-page2.txt: [('go', 'vernment')]
AmSn18981229-V13-50-page3.txt: [('imperial', 'ist')]
AmSn18981229-V13-50-page7.txt: [('and', 're')]
AmSn18990105-V14-01-page15.txt: [('fascin', 'ating')]
AmSn18990105-V14-01-page2.txt: [('Con', 'gress')]
AmSn18990105-V14-01-page5.txt: [('cen', 'tury')]
AmSn18990112-V14-02-page14.txt: [('engr', 'avings')]
AmSn18990112-V14-02-page15.txt: [('fa', 'mily')]
AmSn18990119-V14-03-page10.txt: [('p', 'urpose')]
AmSn18990119-V14-03-page14.txt: [('un', 'derlying')]
AmSn18990126-V14-04-page11.txt: [('a', 'nd')]
AmSn18990126-V14-04-page13.txt: [('TO', 'OtER')]
AmSn18990126-V14-04-page15.txt: [('fa', 'mily'), ('C', 'itation')]
AmSn18990202-V14-05-page1.txt: [('PRO', 'FESSION')]
AmSn18990202-V14-05-page14.txt: [('r', 'avings')]
AmSn18990202-V14-05-page15.txt: [('t', 'itles'), ('N', 'ev'), ('C', 'itation')]
AmSn18990202-V14-05-page16.txt: [('SEN', 'TINEL')]
AmSn18990202-V14-05-page9.txt: [('We', 'll')]
AmSn18990209-V14-06-page12.txt: [('out', 'generaled')]
AmSn18990209-V14-06-page14.txt: [('cap', 'tivity')]
AmSn18990209-V14-06-page8.txt: [('to', 'ne')]
AmSn18990216-V14-07-page14.txt: [('cap', 'tivity')]
AmSn18990223-V14-08-page14.txt: [('cap', 'tivity')]
AmSn18990302-V14-09-page16.txt: [('SEN', 'TINEL')]
AmSn18990302-V14-09-page6.txt: [('ver', 'acity')]
AmSn18990302-V14-09-page9.txt: [('w', 'hich')]
AmSn18990309-V14-10-page1.txt: [('voice', 'ful')]
AmSn18990309-V14-10-page15.txt: [('P', 'RESS')]
AmSn18990309-V14-10-page16.txt: [('SEN', 'TINEL')]
AmSn18990309-V14-10-page6.txt: [('de', 'clared')]
AmSn18990316-V14-11-page15.txt: [('g', 'oo')]
AmSn18990316-V14-11-page5.txt: [('and', 're')]
AmSn18990316-V14-11-page6.txt: [('lib', 'erty')]
AmSn18990323-V14-12-page11.txt: [('en', 'gaged')]
AmSn18990323-V14-12-page15.txt: [('fa', 'mily'), ('O', 'LD')]
AmSn18990323-V14-12-page2.txt: [('gov', 'ernment')]
AmSn18990323-V14-12-page5.txt: [('Christian', 'ity')]
AmSn18990330-V14-13-page10.txt: [('sold', 'iers')]
AmSn18990406-V14-14-page14.txt: [('A', 'bby')]
AmSn18990406-V14-14-page8.txt: [('in', 'vestigations')]
AmSn18990413-V14-15-page14.txt: [('at', 'Li')]
AmSn18990413-V14-15-page16.txt: [('who', 'se')]
AmSn18990420-V14-16-page4.txt: [('min', 'ister')]
AmSn18990427-V14-17-page13.txt: [('chap', 'lains')]
AmSn18990427-V14-17-page8.txt: [('f', 'ollowing')]
AmSn18990504-V14-18-page15.txt: [('scien', 'tific')]
AmSn18990511-V14-19-page14.txt: [('Sanct', 'uary'), ('phys', 'ical'), ('FOR', 'EIGN')]
AmSn18990518-V14-20-page10.txt: [('pro', 'hibit')]
AmSn18990518-V14-20-page14.txt: [('A', 'li'), ('FOR', 'EIGN')]
AmSn18990525-V14-21-page1.txt: [('p', 'erson')]
AmSn18990525-V14-21-page15.txt: [('T', 'iP')]
AmSn18990601-V14-22-page1.txt: [('A', 'verted')]
AmSn18990601-V14-22-page11.txt: [('com', 'pelled')]
AmSn18990601-V14-22-page14.txt: [('tim', 'es'), ('mission', 'ary')]
AmSn18990601-V14-22-page15.txt: [('A', 'rIAN')]
AmSn18990608-V14-23-page13.txt: [('Sanct', 'uary')]
AmSn18990608-V14-23-page15.txt: [('AM', 'ERICAN')]
AmSn18990608-V14-23-page2.txt: [('Ha', 'tley')]
AmSn18990608-V14-23-page3.txt: [('a', 'nd')]
AmSn18990615-V14-24-page13.txt: [('l', 'ee'), ('FOR', 'EIGN'), ('Boa', 'rd')]
AmSn18990615-V14-24-page14.txt: [('A', 'ttention'), ('revo', 'lutions')]
AmSn18990615-V14-24-page6.txt: [('per', 'sonality')]
AmSn18990615-V14-24-page7.txt: [('confer', 'ence'), ('unright', 'eous')]
AmSn18990622-V14-25-page11.txt: [('Chi', 'cago')]
AmSn18990622-V14-25-page15.txt: [('fa', 'ther')]
AmSn18990622-V14-25-page16.txt: [('appear', 'ance')]
AmSn18990706-V14-26-page14.txt: [('A', 'rIAN')]
AmSn18990706-V14-26-page15.txt: [('O', 'ther'), ('SAVONA', 'ROLA')]
AmSn18990706-V14-26-page16.txt: [('SEN', 'TINEL'), ('Chi', 'cago')]
AmSn18990706-V14-26-page4.txt: [('author', 'ity')]
AmSn18990713-V14-27-page15.txt: [('cap', 'tivity')]
AmSn18990720-V14-28-page11.txt: [('c', 'onsideration')]
AmSn18990720-V14-28-page15.txt: [('cap', 'tivity')]
AmSn18990727-V14-29-page13.txt: [('O', 'ver')]
AmSn18990727-V14-29-page14.txt: [('t', 'aken')]
AmSn18990727-V14-29-page15.txt: [('cap', 'tivity'), ('W', 'orld')]
AmSn18990727-V14-29-page3.txt: [('di', 'recting')]
AmSn18990803-V14-30-page15.txt: [('C', 'itation')]
AmSn18990803-V14-30-page8.txt: [('a', 'nd')]
AmSn18990803-V14-30-page9.txt: [('de', 'livered')]
AmSn18990810-V14-31-page15.txt: [('inven', 'tions')]
AmSn18990810-V14-31-page16.txt: [('un', 'derlying')]
AmSn18990810-V14-31-page4.txt: [('Evi', 'dently')]
AmSn18990817-V14-32-page15.txt: [('A', 'lAN')]
AmSn18990824-V14-33-page13.txt: [('le', 'ft')]
AmSn18990824-V14-33-page14.txt: [('C', 'oth')]
AmSn18990824-V14-33-page7.txt: [('pro', 'posals')]
AmSn18990831-V14-34-page12.txt: [('dis', 'tinguished')]
AmSn18990831-V14-34-page14.txt: [('C', 'oth')]
AmSn18990831-V14-34-page15.txt: [('A', 'rIAN')]
AmSn18990907-V14-35-page12.txt: [('Ascend', 'ancy')]
AmSn18990907-V14-35-page3.txt: [('fur', 'ther')]
AmSn18990914-V14-36-page2.txt: [('Roma', 'nism'), ('Phil', 'ippines')]
AmSn18990921-V14-37-page6.txt: [('to', 'iled')]
AmSn18990928-V14-38-page13.txt: [('or', 'cein')]
AmSn18990928-V14-38-page6.txt: [('con', 'nected')]
AmSn18991005-V14-39-page11.txt: [('Com', 'mitting')]
AmSn18991005-V14-39-page4.txt: [('a', 'nd')]
AmSn18991012-V14-40-page14.txt: [('K', 'ANSAS')]
AmSn18991012-V14-40-page15.txt: [('e', 're')]
AmSn18991012-V14-40-page2.txt: [('SEN', 'TINEL')]
AmSn18991019-V14-41-page14.txt: [('b', 'aptist')]
AmSn18991019-V14-41-page6.txt: [('earn', 'estness')]
AmSn18991026-V14-42-page11.txt: [('equal', 'ity')]
AmSn18991026-V14-42-page15.txt: [('In', 'terpretation')]
AmSn18991026-V14-42-page2.txt: [('move', 'ment')]
AmSn18991026-V14-42-page4.txt: [('la', 'xer')]
AmSn18991026-V14-42-page6.txt: [('cat', 'echisms'), ('d', 'ay')]
AmSn18991026-V14-42-page9.txt: [('a', 'nd')]
AmSn18991102-V14-43-page11.txt: [('cir', 'culated')]
AmSn18991102-V14-43-page14.txt: [('Y', 'OE')]
AmSn18991102-V14-43-page16.txt: [('Eng', 'lish')]
AmSn18991102-V14-43-page2.txt: [('con', 'gress')]
AmSn18991102-V14-43-page3.txt: [('and', 're')]
AmSn18991102-V14-43-page9.txt: [('and', 'rE')]
AmSn18991109-V14-44-page2.txt: [('j', 'ournal')]
AmSn18991116-V14-45-page16.txt: [('e', 're')]
AmSn18991116-V14-45-page17.txt: [('Chris', 'tians')]
AmSn18991123-V14-46-page13.txt: [('E', 'xamination')]
AmSn18991123-V14-46-page2.txt: [('com', 'manded')]
AmSn18991130-V14-47-page5.txt: [('t', 'hese')]
AmSn18991130-V14-47-page7.txt: [('AME', 'RICAN')]
AmSn18991207-V14-48-page12.txt: [('be', 'na')]
AmSn18991214-V14-49-page2.txt: [('Govern', 'ment')]
AmSn18991228-V14-50-page12.txt: [('We', 'll')]
AmSn18991228-V14-50-page16.txt: [('SEN', 'TINEL')]
AmSn19000104-V15-01-page10.txt: [('j', 'ustification')]
AmSn19000104-V15-01-page14.txt: [('Bo', 'nd')]
AmSn19000104-V15-01-page2.txt: [('t', 'ent')]
AmSn19000104-V15-01-page7.txt: [('aggrand', 'izement')]
AmSn19000104-V15-01-page9.txt: [('Amend', 'ment')]
AmSn19000111-V15-02-page11.txt: [('inter', 'fering')]
AmSn19000118-V15-03-page13.txt: [('C', 'loth')]
AmSn19000118-V15-03-page2.txt: [('the', 're')]
AmSn19000125-V15-04-page10.txt: [('Con', 'trary')]
AmSn19000201-V15-05-page1.txt: [('f', 'ollows')]
AmSn19000208-V15-06-page10.txt: [('repre', 'sented')]
AmSn19000208-V15-06-page11.txt: [('Pro', 'Tem')]
AmSn19000208-V15-06-page2.txt: [('a', 'ny')]
AmSn19000208-V15-06-page3.txt: [('sym', 'pathy')]
AmSn19000215-V15-07-page14.txt: [('Work', 'ers')]
AmSn19000215-V15-07-page15.txt: [('O', 'kie')]
AmSn19000215-V15-07-page9.txt: [('c', 'om')]
AmSn19000222-V15-08-page16.txt: [('SEN', 'TINEL')]
AmSn19000301-V15-09-page10.txt: [('Gov', 'ERNOR')]
AmSn19000301-V15-09-page13.txt: [('d', 'iet')]
AmSn19000308-V15-10-page14.txt: [('A', 'IL')]
AmSn19000308-V15-10-page2.txt: [('in', 'struction'), ('fr', 'ee')]
AmSn19000315-V15-11-page16.txt: [('differ', 'ences')]
AmSn19000322-V15-12-page14.txt: [('phys', 'ical')]
AmSn19000322-V15-12-page5.txt: [('en', 'forcement')]
AmSn19000329-V15-13-page9.txt: [('Shan', 'Tung')]
AmSn19000405-V15-14-page2.txt: [('Bap', 'tist')]
AmSn19000405-V15-14-page5.txt: [('nigh', 'tly')]
AmSn19000412-V15-15-page12.txt: [('con', 'cern')]
AmSn19000419-V15-16-page15.txt: [('right', 'eousness')]
AmSn19000426-V15-17-page8.txt: [('command', 'Ment')]
AmSn19000426-V15-17-page9.txt: [('sen', 'timent')]
AmSn19000510-V15-18-page10.txt: [('for', 'th')]
AmSn19000510-V15-18-page14.txt: [('lib', 'erty')]
AmSn19000510-V15-18-page16.txt: [('SEN', 'TINEL'), ('E', 'TC')]
AmSn19000510-V15-18-page2.txt: [('or', 'ganization')]
AmSn19000524-V15-20-page9.txt: [('con', 'quering')]
AmSn19000531-V15-21-page12.txt: [('Fur', 'ther')]
AmSn19000607-V15-22-page1.txt: [('t', 'ee')]
AmSn19000607-V15-22-page5.txt: [('Is', 'rael')]
AmSn19000607-V15-22-page8.txt: [('r', 'oo')]
AmSn19000614-V15-23-page6.txt: [('a', 'pparently')]
AmSn19000628-V15-25-page5.txt: [('PRO', 'GRESSIVE')]
AmSn19000712-V15-27-page12.txt: [('Kiang', 'Si'), ('Kiang', 'Su')]
AmSn19000712-V15-27-page13.txt: [('E', 'RT')]
AmSn19000719-V15-28-page12.txt: [('u', 'nwarranted')]
AmSn19000719-V15-28-page2.txt: [('Refor', 'mation'), ('the', 'Refor')]
AmSn19000719-V15-28-page6.txt: [('men', 'tioned'), ('in', 'dividuals'), ('relin', 'quishes'), ('and', 're'), ('ac', 'quired')]
AmSn19000719-V15-28-page7.txt: [('lib', 'erty'), ('Chris', 'tian')]
AmSn19000719-V15-28-page8.txt: [('perse', 'cutions')]
AmSn19000726-V15-29-page2.txt: [('em', 'inent'), ('an', 'em')]
AmSn19000726-V15-29-page9.txt: [('a', 'ppointed')]
AmSn19000802-V15-30-page13.txt: [('th', 'eir')]
AmSn19000802-V15-30-page16.txt: [('Chris', 'tendom')]
AmSn19000802-V15-30-page5.txt: [('mon', 'archy')]
AmSn19000809-V15-31-page10.txt: [('relig', 'ious')]
AmSn19000809-V15-31-page12.txt: [('on', 'es'), ('g', 'od'), ('con', 'gregation')]
AmSn19000809-V15-31-page16.txt: [('I', 'ndependence')]
AmSn19000809-V15-31-page8.txt: [('Pres', 'ent')]
AmSn19000816-V15-32-page1.txt: [('e', 'ra')]
AmSn19000816-V15-32-page16.txt: [('LIB', 'ERTY')]
AmSn19000816-V15-32-page6.txt: [('utter', 'ance')]
AmSn19000823-V15-33-page13.txt: [('au', 'thorizing')]
AmSn19000823-V15-33-page14.txt: [('val', 'uable')]
AmSn19000823-V15-33-page16.txt: [('ob', 'liged')]
AmSn19000823-V15-33-page6.txt: [('Meth', 'odists')]
AmSn19000830-V15-34-page12.txt: [('or', 'dered')]
AmSn19000830-V15-34-page2.txt: [('r', 'ights')]
AmSn19000906-V15-35-page10.txt: [('con', 'cerned')]
AmSn19000906-V15-35-page11.txt: [('an', 'ything'), ('car', 'ried'), ('the', 're')]
AmSn19000906-V15-35-page12.txt: [('in', 'terpOsed')]
AmSn19000906-V15-35-page15.txt: [('THE', 'SE')]
AmSn19000906-V15-35-page16.txt: [('dis', 'tinguish')]
AmSn19000906-V15-35-page2.txt: [('e', 'xistence')]
AmSn19000906-V15-35-page6.txt: [('live', 'th')]
AmSn19000913-V15-36-page14.txt: [('Nor', 'theastern')]
AmSn19000920-V15-37-page16.txt: [('THE', 'SE')]
AmSn19000928-V15-38-page15.txt: [('W', 'IZ')]
AmSn19000928-V15-38-page16.txt: [('institu', 'tions')]
AmSn19000928-V15-38-page6.txt: [('Conscien', 'ce')]
AmSn19001004-V15-39-page10.txt: [('vici', 'ous')]
AmSn19001004-V15-39-page14.txt: [('subscrip', 'tion')]
AmSn19001004-V15-39-page2.txt: [('and', 'es'), ('es', 'pecially')]
AmSn19001004-V15-39-page5.txt: [('cru', 'cified')]
AmSn19001011-V15-40-page11.txt: [('gov', 'ernment')]
AmSn19001011-V15-40-page15.txt: [('Y', 'ou')]
AmSn19001011-V15-40-page6.txt: [('r', 'esult')]
AmSn19001018-V15-41-page10.txt: [('in', 'TI'), ('gro', 'und')]
AmSn19001018-V15-41-page14.txt: [('SEN', 'TINEL')]
AmSn19001018-V15-41-page16.txt: [('the', 'se')]
AmSn19001018-V15-41-page2.txt: [('on', 'es'), ('govern', 'ment')]
AmSn19001018-V15-41-page4.txt: [('ad', 'justed'), ('a', 'lWays'), ('suc', 'ceeding')]
AmSn19001018-V15-41-page5.txt: [('mo', 'rality'), ('phi', 'losophy')]
AmSn19001018-V15-41-page8.txt: [('exer', 'cise')]
AmSn19001025-V15-42-page10.txt: [('condi', 'tions')]
AmSn19001025-V15-42-page11.txt: [('interfer', 'ence'), ('settle', 'ment')]
AmSn19001025-V15-42-page12.txt: [('be', 'lieve')]
AmSn19001025-V15-42-page13.txt: [('men', 'tion')]
AmSn19001025-V15-42-page14.txt: [('SEN', 'TINEL'), ('subscrip', 'tion')]
AmSn19001025-V15-42-page16.txt: [('Sund', 'ay'), ('Nu', 'NN')]
AmSn19001025-V15-42-page4.txt: [('ob', 'viously'), ('per', 'sisted')]
AmSn19001025-V15-42-page7.txt: [('guar', 'antee')]
AmSn19001101-V15-43-page12.txt: [('frequent', 'ers'), ('BaP', 'tists')]
AmSn19001101-V15-43-page13.txt: [('disC', 'ouraged')]
AmSn19001101-V15-43-page14.txt: [('L', 'iberty')]
AmSn19001101-V15-43-page16.txt: [('de', 'mand')]
AmSn19001101-V15-43-page5.txt: [('wor', 'shiping'), ('dic', 'tates')]
AmSn19001101-V15-43-page6.txt: [('mission', 'ary')]
AmSn19001108-V15-44-page15.txt: [('SEN', 'TINEL'), ('A', 'DDRESS')]
AmSn19001108-V15-44-page16.txt: [('enforce', 'ment')]
AmSn19001108-V15-44-page5.txt: [('re', 'joicings'), ('the', 're')]
AmSn19001115-V15-45-page1.txt: [('e', 'CO'), ('a', 'll'), ('a', 'ssumed')]
AmSn19001115-V15-45-page13.txt: [('federa', 'tion')]
AmSn19001115-V15-45-page14.txt: [('hand', 'somely')]
AmSn19001115-V15-45-page16.txt: [('ad', 'vocateS')]
AmSn19001115-V15-45-page6.txt: [('gov', 'ernments'), ('Cath', 'olics')]
AmSn19001122-V15-46-page10.txt: [('N', 'eVertheless')]
AmSn19001122-V15-46-page11.txt: [('depart', 'ment'), ('in', 'dicated')]
AmSn19001122-V15-46-page13.txt: [('ques', 'tion')]
AmSn19001122-V15-46-page16.txt: [('m', 'ab')]
AmSn19001122-V15-46-page2.txt: [('gov', 'erned')]
AmSn19001122-V15-46-page7.txt: [('h', 'oy')]
AmSn19001129-V15-47-page11.txt: [('to', 're')]
AmSn19001129-V15-47-page13.txt: [('pene', 'trated'), ('per', 'sonal')]
AmSn19001129-V15-47-page15.txt: [('n', 'eeds')]
AmSn19001129-V15-47-page16.txt: [('a', 're')]
AmSn19001129-V15-47-page4.txt: [('en', 'largeth')]
AmSn19001206-V15-48-page3.txt: [('the', 'Es')]
AmSn19001206-V15-48-page6.txt: [('Cong', 'resSman'), ('Con', 'gress')]
AmSn19001206-V15-48-page7.txt: [('pri', 'marily'), ('cer', 'tainly'), ('no', 'es')]
AmSn19001206-V15-48-page8.txt: [('Vir', 'ginia')]
AmSn19001220-V15-50-page12.txt: [('c', 'urch'), ('or', 'ganized')]
AmSn19001220-V15-50-page6.txt: [('Massa', 'chusetts'), ('colo', 'nies'), ('state', 'ment')]
In [32]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction7

Average verified rate: 0.9830117359134304

Average of error rates: 0.018671590569979114

Total token count: 8363303

In [33]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )
Out[33]:
[("'", 8053),
 ('t', 4426),
 ('e', 3978),
 ('d', 3950),
 ('w', 3754),
 ('co', 3408),
 ('m', 3176),
 ('n', 3046),
 ('f', 2035),
 ('r', 2028),
 ('th', 1645),
 ('g', 1371),
 ('mo', 1160),
 ('u', 926),
 ('x', 864),
 ('ex', 521),
 ('pa', 410),
 ('q', 399),
 ('sunday-law', 334),
 ('k', 315),
 ("the'", 304),
 ('pp', 299),
 ('tion', 276),
 ("conscience'", 260),
 ('ch', 253),
 ('seventhday', 249),
 ('re', 224),
 ('ga', 220),
 ('oc', 218),
 ('z', 215),
 ('wm', 215),
 ('satolli', 210),
 ('employes', 209),
 ('munn', 207),
 ('ti', 200),
 ('id', 181),
 ('un', 173),
 ('ry', 170),
 ('al', 166),
 ('sunday-closing', 160),
 ('ca', 151),
 ('ment', 146),
 ('chain-gang', 136),
 ("to'", 134),
 ('nd', 130),
 ('ll', 128),
 ('lb', 125),
 ('il', 123),
 ('bateham', 122),
 ('cmsar', 121),
 ('se', 120),
 ('aleck', 112),
 ("and'", 109),
 ('socalled', 106),
 ('sunday-rest', 104),
 ('sentin', 104),
 ('milly', 103),
 ('cc', 101),
 ('te', 101),
 ('erican', 99),
 ("of'", 98),
 ('va', 95),
 ('nt', 92),
 ('fellow-citizens', 92),
 ('vt', 92),
 ('cd', 92),
 ('tt', 89),
 ('aa', 89),
 ('op', 89),
 ('ft', 88),
 ('-', 88),
 ("a'", 86),
 ('eze', 84),
 ('attorney-general', 83),
 ('ma', 82),
 ('csar', 81),
 ('stundists', 80),
 ('cereola', 79),
 ('lc', 79),
 ('neander', 78),
 ('religio-political', 76),
 ('mc', 74),
 ('law-abiding', 74),
 ('sundaylaw', 74),
 ('rican', 73),
 ('tions', 72),
 ('ay', 72),
 ('li', 72),
 ('edmunds', 71),
 ('ni', 71),
 ('ra', 71),
 ("crafts's", 70),
 ('rd', 70),
 ('ia', 70),
 ("is'", 69),
 ('sr', 69),
 ('freethought', 68),
 ("in'", 67),
 ('si', 67),
 ("crafts'", 66),
 ("folks'", 66),
 ("cmsar's", 66),
 ('un-american', 64),
 ('sabbath-day', 63),
 ('rest-day', 62),
 ('sabbath-breaking', 60),
 ("''", 59),
 ('pr', 58),
 ('mt', 58),
 ("'s", 58),
 ('ic', 57),
 ('ac', 57),
 ('ne', 56),
 ('candidus', 56),
 ("barbers'", 56),
 ('paeifie', 56),
 ('na', 55),
 ('tregelles', 54),
 ('geikie', 54),
 ('ie', 54),
 ("that'", 54),
 ('ican', 53),
 ("an'", 53),
 ('ity', 53),
 ('dred', 53),
 ('employe', 52),
 ("it'", 52),
 ('ky', 51),
 ('assoeiation', 51),
 ('ob', 51),
 ('litt', 51),
 ('ri', 50),
 ('ernment', 50),
 ('wellknown', 50),
 ('coxey', 50),
 ('ci', 50),
 ('ofthe', 49),
 ('tional', 49),
 ('ce', 49),
 ('es', 49),
 ('ments', 49),
 ('vo', 48),
 ('aro', 48),
 ('pre', 47),
 ('fa', 47),
 ('ph', 47),
 ('cathedra', 46),
 ("jones'", 46),
 ('ow', 45),
 ('leiper', 45),
 ('ro', 45),
 ('forit', 45),
 ('ent', 45),
 ("citizens'", 44),
 ('mi', 43),
 ('sundayclosing', 43),
 ('judefind', 42),
 ('ple', 42),
 ('sh', 42),
 ('erty', 42),
 ('times-herald', 42),
 ('chaingang', 42),
 ('tischendorf', 42),
 ('ei', 41),
 ('bo', 41),
 ("o'keefe", 41),
 ('hiberty', 41),
 ('cr', 41),
 ("be'", 41),
 ("law'", 40),
 ('wo', 40),
 ('sunday-keeping', 40),
 ('ea', 40),
 ('yo', 40),
 ('chapelle', 40),
 ('ct', 40),
 ('tinel', 40),
 ('em', 40),
 ("i'", 39),
 ('ip', 39),
 ('keane', 39),
 ('tv', 39),
 ('copygraph', 39),
 ("waterman's", 38),
 ('lachmann', 38),
 ('tian', 38),
 ('oi', 38),
 ('kai', 38),
 ('ica', 38),
 ("cruden's", 38),
 ('ers', 37),
 ('non-sectarian', 37),
 ('adress', 37),
 ("csar's", 37),
 ('saye', 37),
 ('church-and-state', 37),
 ("american'", 37),
 ('coxe', 36),
 ('ta', 36),
 ('io', 36),
 ('tr', 36),
 ('dwyer', 36),
 ('oa', 35),
 ("for'", 35),
 ("not'", 35),
 ('pf', 35),
 ('tir', 35),
 ('td', 35),
 ('mass-meeting', 35),
 ('swiggart', 35),
 ("this'", 35),
 ('fi', 35),
 ('az', 35),
 ('law-making', 34),
 ('ance', 34),
 ('da', 34),
 ('first-day', 34),
 ('jagoe', 34),
 ('inthe', 34),
 ('godgiven', 34),
 ("church'", 34),
 ('brunot', 33),
 ('ther', 33),
 ('cwsar', 33),
 ('gious', 33),
 ('entinel', 33),
 ('eh', 33),
 ('cl', 33),
 ('sa', 32),
 ('ss', 32),
 ("as'", 32),
 ('vice-presidents', 32),
 ('base-ball', 32),
 ('ap', 32),
 ('stitution', 32),
 ("infants'", 32),
 ('ba', 32),
 ('saloon-keepers', 32),
 ('oo', 31),
 ('lt', 31),
 ('sun-worship', 31),
 ("liberty'", 31),
 ('ts', 31),
 ('one-seventh', 31),
 ('rt', 31),
 ('prayer-meeting', 31),
 ('slattery', 31),
 ('colitical', 30),
 ('efical', 30),
 ('mn', 30),
 ('rs', 30),
 ('vox', 30),
 ('fr', 30),
 ('ao', 29),
 ('os', 29),
 ('mg', 29),
 ("are'", 29),
 ("roberts'", 29),
 ('ge', 29),
 ('tc', 29),
 ('rn', 29),
 ('kauffman', 29),
 ('fo', 29),
 ('ns', 29),
 ('crowther', 29),
 ("workingmen's", 29),
 ('puplishing', 29),
 ("all'", 29),
 ('holidayism', 28),
 ('oe', 28),
 ('self-preservation', 28),
 ('street-cars', 28),
 ("parkhurst's", 28),
 ('charta', 28),
 ('dei', 28),
 ('newyork', 28),
 ('durborow', 28),
 ('liberty-loving', 28),
 ('zwiebach', 28),
 ("fathers'", 28),
 ('non-catholics', 28),
 ('tl', 28),
 ('fair-minded', 27),
 ("krug's", 27),
 ('merican', 27),
 ("sabbath'", 27),
 ('non-observance', 27),
 ('ful', 27),
 ('iu', 27),
 ('schurman', 27),
 ("cushing's", 27),
 ('mccauley', 27),
 ('self-defense', 27),
 ('theo', 27),
 ('fellow-man', 27),
 ("gibbons'", 27),
 ("or'", 26),
 ('counter-arguments', 26),
 ('sabbathkeeping', 26),
 ('ablegate', 26),
 ('fora', 26),
 ('fide', 26),
 ('platt', 26),
 ('thon', 26),
 ("with'", 26),
 ('itis', 26),
 ('hto', 26),
 ('bula', 26),
 ("god'", 26),
 ('ve', 26),
 ("which'", 26),
 ('pecci', 26),
 ('divinely-appointed', 26),
 ('ae', 26),
 ('non-religious', 26),
 ("by'", 26),
 ('selfgovernment', 26),
 ('ence', 25),
 ("lions'", 25),
 ('weakley', 25),
 ('eferson', 25),
 ('nethinim', 25),
 ('weyler', 25),
 ('feligious', 25),
 ('ig', 25),
 ('ou', 25),
 ('fah', 25),
 ("d'aubigne", 25),
 ('martinelli', 25),
 ('tn', 25),
 ("at'", 25),
 ('sundayschool', 25),
 ('ous', 25),
 ('ridpath', 25),
 ('tne', 25),
 ('publishinc', 25),
 ('loth', 24),
 ('krug', 24),
 ('ceesar', 24),
 ('stuttle', 24),
 ('mehan', 24),
 ('tothe', 24),
 ('tiie', 24),
 ('ligion', 24),
 ("preachers'", 24),
 ("if'", 24),
 ('phelan', 24),
 ('ws', 24),
 ('ut', 23),
 ('humbert', 23),
 ('lawabiding', 23),
 ('twentyfive', 23),
 ('atterbury', 23),
 ('sunday-sabbath', 23),
 ('nn', 23),
 ('arierican', 23),
 ('ble', 23),
 ("saints'", 23),
 ('ber', 23),
 ('om', 23),
 ('ious', 23),
 ('tbe', 22),
 ('anb', 22),
 ('ili', 22),
 ('ef', 22),
 ('ib', 22),
 ('bt', 22),
 ('tb', 22),
 ('ligious', 22),
 ("have'", 22),
 ('ab', 22),
 ('scudder', 22),
 ('pany', 22),
 ('sel', 22),
 ('wi', 22),
 ('gi', 22),
 ('anti-christian', 22),
 ("we'", 22),
 ('jeferson', 22),
 ('xact', 21),
 ("pub'rs", 21),
 ("grocers'", 21),
 ("e'", 21),
 ('comegys', 21),
 ('scovel', 21),
 ('sevent', 21),
 ('po', 21),
 ('mee', 21),
 ('witham', 21),
 ('thos', 21),
 ('ng', 21),
 ('yefferson', 21),
 ('ive', 21),
 ("sunday'", 21),
 ('notgive', 21),
 ('postmaster-general', 20),
 ("hutchings'", 20),
 ('washburne', 20),
 ("he'", 20),
 ('religiopolitical', 20),
 ('kellog', 20),
 ('romer', 20),
 ('healthgiving', 20),
 ("soldiers'", 20),
 ("satolli's", 20),
 ('mcglynn', 20),
 ('sien', 20),
 ('ject', 20),
 ("from'", 20),
 ('ee', 20),
 ("hours'", 20),
 ('ary', 20),
 ('pt', 20),
 ('anierican', 20),
 ('rosemond', 20),
 ("vick's", 20),
 ('parens', 20),
 ('bythe', 20),
 ("on'", 20),
 ('longnecker', 20),
 ("was'", 20),
 ("printers'", 19),
 ('governor-general', 19),
 ('anierica', 19),
 ('pressense', 19),
 ('fbr', 19),
 ('micr', 19),
 ('everts', 19),
 ('rorabacher', 19),
 ("pastors'", 19),
 ("gov't", 19),
 ('tp', 19),
 ('iti', 19),
 ('rr', 19),
 ('seelye', 19),
 ('arther', 19),
 ('wishart', 19),
 ("people'", 19),
 ('cosgrove', 19),
 ('gt', 19),
 ('det', 19),
 ('lery', 19),
 ('abbe', 19),
 ('stundist', 19),
 ("day'", 19),
 ('hagans', 19),
 ('montefiore', 19),
 ("will'", 19),
 ('chain-gangs', 19),
 ('law-makers', 19),
 ('sundaykeeping', 18),
 ('dc', 18),
 ('reli', 18),
 ('tae', 18),
 ('od', 18),
 ('enright', 18),
 ('anti-catholic', 18),
 ('non-interference', 18),
 ('tht', 18),
 ('sas', 18),
 ('oz', 18),
 ('efferson', 18),
 ('ible', 18),
 ("th'", 18),
 ('tianity', 18),
 ('tarawera', 18),
 ('curlett', 18),
 ('tii', 18),
 ('ey', 18),
 ('tolstoi', 18),
 ('wa', 18),
 ('self-styled', 18),
 ('--', 18),
 ("would'", 18),
 ('ccesar', 18),
 ('oity', 18),
 ('avery-stuttle', 18),
 ('nnw', 17),
 ('mal', 17),
 ('bf', 17),
 ('prin', 17),
 ("righteousness'", 17),
 ('jt', 17),
 ('clingman', 17),
 ('cedarquist', 17),
 ('newyorkcity', 17),
 ('tra', 17),
 ('ricans', 17),
 ('saloon-keeper', 17),
 ('rubiana', 17),
 ('eral', 17),
 ('prisot', 17),
 ('post-offices', 17),
 ('theunited', 17),
 ('ies', 17),
 ('nu', 17),
 ('ol', 17),
 ("no'", 17),
 ('fl', 17),
 ('sabbathbreaking', 17),
 ("a'nan", 17),
 ('ress', 17),
 ('sommerville', 17),
 ('ation', 17),
 ('church-going', 17),
 ('cood', 17),
 ('mullally', 17),
 ('self-governing', 17),
 ('nel', 17),
 ('um', 17),
 ('bondst', 17),
 ('philpott', 17),
 ('law-breaker', 17),
 ('ik', 17),
 ('senti', 17),
 ('ame', 17),
 ('leivites', 17),
 ('pel', 17),
 ("apostles'", 17),
 ('hy', 17),
 ("schaff's", 16),
 ('dieu', 16),
 ('selfevident', 16),
 ('dayto', 16),
 ('ioo', 16),
 ('tf', 16),
 ('prepartion', 16),
 ('cp', 16),
 ("enright's", 16),
 ("his'", 16),
 ('mit', 16),
 ('relig', 16),
 ('thepeople', 16),
 ('sie', 16),
 ('alfaro', 16),
 ('symmachus', 16),
 ('xl', 16),
 ('ples', 16),
 ('facto', 16),
 ('erromanga', 16),
 ('sunday-keepers', 16),
 ('dividual', 16),
 ('peryear', 16),
 ('peffer', 16),
 ('re-enact', 16),
 ('ish', 16),
 ('socialpurity', 16),
 ('ith', 16),
 ('cs', 16),
 ('wilkie', 16),
 ("l'", 16),
 ('ul', 16),
 ('hodgson', 16),
 ('basle', 16),
 ('bas', 16),
 ('eousness', 16),
 ('zi', 15),
 ("who'", 15),
 ('ite', 15),
 ('sabbath-breakers', 15),
 ('americansentinel', 15),
 ('ag', 15),
 ('rhe', 15),
 ('nonsuch', 15),
 ('pepsia', 15),
 ('co-workers', 15),
 ('gallinger', 15),
 ('labberton', 15),
 ('thatthe', 15),
 ('intrust', 15),
 ('lttra', 15),
 ('ork', 15),
 ('aw', 15),
 ('law-breakers', 15),
 ('milman', 15),
 ('rampolla', 15),
 ("christian'", 15),
 ('wellbeing', 15),
 ("milman's", 15),
 ('klip', 15),
 ('bi', 15),
 ('ons', 15),
 ('ctesar', 15),
 ("their'", 15),
 ('re-enacted', 15),
 ('populi', 15),
 ('governinent', 15),
 ('wor', 15),
 ('hach', 15),
 ('sc', 15),
 ("miles'", 15),
 ("ginn's", 15),
 ('ih', 15),
 ('janes', 15),
 ('ov', 15),
 ('sulus', 15),
 ('stinday', 15),
 ('xo', 15),
 ('ist', 15),
 ('ectarian', 15),
 ("o'gorman", 15),
 ('tkt', 15),
 ("they'", 15),
 ('alvierica', 15),
 ('je', 15),
 ('birney', 15),
 ("religion'", 15),
 ('avery-stiittle', 15),
 ('sf', 15),
 ('np', 14),
 ('thb', 14),
 ('qa', 14),
 ("pres'ts", 14),
 ('secker', 14),
 ('intelligeneer', 14),
 ("'the", 14),
 ('yeferson', 14),
 ('self-exaltation', 14),
 ("tourists'", 14),
 ('chiniquy', 14),
 ('rittenhouse', 14),
 ('ormore', 14),
 ("moses'", 14),
 ('peo', 14),
 ('goverment', 14),
 ('plete', 14),
 ('lished', 14),
 ('thp', 14),
 ("sup'ts", 14),
 ("gault's", 14),
 ('oth', 14),
 ('cortlandt', 14),
 ('non-union', 14),
 ('br', 14),
 ("ccesar's", 14),
 ('anglo-saxons', 14),
 ('christain', 14),
 ('restday', 14),
 ('su', 14),
 ('nr', 14),
 ('rv', 14),
 ('eemperance', 14),
 ('sabbath-breaker', 14),
 ('gb', 14),
 ('tains', 14),
 ("mcallister's", 14),
 ('malum', 14),
 ("williams'", 14),
 ("neander's", 14),
 ("adventists'", 14),
 ('lexow', 14),
 ('confreres', 14),
 ('thr', 14),
 ('ncluding', 14),
 ('af', 14),
 ('sient', 14),
 ('tution', 14),
 ('gl', 14),
 ('tennesseeans', 14),
 ('mu', 14),
 ("but'", 13),
 ('fon', 13),
 ('ver', 13),
 ('christ-like', 13),
 ('aivierican', 13),
 ('leaguers', 13),
 ('wu', 13),
 ("mf'g", 13),
 ('hoc', 13),
 ('dibbs', 13),
 ('anti-religious', 13),
 ('ntinel', 13),
 ('ual', 13),
 ('themies', 13),
 ('dont', 13),
 ('ex-president', 13),
 ('gr', 13),
 ("one'", 13),
 ('two-horned', 13),
 ('rp', 13),
 ("coxey's", 13),
 ('higinbotham', 13),
 ("t'", 13),
 ("protestants'", 13),
 ('pilman', 13),
 ('froni', 13),
 ('foi', 13),
 ('meeting-house', 13),
 ('mccourt', 13),
 ('thd', 13),
 ('waupon', 13),
 ("f'", 13),
 ("has'", 13),
 ('forthe', 13),
 ('itt', 13),
 ('hiscock', 13),
 ('sp', 13),
 ('self-contradictory', 13),
 ("torry's", 13),
 ('cif', 13),
 ("its'", 13),
 ('dn', 13),
 ('princi', 13),
 ('cer', 13),
 ('thi', 13),
 ('ec', 13),
 ('hee', 13),
 ('sabbathkeepers', 13),
 ('lelvites', 13),
 ('one-man', 13),
 ('tms', 13),
 ('rundschau', 13),
 ('tlie', 13),
 ('tax-payers', 13),
 ('non-christian', 13),
 ('self-appointed', 13),
 ("breeders'", 13),
 ('kossean', 13),
 ('olesen', 13),
 ('botkine', 13),
 ('ny', 13),
 ('volksraad', 13),
 ("whaley's", 12),
 ('tem', 12),
 ('constitu', 12),
 ('ess', 12),
 ('froin', 12),
 ('robb', 12),
 ('theire', 12),
 ('thein', 12),
 ('ors', 12),
 ('ical', 12),
 ('chappelle', 12),
 ("churches'", 12),
 ('self-constituted', 12),
 ('wouldbe', 12),
 ('entin', 12),
 ('week-day', 12),
 ('thority', 12),
 ('fast-day', 12),
 ("were'", 12),
 ('ex-mayor', 12),
 ('fortynine', 12),
 ('ture', 12),
 ('bok', 12),
 ('whitall', 12),
 ("cents'", 12),
 ('sition', 12),
 ('tte', 12),
 ('self-interest', 12),
 ('croker', 12),
 ("d'aubigne's", 12),
 ('merous', 12),
 ('cai', 12),
 ('combatting', 12),
 ('observa', 12),
 ('fp', 12),
 ('yonx', 12),
 ('gainst', 12),
 ("such'", 12),
 ('ht', 12),
 ('ds', 12),
 ('masse', 12),
 ('self-respecting', 12),
 ('pc', 12),
 ('ivierican', 12),
 ('tobe', 12),
 ("do'", 12),
 ("christ'", 12),
 ('ki', 12),
 ('ddress', 12),
 ("neat's", 12),
 ('twenty-fifth', 12),
 ('ttin', 12),
 ('maurer', 12),
 ('bondstreet', 12),
 ('inter-state', 12),
 ('lation', 12),
 ('ang', 12),
 ("any'", 12),
 ('rk', 12),
 ('gx', 12),
 ('sunday-observance', 12),
 ('havergal', 11),
 ("james'", 11),
 ('olic', 11),
 ('thechurch', 11),
 ('sm', 11),
 ('cz', 11),
 ('df', 11),
 ("dealers'", 11),
 ('ke', 11),
 ('ets', 11),
 ('pm', 11),
 ('ex-senator', 11),
 ('lieve', 11),
 ('uncompromis', 11),
 ('mm', 11),
 ('ine', 11),
 ('sherk', 11),
 ('fifty-second', 11),
 ('selfpreservation', 11),
 ('derstanding', 11),
 ('naw', 11),
 ('tre', 11),
 ("states'", 11),
 ('theni', 11),
 ("state'", 11),
 ('communica', 11),
 ('rose-wood', 11),
 ('androscoggin', 11),
 ("bakers'", 11),
 ('sk', 11),
 ('taschereau', 11),
 ('qt', 11),
 ('tm', 11),
 ('griffitts', 11),
 ('fellow-workers', 11),
 ('kw', 11),
 ('bradfield', 11),
 ('houk', 11),
 ('fot', 11),
 ("so'", 11),
 ("'a", 11),
 ('amyot', 11),
 ('muskoka', 11),
 ('pl', 11),
 ('aivierica', 11),
 ('erties', 11),
 ('qf', 11),
 ('haye', 11),
 ('ost', 11),
 ('isthepapacyinprophecy', 11),
 ('sev', 11),
 ('rian', 11),
 ('mits', 11),
 ('notre', 11),
 ('key-note', 11),
 ('hirsch', 11),
 ('sealings', 11),
 ('rin', 11),
 ('evil-doers', 11),
 ('nott', 11),
 ("civil'", 11),
 ('theseventh', 11),
 ('ml', 11),
 ('kee', 11),
 ('yr', 11),
 ('gress', 11),
 ('ex-governor', 11),
 ('cramer', 11),
 ('lr', 11),
 ('fs', 11),
 ('informations', 11),
 ('paoipio', 11),
 ('twentyfour', 11),
 ('ridgetown', 11),
 ('axact', 11),
 ('times-democrat', 11),
 ('tians', 11)]

Review Remaining Errors

In [34]:
reports.docs_with_high_error_rate( summary , min_error_rate = .2 )
Out[34]:
[('AmSn18900918-V05-37-page4.txt', 0.495),
 ('AmSn18900918-V05-37-page1.txt', 0.472),
 ('AmSn18900918-V05-37-page8.txt', 0.459),
 ('AmSn18900918-V05-37-page5.txt', 0.413),
 ('AmSn18970701-V12-26-page1.txt', 0.404),
 ('AmSn18980616-V13-24-page15.txt', 0.371),
 ('AmSn18980630-V13-26-page15.txt', 0.356),
 ('AmSn18980609-V13-23-page15.txt', 0.351),
 ('AmSn18980623-V13-25-page15.txt', 0.335),
 ('AmSn18980714-V13-27-page15.txt', 0.325),
 ('AmSn18960220-V11-08-page7.txt', 0.262),
 ('AmSn18971007-V12-39-page16.txt', 0.246),
 ('AmSn18971014-V12-40-page16.txt', 0.238),
 ('AmSn18951031-V10-43-page7.txt', 0.224),
 ('AmSn18951219-V10-50-page7.txt', 0.215),
 ('AmSn18951024-V10-42-page7.txt', 0.204)]
In [35]:
docs_2_check = [x[0] for x in reports.docs_with_high_error_rate( summary , min_error_rate = .2 ) if x[1] > 0.2]
In [36]:
# utilities.open_original_docs(docs_2_check, directories['cycle'])

There are two main drivers of the OCR errors. First, the original scans of AmSn18900918-V05-37 captured either the layer behind or the shadow of text on the next page. This makes clean OCR all but impossible from that scan. The other major driver of errors is the advertisement sections of the publications, particularly ads for an Interlinear Greek New Testament.

Check Long Errors

In [37]:
reports.long_errors(errors_summary, min_length=15)
Out[37]:
(['intensely-orthodox',
  'rezteoxfebeiloenir',
  'virreasetiabwayi',
  'pleasant-spirited',
  'eheapserviceable',
  'poreversepalialf',
  'estabtablishment',
  'pfopositionbliat',
  'christiancitizenship',
  'cliiynirtreczenanmouesna',
  'themonthlypaymentwillbe',
  'choochee-choochee',
  "sup'ercalendered",
  'laicciohuasrmaniangstsrtsltyittzfhefrir',
  'massachusetts--mr',
  'malrithinrinodths',
  'gamblingfraternity',
  'theoriesodfisease',
  'sabbath-profanation',
  'subscripmountains',
  'forty-eight-page',
  'couldfollybegreaterthanpraying',
  'increasing-favor',
  "considerable'part",
  'non-establishment',
  'commonwealthsaid',
  'theseprosecutions',
  'poll-parrot-wise',
  "administered'that",
  'theamericansabbathunion',
  "notwithstanding'it",
  'stylesofvehicles',
  'themselvesandffarrless',
  'anti-prohibition',
  'theflorentinemartyr',
  'poverty-stricken',
  'duetonasalcatarrh',
  'supportingbustles',
  'zarassewmesseepmaimpaw',
  'petitionsofthelongislandfishermen',
  'christianstatesman',
  'icarapriociaovrat',
  'statute-intrenched',
  'theyshallbedoneaway',
  'office-worshiping',
  'elfqpronocincing',
  "prornittetitpapets'are",
  'andwasonlywaiting',
  'ouriettidrofbthe',
  'elattliimiiiiimi',
  'counterpetitions',
  'ioutlinesvividlytherelationthatexistedbetween',
  'languagearchbishop',
  "orton'simproveddrenchinggil",
  'notwiamstannfrfo',
  'catholicsunderstand',
  "jitdge'pennypacker",
  'nationalreligiousliberty',
  'sunday-observance',
  "and'unmistakably",
  'beaphjseabrighklong',
  'gold-from-sea-water',
  'andtheschemewentthrough',
  'peorepresentative',
  'iiiiiiiiiiiiiiiii',
  'statuteintrenched',
  'foreignnationality',
  'mmsmwtimmmuummlimmw',
  'associationssentafrom',
  'iniquity-steeped',
  'nineieeathncentury',
  'commissioner-general',
  'politisentiments',
  'anti-imperialists',
  'well-constructed',
  'chattanoogadaily',
  'secretary--foster',
  'fellow-petitioners',
  'andsugarbeetland',
  'penny-in-the-slot',
  "remarkable'importation",
  'stalwart-looking',
  'magistratebelieveth',
  'printedongoodpaper',
  'incompetentreligious',
  'advertisementcaptured',
  'quasi-partnership',
  'indisputabletruth',
  'forgivethesetears',
  'self-contradiction',
  'physicalnecessity',
  'ailliliilliiiiiiimiiiiii',
  'corporaexplained',
  'ailopteclasnytbratedl',
  'owisosossorramomontr',
  'smallconsideration',
  'anxietyofthesundaypreachersisto',
  'butthenishallknow',
  'self-stultifying',
  'stevensonandhisconstituents',
  'thebookisneatlygottenup',
  'self-stultification',
  'ecclesiastisustain',
  'imprisonmenttomakemenwiserandtteer',
  'following-conclusions',
  'beenurgohtintothechurchbythe',
  'fellow-religionists',
  'questiondestruction',
  'karaprieicrovrai',
  'comprehensiveview',
  'revolutionarywar',
  'conversation-published',
  'conditionsprevailing',
  'anddiseasesofthe',
  'condignpunishment',
  'anti-imperialism',
  'americanimperialism',
  'perfectionshould',
  'reefeoivredyratrsefeinreqnucierietso',
  'alreadyfarniliar',
  'therightsofthepeople',
  'accordpreservation',
  'icldiisoienaawary',
  'andhebaselyintimates',
  'willianimckinley',
  'commanderin-chief',
  'extreme-distress-of',
  'thesecommunities',
  'selfregeneration',
  'austria-hungarian',
  'non-commissioned',
  'vanymoohearrnramedneorastonninneljadyek',
  'non-interference',
  'publishingcompany',
  'national-reform-sabbath-union-sundayclosing',
  'iknowevenasalsoi',
  'notwithstandingconstitutional',
  'actotjejtotactat',
  'isfullofhappysur',
  "worlsd'exposition",
  'alnericanbentiuel',
  "embedding'itself",
  'rapidlyincreasing',
  'establishredemption',
  'liraitedlerritorrreads',
  'karapyllobaovrat',
  'anti-constitutional',
  'determinationforesaid',
  'nomorethanfollowing',
  'imbibingreligious',
  'minister-secretary',
  'government-without-the',
  'twasintrafalgarsbay',
  'comingsolongastheservicesareheld',
  'inter-communication',
  'weligtonsiliberty',
  'constructionsupon',
  'iiiiiiiiiiiiiiii',
  'politicalatheism',
  'idnfvtleilubageiok',
  'iiiiimiiiiimmiiiiini',
  'religious-persecution',
  'tennesseeforbids',
  'ever-threatening',
  'non-communicating',
  'stampswillbeacceptableforsmallremit',
  'bibleobjectlessons',
  "don'ttakeyaonufaingrocuytwuhnatitl",
  'refceoivreydeisarsinir',
  'religfundamental',
  'successfuyllreosssecuthd',
  'imiiiiimiiiiimiiiiim',
  'gasternppaasssenger',
  'uponllegislation',
  'supisillustrated',
  'socially-degenerating',
  'convertedintoaholiday',
  'long-anticipated',
  'conimissioicfrem',
  'andadvanceordershavebeenreceivedforhundredsofcopies',
  'sorely-persecuted',
  "it'diaerithinates",
  'includingtheologians',
  'madeinstitutions',
  'toanythingtending',
  'sundaylawsclaimthat',
  'venerablespioneer',
  "theworl'd-fathed",
  'dark-superstitions',
  'caramminambemimennommirom',
  'faceteytvlaoliat',
  'ttttiiiiiityttttttttttf',
  'american-sabbath-union',
  'whichweresosuddenlyconvertedintoreligiopolitical',
  'theehouseholdmoellerofhealth',
  'non-preservative',
  'suppressordinance',
  'pleasure-seekers',
  'namesofscripture',
  'sientripientimea',
  'interestingthisweek',
  'songsforlittleones',
  'ittttttttttttttttttti',
  'enforcepolitical',
  "aseeuted'jjaammees",
  'toexerciseanycoercionwhatever',
  "l'itite'situcat'",
  "heading'paragraphs",
  "that'association",
  'office-distributing',
  'excitement-loving',
  'thereligicairiberty',
  'exereiseithereofv',
  'liberty-lovingstatesmen',
  'inresponsibility',
  'photo-electrotyping',
  'thoroughlyfurnished',
  'theirconvictions',
  'judgesteinoverruledthepleathat',
  'bwaltztatoyeatort',
  'bibleillustrationsandstoriesthatwehavebeen',
  'religiouscharaeter',
  'consideraminister',
  "religiously'observe",
  'heavenlycitizenship',
  'ongregationaliistychluich',
  'singlesubscription',
  'smokeof-burning-',
  'heavyto-be-borne',
  'compelleartalligten',
  'seventyfive-dollar',
  'carefully-guarded',
  'adaptthelifeofchristtothe',
  'nationalpridethat',
  'zondaysschencling',
  'pseudo-christianity',
  'prisonconsecrated',
  'church-and-state',
  'protectionaccorded',
  'postmaster-general',
  "salisbury'sgearlesscorset",
  'cannot-regard-their',
  'fellow-countrymen',
  'especiallysuited',
  'smintrifilivmila',
  'thoseinstruments',
  "governmentsgod's",
  'importantpiestion',
  'ckinciickieseuonf',
  'ivilerlicelvajle',
  'appallingproportions',
  'great-grandfather',
  'sixtymile-an-hour',
  'romanismandcivilliberty',
  'compulsory-idleness',
  'constistitutional',
  'correctlyrepresent',
  'constitution--legislation',
  'democratic-republican',
  'gtilttertisenteitth',
  'employmentelsewhere',
  'idouwillixtnintrcuicstoru',
  'sseellrfonouncing',
  'ffitymityytyymyytyymtv',
  'andthysicalvigor',
  "the'difficulties",
  'ihavereceivedmybible',
  'religio-politicians',
  'democraticgovernment',
  'counter-memorials',
  'familygovernment',
  'racravripyvioatv',
  'atatatatatatatatatat',
  'ifwiththetongues',
  'sundayconcertintheoperahouse',
  'commandment-keepers',
  'allworkingpeople',
  "apartmentbuilding's",
  'disconnectedherself',
  'florencejarizona',
  'practicepersecutionfor',
  'intentionallyignore',
  'miiiiimiiiiiimiiiiimiimmiiiiim',
  'certainconditions',
  'religioueliberty',
  'lieutenant-colonel',
  'scatteredthrough',
  'religionsithings',
  'appealandremonstrance',
  'itigillihwililljaiiira',
  'accomplishstatement',
  'furtherexpressed',
  'practicalreference',
  'sendittoyourfriends',
  'theargumentwhich',
  "money-gatherers'",
  'seventh-day-keeping',
  'amusement-loving',
  'divinely-imposed',
  'nviaenikensommewniegoe',
  'asgoodassellsfor',
  'agnosticsperhaps',
  'saturday-sabbath',
  'mmerrimmilummummulimmmummillunmil',
  'sacredychronology',
  'semi-reitschensk',
  'perrnariehipolitieo',
  'heaven-descended',
  'ttttttttttttttttttttttttttttli',
  'missiourielected',
  'pseudo-religious',
  "superintendents'",
  'writefordescebtlyocattuegue',
  'sunday-legislation',
  'self-opinionated',
  'protestant-jesuit',
  'permissibleunder',
  'weaschristianworkersinthecauseofchristdeem',
  'ofassortedhealthfoodcrackerssentpost',
  'cenacliolieeoelpe',
  'beuncompromisingiyoppesedto',
  'priziateyinterviews',
  'politicsirepresents',
  'divinely-ordained',
  'independencelies',
  'kaitnyrdieixicseuonf',
  'theyosemitevalley',
  'everybodylaughed',
  'counter-revolution',
  'the-mediterranean',
  'sheepskin-covered',
  "and'spiritualists",
  'scientificamerican',
  'otherinsurrectionists',
  'dayadventistsandthecourts',
  'nationshilthighty',
  'thesafeligiousintolerancefromwhichallreligious',
  'zntsthxtrealgcterxwc',
  'social-amusement-loving',
  'enough-punishment',
  'insurpassability',
  'ritualhealtheltreat',
  "arrested'c-harged",
  'merieanstatepapers',
  'quarter-centennial',
  "teifige'znegivgtig",
  'diosthnontoptuhfci',
  'civilgovernmentandreligion',
  'breckinridge-morse',
  'amazingprevalence',
  'christiansunconsciously',
  'amtrintcarkilong',
  'thegiairoraffitiliw',
  'etianprinciplesof',
  'importancethanthe',
  'sundaylawmovement',
  "calieds'aisealpt",
  'court-martialled',
  "xrcavolitio'obegbhuingezanra",
  'kirchengeschichte',
  'american-philippines',
  'inquisitor-general',
  'church-fellowship',
  'thenaturalallianceexisting',
  'independencovhich',
  'anti-evangelical',
  'quickly-discovered',
  'self-preservation',
  'carriedoutinthenameofthewholecatholiccommu',
  "the''''anierican",
  'nineteenthcentury',
  'correspondentadmits',
  'advertisement-writers',
  'undervitalizatiom',
  'pagancounterfeits',
  'counter-petition',
  'christianity-with',
  'larciestiiedical',
  "attorney-general's",
  'overwhelminglyin',
  're-enteringfields',
  'decently-dressed',
  "national'apostasy",
  'andtobringdowndamnationandcursesuponevery',
  "ruted'jamestanner",
  'compelattendance',
  'othermakesmaybegood',
  'specialarrangement',
  'indifferentiated',
  'theunitedstatessenate',
  'assurriptionists',
  'amitricanininelo',
  'itisjustwhatihavelongwanted',
  'selfpreservation',
  'ittenmtlettmmtrimitilm',
  'uticompromisingly',
  'certainlynoeffort',
  'inconsistencythe',
  'presbyterianbrother',
  'aviorousandtisrrinad',
  'revolutionaryresolution',
  'fdiesthnontoptubfef',
  'subscriptionprice',
  'commercial-appeal',
  'unitedstatesconstitutionasit',
  'rapidly-increasing',
  'ecclesiasticocivil',
  'evidenceattachecl',
  'thebiblegivenasapresentforsixnewyearlysubscriptionsat',
  "our-times-'toward",
  'fellow-believers',
  'brigadier-general',
  'religio-philosophic',
  'mmiimiiiiiiimiiiiiimiiiimm',
  'icarapyrieliaovrat',
  'entiremembership',
  "pgafria'nb'tatif",
  'reuaftintofbeeticed',
  'ftillieratillteminnimiennisiiiiiiwangmwo',
  'papacywasfullydeveloped',
  "se'whatetherssay",
  "administration'was",
  'fifteenth-century',
  'icfaytoaulhoagveoafnoyuirdepaerobflpiucracthiasoin',
  'politicaldiseussions',
  "oriall'ittiseiprer",
  'pointofdisturbance',
  'would-bereformers',
  'ten-thousand-mile',
  'whichgovernments',
  'thedowadelegationrand',
  'tobemightyupontheearth',
  'prohibitionblasphemy',
  'church-cherished',
  'temporalexpediency',
  'thenationalsundaylawbanned',
  'incomprehensibilities',
  'belieftprqbrship',
  'astothemeritsofthebibleweoffer',
  'amendmentthought',
  'onstratethepropriety',
  'catholiestandard',
  'educationaljathe',
  'wemustthereforeconcludethatthe',
  'includedwhatever',
  'whatabouttheindividualwho',
  'pageillustrations',
  'tenderrestsupontendrive',
  'precipitatelyfrom',
  'goodsubstantialhighgradebicy',
  'thedifferentstates',
  'mueontoutlhninfg',
  'penalties-enacted',
  'furtherinformation',
  'thesentinellibrary',
  'antipedo-baptists',
  'soul-crushingcorporations',
  "smitli'sdialraifi",
  'religiousinstruction',
  'mixiimiiiiimiiiiiinniiiiime',
  'gttittertistinents',
  'ordinaryinstruments',
  "teachers'fecieration",
  'church-instituted',
  'defendthemselves',
  'theamericansentinel',
  'righteousnessright',
  'fourtlybommandment',
  're-establishment',
  "will-o'-the-wisp",
  'sanctimoniouspolitical',
  "students'library",
  'forashorttimeonly',
  'ever-compassionate',
  'aravarimiiiiininisruninisimigivar',
  'hethatspeakswithatongue',
  'socialist-catholic',
  'imomenzipipimmiiisim',
  'foreigncountries',
  'muchtoitseducationalvalue',
  'luinrdeorstparnod',
  'addireadytobreakandoverwhelmitinsocial',
  "atnerican'exposition",
  "will-o'-thewisps",
  'miiiiimiiitimomi',
  'individualchristians',
  'andpronouncesthemwith',
  'leaderoftheaceinpracticalimprovements',
  'sthepapacyinprophecy',
  'underacknowledgment',
  'nationalconstitution',
  'pilateunderstood',
  'samplecopiesmailed',
  'commander-in-chief',
  'neofthelargestsanitariumin',
  'containingadditional',
  'selfstultification',
  'caveatsjrademarks',
  'cross-questioning',
  'waspresentatthetrialoutlinestheproceed',
  'admininistration',
  'statedistinguished',
  "fox'sbookofmartyrs",
  'labor-protecting',
  'nineteenth-century',
  'presbytericvnism',
  'itfollowsthatthisisnot',
  'ex-attorney-general',
  'pacificpressmussingco',
  "proipnhepsayri'npanardt",
  'ourbabyisatestimonialtosanitariumfood',
  'self-sufficiency',
  'tdivinitycircuit',
  'idsimpleconstruction',
  'whichisperfectlyproperifthey',
  'presspublishingco',
  'statcesonstitution',
  'knowledge-disseminating',
  'politicalcorruption',
  'inventioncertain',
  'sergeant-at-arms',
  'inseparablerelation',
  'thecounselforthe',
  'civicrighteousness',
  'religious--observance',
  'densely-populated',
  'prohibitspriests',
  'afitritifiralneviran',
  'observinstitution',
  'physical-necessity',
  'religio-political',
  'interdenominationalism',
  'opportunitiesfor',
  'uncompromisinglyopposed',
  'evaseparate-isfr',
  'non-intervention',
  'information-seekers',
  'half-century-old',
  "wetfavatwarseletertese'letetesetew",
  'thecatholicchurchcannotdoany',
  'papacyinprophecy',
  'politicalreligious',
  'billiard-playing',
  'unfpracarpanmici',
  'persecutionbothinrussiaandgermany',
  'theresponsibility',
  'hisgloriousappearing',
  'importantito-day',
  'religious-liberty',
  'karapynolicrerat',
  'cynosureindorses',
  'instanceexpansion',
  'evangelical-lutheran',
  'thegreatestreformer',
  'pago-christianity',
  'unctralitioraitiftkly',
  'postagestampsaccepted',
  'thtshbeoonikahnauscbriethfohrder',
  'trance-mediumship',
  'act-of-parliament',
  'titmitmmitimmvitimmvimimim',
  'constitutional-principles',
  'morally-instructed',
  'ithethobjecickainnaidig',
  'attorney-general',
  'spiritualmindedness',
  'latecommissionerof',
  'awnfloaiiiavhmasct',
  'straight-jacketed',
  'tailtstkibitiontiottitallp',
  'christianisabbath',
  'prayer-meetmeeting',
  "ictrliot'ocoteuhi",
  'pseudo-millennium',
  'vanderbilt-rockefeller',
  "themselves'damnation",
  'piearksetooasdas',
  'politicalspeeches',
  'anotherinfluentialcommittee',
  'independtrespassers',
  'tax-gatherportant',
  "be'liarticulatif",
  'direcmanufacture',
  'vssbiatotiaysord',
  'religioustraining',
  'ednimittaeappointed',
  'sabbathdesecration',
  'systeinisthestate',
  'carefullyselected',
  'theseintroductory',
  'brecorrespondent',
  'sikteefitlfeentuty',
  'perfectlycertain',
  'reprefientatives',
  'concordance--subject',
  'concernministers',
  'universally-binding',
  'its-constitutionality',
  'anti-sunday-work',
  'alvjetriiezica-int',
  'abookforthechildren',
  'rapidly-approaching',
  'coinmuniccations',
  'notuhnesreeomwlny',
  'singlosubsoription',
  'state--possesses',
  "frow'massachusetts",
  'iipiiibsbirreinin',
  'self-justification',
  'gitvtriistattnts',
  "mechanicar'processes",
  'papillaryattradtion',
  'ireceivedthebibleingoodcondition',
  'tttttttttttttttttttttttttttti',
  'self-righteousness',
  'uncompromisimily',
  'fundamentalprinciples',
  'frommassachusetts',
  "diligently'instruct",
  'criminalzofficer',
  'iubocnidcmsatlrae',
  'vastexpenditures',
  'noresponsibility',
  'instructiongiven',
  'forbearingoneanotherandforgivingoneanon',
  'rightfullypossess',
  'waterburyamerican',
  'occasionally-found',
  'whichhasforitsobjectaunionofchurchand',
  'itisacompletehistoricalanddescriptivesummary',
  'spirit-wrestlers',
  'aiviericansentinele',
  'thecommonwealths',
  'illustriousvisitor',
  'teodmyuapratliyving',
  'responsibilflicted',
  'non-professional',
  'chrisrequirements',
  'payingsecurities',
  'unitedstatesconstitution',
  'humorist-philosopher',
  'thereligpossible',
  "politically'included",
  'itsotsvesisssiti',
  "l'atrztomiwil'illf",
  "'reconcentrados'",
  'm-hintthyltsfftr',
  'immobility--that',
  'bestandbiggestnewspaper',
  'government-endowed',
  'commandment-keeping',
  'the-incipleneyof',
  'exemption-appendix',
  'itlymnvmmninntvivirmlnyrninnyvvrtfummrmymvmmyy',
  'lieutenant-general',
  'this-communication',
  "tourists'edition",
  'uneompromisingly',
  'overorganization',
  'lspeadebilitated',
  'nationalreligion',
  'civilgovernineat',
  'interferencemust',
  'religious-legislation',
  'isdiscriminationand',
  'handsomelyillustrated',
  'distribution-win',
  'eternally-enduring',
  'accuratepronuncia',
  'pulpit-reverencing',
  'counter-arguments',
  'damefashionandherslaves',
  'certainunalienable',
  'andalsoofthedominionof',
  'temporalgovernments',
  'respectable-looking',
  'differencelbetween',
  'totouristsandallclassesofinvalids',
  'ofpageitwillbeseenthattherevisedsieornreads',
  'andyoushouldreadit',
  'presenting-popish',
  'righteousnessthat',
  'citerdreilediettlith',
  'interferencewith',
  'these-dissenting',
  "american'executive",
  'suchanti-christian',
  'union--embracing',
  'thisencouragement',
  'god-in-the-constitution',
  'repudiconscience',
  'congressman-elect',
  'elfavpronouncing',
  '------------------------',
  'fellow-passenger',
  "the'''onventional",
  'circumstancesithe',
  'concerningthismuch',
  'theresponsibilities',
  'miiiiimiiiiiimiiiiimiiiiimiiiiim',
  'totouristsandall',
  'multi-millionaire',
  'infaithfulwarning',
  'xpositionbnildings',
  'issofarasregards',
  'trans-continental',
  'tleeeeeseeeemeetreeeeeeleeeeoweeek',
  "negoweenalkogee'ree'lkowee'ftielieiegeseilielelereennellege",
  'witgibettbacription',
  'arrestediprpvided',
  'icarapynoicrovray',
  'vrecrlanrrsefeinretinuelerietso',
  'iaicificpresspublishing',
  'qualificaproperly',
  'andsuperiorgoodness',
  'bois-de-boulogne',
  "pernicious'effects",
  'endangeringamerican',
  'sundaymuseum-closing',
  "alldenominations'",
  'half-disheartened',
  'corncommandments',
  'dishonuncivilized',
  'receivingtheamericansentinel',
  'catarrhinhalerfree',
  'ex-congregationalist',
  'christian-civilization',
  'practicestouching',
  'b-uc-h-a-d-n-e-z-z-a-r',
  'betfererigagerfents',
  'isthepapacyinprophecy',
  'nearly-co-extensive',
  'iimmuttimtesetstliumilimosillumetuilmtounntimmilitemttlirmillotmultm',
  'declaratiorrstates',
  'looselegislation',
  'withhandsomedesignincolors',
  'igshallwbheicdiiot',
  'ameeicansentinel',
  'threedollar-a-day',
  'fellow-clergymen',
  'covxaxalkaifrtil',
  'beulicompromisinglyopposedto',
  'gospelredemption',
  'thatzwouhaveilaws',
  "legislativ'fhalls",
  'irreparabledamage',
  "world'sfairinchicago",
  'sparsely-settled',
  'school-inspector',
  'sundaynewspapers',
  'blood-guiltiness',
  'thoroughnational',
  'unparlianientary',
  'postmastergeneral',
  'anti-reformation',
  'theirconfinement',
  'rougotrikitgeusp',
  'pliiiilloototiollipimpiiitilligill',
  'turbulently-inclined',
  'followingpropositions',
  'definitelylocated',
  'articlesfrformom',
  'representativesfromdifferentpar',
  'caytoaulhoavgeoafnyouindepuaobflpicuraethiaosinngs',
  'amsterdampleyden',
  'andtoforbideverythingwhichisnot',
  'unrrecardenpraid',
  'politicalteligionists',
  'containinghotiseholdand',
  'aleaderofthenewdemocracy',
  'whatever-standpoint',
  'selfcontradictory',
  'director-general',
  'anti-expansionists',
  'tilitakgilowledg',
  'inspectorgeneral',
  'llitttitittittilltja',
  'well-proportioned',
  'receiver-general',
  'overwfieliningaria',
  'ptillsfilielesddigrallrgt',
  'loverofcivilandreligiousliberty',
  'appropriationonsunday',
  'cannot-buy-or-sell',
  'constideclaration',
  'civilrgovernment',
  'successfullyused',
  'super-calendered',
  'oliwethoebbjercetekaindr',
  'imidinovosillyisp',
  'governmentappointed',
  "'self-government",
  'butthecommandmentsofgodapplytothe',
  'successfullydprosecuted',
  'widely-different',
  'nagwordsabandebelhievaeqnat',
  'self-gratification',
  'fellowcommissioners',
  'thelawswhichprotectitare',
  'counter-political',
  'thicklyinhabited',
  'bbelebnleunsienagrl',
  'touitrrehinelieteitnhi',
  'robber-chieftans',
  'isdtointctreasekthesubscriptitoinlisltoffthe',
  'thegroundsandartgalleriesmightbeopen',
  'foritseinipirneeaitteiothertaltssittistninuitsrreistleilice',
  'thelliibbeerralliitty',
  'iiinssaerksetooasaas',
  'exercisethemselves',
  'pagancounterfeitsinbtyhe',
  'antipedobaptists',
  'notthelawsregardingsundayobservanceaconcession',
  'prohibitszfreedom',
  'iinothingbetterpublished',
  'thfiftlestidifiblimitirlitif',
  'religiousliberty',
  'fiftyonethousand',
  'protestantseatholics',
  'twenty-four-hour',
  'sanctificationist',
  'developmenthasvaried',
  'itmakesallthedifferencein',
  'ltoobothforemote',
  'christianitywould',
  'highly-civilized',
  'elementarycdaution',
  'recfeoivreydeianrrsefeirnegnucieriteso',
  'appeal-avalanche',
  'verbatimreportofthespeechesof',
  'austro-hungarian',
  'ileustrationsare',
  'ifyoupreferthehalf',
  'legislaincorporation',
  'consciencewillbe',
  'scientifically-proved',
  'thtishbeoomhahnauscbreipsst',
  'websterdictionary',
  'non-church-goers',
  'politico-ethical',
  'amendmentproposed',
  'sectioh-rdeclared',
  'isunconstitutionaland',
  'selfinterpreting',
  'willbesenttoanyaddressonre',
  'itwasshowntohimandto',
  'churchmembership',
  'thanksgiving-days',
  'thatscivilization',
  'no-day-in-particular',
  "religiopolitical'",
  'street-preaching',
  'eieleeriieseceix',
  'american-catholic',
  'wanttopraylongandprayearnestlyand',
  'personal-liberty',
  'lengtlireonimunication',
  'pacificbainescollege',
  "'representative'",
  'sciatriameagency',
  'elkhartcaizeiage',
  'theseextraordinary',
  'butwhetherprophecies',
  'writefordeseriptivecatalogue',
  'previousviotation',
  'divinely-conferred',
  'dynasty-stricken',
  'sunday-journalism',
  'yodfamaegxedceolfflereednatt',
  'theweightofoneof',
  'spiritually-minded',
  'sabbath-observance',
  'renderingallegiance',
  'miiiiimiiiiimitlin',
  "correspondent'of",
  'seriously-minded',
  "ithe'iinpossibillw",
  'sitoorrioraltity',
  'sthvatlusaobtreuafohrte',
  'dearlyunderstand',
  'interestsaffected',
  'gmakeasurecovenant',
  'receivesappropriations',
  'interffeerreennccee',
  'sunday-closinglaws',
  'liberty-exemplifying',
  'circularsandfull',
  'beuncompromisinglyopposed',
  'policemanization',
  'reefeoiredyeiaerrsefe',
  'thanksgiving-day',
  'discriminationshall',
  'bottomwithmetalandready',
  'thtishbeomokahnauscbreipent',
  'libertypossessed',
  "'putratherthayte",
  'theissueswhichthispaperdiscussesarethe',
  'non-ecclesiastical',
  'theauthorhassoughttomakethisbookone',
  'civilly-enforced',
  'whiicchhppiicture',
  'toultknlitttlftten',
  'present-president',
  'divisiondvizithenndividual',
  'no-entanglingalliance',
  'these-jealous-minded',
  'congresstoopposetheadoptionofthejointresolution',
  "didn'tiresigniand",
  'especiallyforconvertingnutsintobutterfor',
  'fikerfilneakatist',
  'otherimprovements',
  'thattheyareinerror',
  "liberty'association",
  'seventh-partof-time',
  'butthewatermanisthebest',
  'kblifibrbatatelto',
  'alutroroxitiphroheil',
  'historyofamerica',
  'slagglezatattogiveitv',
  'lottelltrzoistax',
  'wehaveaselectstockofthisbeautifulandinstructive',
  'lduidcircorouuss',
  'lewatedboyerlifer',
  'imchangeableness',
  'foracieidanpraco',
  'establishingamerican',
  'exciteadmiration',
  'oublisheitquarterly',
  "constitufiou'and",
  'sliiiiimiliinneliiiiim',
  "'constitutional'",
  "missionary'incitiful",
  'an-impossibility',
  'renderacceptible',
  'beenlrecommended',
  'straight-fromthe-shoulder',
  'mudthestreasoning',
  'narrowest-minded',
  'half-consolidated',
  'thwompromisingly',
  'sabbath--desecration',
  'suclitsltyithuetaelfthofrutle',
  "profess'clitistianity",
  'thoroughlyintroduce',
  "resolution'adopted",
  'harmlesstmeeting',
  "self-government'",
  'areportofthehearingonthesundayclosingof',
  'pauobflpicuarcthiaosins',
  'long-established',
  "wew'ilrleceivepostagestampsinsmallquantitiesandanykindofgood",
  'bishop-assistant',
  'sabbathobservance',
  'seventh-dayadventist',
  'self-pronunciation',
  'possessinterposes',
  'puritan-american',
  'unscripturalalso',
  'self-aggrandizement',
  'alifornimiligsts',
  "missionary's'work",
  'significaquestion',
  'individualfreedom',
  'andveryseldomevenin',
  'misunderdulgence',
  "german'missionaries",
  'miiiiimiiiiiimiiiiim',
  'beingtaughtinourschools',
  'breckhriagesundaybill',
  'communityseventy-five',
  'carefully-gleaned',
  'plainly-apparent',
  'wtgeantseacteirvye',
  "legislative'halls",
  'iljniprieciidienirbe',
  'ahatihntenaarvelai',
  'healthandtemperancemiscellany',
  "imperial'government",
  'german-americans',
  'daintily-prepared',
  'divinelyappointed',
  'curiosity-seeking',
  'practiceintosmall',
  'treasurer-general',
  'widelyrecognized',
  'dfamaegxedceolfielreednatt',
  'non-parishioners',
  'civilgovernments',
  'non-interruption',
  'prevailingneglect',
  'post-intelligencer',
  'thunderingcataract',
  'underconsumption',
  'miiiiimiiiiimiciiiimiiimmiiiiim',
  'counsellor-at-law',
  'andthesehavebeenturnedover',
  'constitutionallimitations',
  'civilandreligiousfreedom',
  'aecyteiryyewhere',
  'rvsepvitittauarltsic',
  ...],
 15)

Correction 8 -- Remove long error tokens

In [38]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = cycle
cycle = "correction8"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    sub_list = ["m|M", "e|E", "f|F", "l|L", "i|I", "t|T"]
    
    replacements = []
    for sub in sub_list:
        replacements.append(clean.check_for_repeating_characters(tokens, sub))
    
    replacements = [item for sublist in replacements for item in sublist]
            
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
AmSn18890710-V04-24-page7.txt: [('PACIFICBilliollEoll', ' ')]
AmSn18911126-V06-46-page1.txt: [('PliiiilloototiollIPIMPiiitilligill', ' ')]
AmSn18921020-V07-41-page1.txt: [('iiiiiiiiiiiiii', ' ')]
AmSn18960402-V11-14-page3.txt: [('IIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIII', ' ')]
AmSn18960924-V11-38-page5.txt: [('INTERFFEERREENNCCEE', ' ')]
AmSn18980113-V13-02-page1.txt: [('ifigiiiiiiiiiiii', ' ')]
AmSn18980120-V13-03-page1.txt: [('iiiiiiiiiiiiiiii', ' '), ('AilliliilliiiiIIIMIIIIII', ' '), ('MENIIIiiiiiiii', ' ')]
AmSn18990202-V14-05-page13.txt: [('tleeeeeseeeemeetreeeeeeleeeeoweeek', ' ')]
AmSn18990810-V14-31-page14.txt: [('MIIIIIMIIIIIIMIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18990817-V14-32-page14.txt: [('MMVIMAIMUMMIAMIAMIIIMMINEMIIMMIIIMMOM.', ' '), ('MIIIIIiiIIIIMS', ' '), ('MMVIMAIMUMMIAMIAMIIIMMINEMIIMMIIIMMOM.', ' '), ('MMIIMIIIIIIIMIIIIIIMIIIIMM', ' ')]
AmSn18990824-V14-33-page14.txt: [('IIIIIMIIIIIMMIIIIINI', ' '), ('MIXIIMIIIIIMIIIIIINNIIIIIME', ' ')]
AmSn18990831-V14-34-page14.txt: [('MIIIIIIMAIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIIMmimMliiiIM', ' ')]
AmSn18990907-V14-35-page14.txt: [('MIIIIIMMIIIIIIMIIIIIM', ' ')]
AmSn18990914-V14-36-page14.txt: [('SliiiiiMIliinneliiiiim', ' ')]
AmSn18990914-V14-36-page15.txt: [('eitifiltWiffeffalliallill', ' ')]
AmSn18990928-V14-38-page14.txt: [('militiMIIIIIMIllirMIIIIIMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18991019-V14-41-page14.txt: [('IMIIIIIMIIIIIMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIMIN', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18991102-V14-43-page14.txt: [('MIIIIIIMMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIII.', ' ')]
AmSn18991109-V14-44-page14.txt: [('MItttIMIIIIIIIIIIMIIIIIMIIIII', ' ')]
AmSn18991207-V14-48-page15.txt: [('M.IIIIIMIIIIIMIIIIIMIIIIMICIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18991214-V14-49-page15.txt: [('iiiiiiiiiiiiiiii', ' ')]
AmSn18991228-V14-50-page15.txt: [('ImmummtimmomOmmumMommON.MMI.O.Wilimm.MMERRIMMiluMMummuliMMmummillunmil.MminmMuummunmmummismimmil.mmlimmmulimmili.mmsmWtimMmuummlimmw.m.m.ft.mammW.M', ' '), ('mimmummulimmOUmunnmOluimmmumm.m.Mumnaum.mlimmmummumilMi.Mmuimft', ' ')]
AmSn19000104-V15-01-page15.txt: [('MIIIIIMIIIIIMIIIIIMIIIIIMIIIIIM', ' ')]
AmSn19000111-V15-02-page14.txt: [('MIIIIIMIIIIIMICIIIIMIIIMMIIIIIm', ' ')]
AmSn19000118-V15-03-page14.txt: [('MIITIIMIIIIIM', ' '), ('MIIIIIMIIIIIIMIIIIIMIIMMIIIIIM', ' ')]
AmSn19000215-V15-07-page13.txt: [('IIIIIIMIIIIIMaiiiiimulimM', ' ')]
AmSn19000301-V15-09-page13.txt: [('ImprimmommwmimmoVum', ' ')]
AmSn19000308-V15-10-page14.txt: [('MIIIIIMIIIIIIMIIIIIM', ' ')]
AmSn19000322-V15-12-page14.txt: [("NegoweeNalkogee'Ree'lkowee'ftielieiegeseilielelereeNnellege", ' ')]
AmSn19000329-V15-13-page14.txt: [('eeeeeeeeeeeeeeee', ' ')]
AmSn19000517-V15-19-page11.txt: [('TYMMITIMMITIVIIMMIIMMIll', ' ')]
AmSn19000621-V15-24-page15.txt: [('TTTTIIIIIITYTTTTTTTTTTF', ' '), ('LLITTTITITTITTILLTJa', ' ')]
AmSn19000705-V15-26-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTTTI', ' ')]
AmSn19000719-V15-28-page10.txt: [('TITMITMMITIMMVITIMMVIMIMIM', ' ')]
AmSn19000823-V15-33-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTTTLI', ' '), ('ITTTTTTTTTTTTTTTTTITTTT', ' ')]
AmSn19000823-V15-33-page8.txt: [('itlymnvmmninntvivirmlnyrninnyvvrtfummrmymvmmyy', ' ')]
AmSn19000906-V15-35-page15.txt: [("TTTTTTTTIII'TTTTI", ' '), ('TTTTTTTTTTTTTTT', ' ')]
AmSn19000920-V15-37-page15.txt: [('ITTTTTTTTTTTTTTTTTTTI', ' ')]
In [39]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction8

Average verified rate: 0.9830200792014474

Average of error rates: 0.01865562518651149

Total token count: 8363231

In [40]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[40]:
[("'", 8053),
 ('t', 4426),
 ('e', 3978),
 ('d', 3950),
 ('w', 3754),
 ('co', 3408),
 ('m', 3171),
 ('n', 3046),
 ('f', 2035),
 ('r', 2028),
 ('th', 1645),
 ('g', 1371),
 ('mo', 1160),
 ('u', 926),
 ('x', 864),
 ('ex', 521),
 ('pa', 410),
 ('q', 399),
 ('sunday-law', 334),
 ('k', 315),
 ("the'", 304),
 ('pp', 299),
 ('tion', 276),
 ("conscience'", 260),
 ('ch', 253),
 ('seventhday', 249),
 ('re', 224),
 ('ga', 220),
 ('oc', 218),
 ('z', 215),
 ('wm', 215),
 ('satolli', 210),
 ('employes', 209),
 ('munn', 207),
 ('ti', 200),
 ('id', 181),
 ('un', 173),
 ('ry', 170),
 ('al', 166),
 ('sunday-closing', 160),
 ('ca', 151),
 ('ment', 146),
 ('chain-gang', 136),
 ("to'", 134),
 ('nd', 130),
 ('ll', 128),
 ('lb', 125),
 ('il', 123),
 ('bateham', 122),
 ('cmsar', 121)]

Correction 9 -- Separate Squashed Words

In [41]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log

prev = cycle
cycle = "correction9"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

verified_tokens = []

for filename in corpus:  
    content = utilities.readfile(directories['prev'], filename)
    clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)

tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]

sorted_list_of_words = list(words_sorted_short['token'])

wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
maxword = max(len(x) for x in sorted_list_of_words)

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = utilities.strip_punct(content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    
    for token in tokens:
        if not token.lower() in spelling_dictionary:
            if len(token) > 17:
                if re.search(r"[\-\-\'\"]", token):
                    pass
                else:
                    split_string = clean.infer_spaces(token, wordcost, maxword)
                    list_split_string = split_string.split()
                    
                    if clean.verify_split_string(list_split_string, spelling_dictionary):
                        replacements.append((token, split_string))
                    else:
                        pass
            else:
                pass
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
AmSn18860101-V01-01-page3.txt: [('accordpreservation', 'accord preservation')]
AmSn18860301-V01-03-page6.txt: [('indifferenumeration', 'in differ enumeration')]
AmSn18860301-V01-03-page8.txt: [('whichhasforitsobjectaunionofchurchand', 'which has for its object a union of church and'), ('whichweresosuddenlyconvertedintoreligiopolitical', 'which were so suddenly converted into religio political')]
AmSn18860501-V01-05-page1.txt: [('establishredemption', 'establish redemption')]
AmSn18860601-V01-06-page1.txt: [('theresponsibilities', 'the responsibilities')]
AmSn18860601-V01-06-page6.txt: [('legislaincorporation', 'leg is lain corporation')]
AmSn18860901-V01-09-page6.txt: [('Presbyterianbrother', 'Presbyterian brother')]
AmSn18861001-V01-10-page7.txt: [('elementaryprinciples', 'elementary principles')]
AmSn18861201-V01-12-page8.txt: [('WEhaveaselectstockofthisbeautifulandinstructive', 'WE have a select stock of this beautiful and instructive'), ('Thebookisneatlygottenup', 'The book is neatly gotten up'), ('printedongoodpaper', 'printed on good paper')]
AmSn18870101-V02-01-page3.txt: [('practicepersecutionfor', 'practice persecution for')]
AmSn18870301-V02-03-page2.txt: [('heartyacknowledgnaont', 'hearty a c know led g n a o n t')]
AmSn18870501-V02-05-page8.txt: [('willeverbeuncompromisingly', 'will ever be uncompromisingly')]
AmSn18870701-V02-07-page4.txt: [('Christianinstitutions', 'Christian institutions')]
AmSn18870801-V02-08-page2.txt: [('determinationforesaid', 'determination foresaid')]
AmSn18871001-V02-10-page8.txt: [('sanctimoniouspolitical', 'sanctimonious political')]
AmSn18880401-V03-04-page5.txt: [('followingpropositions', 'following propositions')]
AmSn18880601-V03-06-page7.txt: [('Stevensonandhisconstituents', 'Stevenson and his constituents')]
AmSn18881001-V03-10-page7.txt: [('havebeennoneofourbusiness', 'have been none of our business')]
AmSn18881015-V03-10a-page7.txt: [('beingtaughtinourschools', 'being taught in our schools'), ('andevenherecognizesinthe', 'and even he recognizes in the')]
AmSn18881201-V03-12-page4.txt: [('ProhibitionBlasphemy', 'Prohibition Blasphemy')]
AmSn18890213-V04-04-page3.txt: [('brieflycomprehended', 'briefly comprehended')]
AmSn18890320-V04-09-page7.txt: [('recuperatingqualities', 'recuperating qualities')]
AmSn18890327-V04-10-page7.txt: [('LOVEROFCIVILANDRELIGIOUSLIBERTY', 'LOVER OF CIVIL AND RELIGIOUS LIBERTY')]
AmSn18890417-V04-13-page7.txt: [('Avigorousandstirringad', 'A vigorous and stirring ad')]
AmSn18890424-V04-14-page7.txt: [('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')]
AmSn18890515-V04-16-page7.txt: [('sufferingswhichtimelyattentionmighteasilyhaveprevented', 'sufferings which timely attention might easily have prevented')]
AmSn18890522-V04-17-page7.txt: [('Theissueswhichthispaperdiscussesarethe', 'The issues which this paper discusses are the')]
AmSn18890522-V04-17-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18890605-V04-19-page7.txt: [('ButaGoodOneisaJoyForever', 'But a Good One is a Joy Forever'), ('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')]
AmSn18890612-V04-20-page7.txt: [('LEADEROFTHEACEINPRACTICALIMPROVEMENTS', 'LEADER OF THE ACE IN PRACTICAL IMPROVEMENTS')]
AmSn18890703-V04-23-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18890710-V04-24-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION')]
AmSn18890724-V04-26-page7.txt: [('PACIFICBainesCollege', 'PACIFIC B a ines College'), ('yousupposethatthegovernment', 'you suppose that the government')]
AmSn18890807-V04-28-page2.txt: [('brieflycomprehended', 'briefly comprehended')]
AmSn18890807-V04-28-page3.txt: [('Earthlygovernments', 'Earthly governments')]
AmSn18890807-V04-28-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18890821-V04-30-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18890828-V04-31-page2.txt: [('temporalexpediency', 'temporal expediency')]
AmSn18890828-V04-31-page8.txt: [('Allthereformswhichtheassociationregard', 'All the reforms which the association regard')]
AmSn18890905-V04-32-page1.txt: [('afterconsideration', 'after consideration')]
AmSn18890905-V04-32-page2.txt: [('theUnitedStatesSenate', 'the United States Senate')]
AmSn18890918-V04-34-page4.txt: [('ChristianStatesman', 'Christian Statesman')]
AmSn18890918-V04-34-page8.txt: [('profitableemployment', 'profitable employment')]
AmSn18890925-V04-35-page7.txt: [('THEMACHINEHASNEVERBEENUSED', 'THE MACHINE HAS NEVER BEEN USED'), ('THEWEEKLYWISCONSIN', 'THE WEEKLY WISCONSIN'), ('THEWEEKLYWISCONSIN', 'THE WEEKLY WISCONSIN')]
AmSn18891002-V04-36-page7.txt: [('thedifferentStates', 'the different States'), ('THEMACHINEHASNEVERBEENUSED', 'THE MACHINE HAS NEVER BEEN USED')]
AmSn18891016-V04-38-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18891023-V04-39-page6.txt: [('counterattractions', 'counter attractions')]
AmSn18891030-V04-40-page7.txt: [('EMPLOYMENTandWEESE', 'EMPLOYMENT and WEE S E')]
AmSn18891106-V04-41-page7.txt: [('toTouristsandallclassesofInvalids', 'to Tourists and all classes of Invalids')]
AmSn18891113-V04-42-page7.txt: [('ThePicturesqueRouteforBusinessand', 'The Picturesque Route for Business and')]
AmSn18891113-V04-42-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18891120-V04-43-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18891127-V04-44-page2.txt: [('ecclesiasticocivil', 'ecclesiastic o civil')]
AmSn18891127-V04-44-page5.txt: [('thedowadelegationrand', 'the do wade legation rand')]
AmSn18891127-V04-44-page7.txt: [('BESTandBIGGESTNEWSPAPER', 'BEST and BIGGEST NEWSPAPER')]
AmSn18891204-V04-45-page5.txt: [('demonstradepriving', 'demons trade p riving')]
AmSn18891204-V04-45-page7.txt: [('oftextsforeverydayintheyear', 'of texts for everyday in the year'), ('Abookforboysandgirls', 'A book for boys and girls'), ('willbesenttoanyaddressonre', 'will be sent to any address on r e')]
AmSn18891211-V04-46-page7.txt: [('AcresLandintheaboveCounties', 'Acres Land in the above Counties'), ('Theauthorhassoughttomakethisbookone', 'The author has sought to make this book one'), ('HistoryofProtestantism', 'History of Protestantism')]
AmSn18891218-V04-47-page8.txt: [('intentionallyignore', 'intentionally ignore')]
AmSn18891225-V04-48-page7.txt: [('tainingtestimonials', 'tain ing testimonials')]
AmSn18891225-V04-48-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18900116-V05-03-page7.txt: [('inanypropersenseofthe', 'in any proper sense of the')]
AmSn18900130-V05-05-page3.txt: [('studiouslyssecular', 'studiously s secular')]
AmSn18900130-V05-05-page7.txt: [('therecommendations', 'the recommendations')]
AmSn18900206-V05-06-page7.txt: [('SendittoYourFriends', 'Send it to Your Friends'), ('Papacywasfullydeveloped', 'Papacy was fully developed')]
AmSn18900206-V05-06-page8.txt: [('singlosubsoription', 'sing lo sub so rip t i o n')]
AmSn18900213-V05-07-page4.txt: [('barefacedmisrepresentationandbytheidr', 'barefaced misrepresentation and by the i dr')]
AmSn18900213-V05-07-page7.txt: [('furtherinformation', 'further information')]
AmSn18900306-V05-10-page1.txt: [('THEAMERICANSENTINEL', 'THE AMERICAN SENTINEL')]
AmSn18900320-V05-12-page6.txt: [('inestimableblessings', 'inestimable blessings')]
AmSn18900320-V05-12-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18900327-V05-13-page7.txt: [('PresbyterianChurches', 'Presbyterian Churches')]
AmSn18900403-V05-14-page5.txt: [('politicalreligious', 'political religious')]
AmSn18900410-V05-15-page7.txt: [('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION'), ('pieceandSIXBEAUTIFULLYCOLOREDPLATES', 'piece and SIX BEAUTIFULLY COLORED PLATES'), ('containinghotiseholdand', 'containing hot is e h o l d a n d')]
AmSn18900410-V05-15-page8.txt: [('beuncompromisinglyopposed', 'be uncompromisingly opposed')]
AmSn18900417-V05-16-page7.txt: [('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION')]
AmSn18900424-V05-17-page8.txt: [('singlesubscription', 'single subscription')]
AmSn18900508-V05-19-page5.txt: [('nationalcharacteristic', 'national characteristic'), ('spiritualmindedness', 'spiritual mindedness')]
AmSn18900515-V05-20-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18900522-V05-21-page2.txt: [('TOhimthatworkethnotbutbelieveth', 'TO him that worketh not but believeth'), ('Forbearingoneanotherandforgivingoneanon', 'For bearing one another and forgiving one an on')]
AmSn18900522-V05-21-page6.txt: [('Congresstoopposetheadoptionofthejointresolution', 'Congress to oppose the adoption of the joint resolution')]
AmSn18900529-V05-22-page7.txt: [('idsimpleconstruction', 'ids imp le construction')]
AmSn18900605-V05-23-page7.txt: [('PACIFICPRESSMUSSINGCO', 'PACIFIC PRESS M U S S I N G C O'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY'), ('EstablishedonFourContinents', 'Established on Four Continents')]
AmSn18900612-V05-24-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18900626-V05-26-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18900717-V05-28-page7.txt: [('EstablishedonFourContinents', 'Established on Four Continents')]
AmSn18900731-V05-30-page7.txt: [('similarstylesformerlysoldandstiltofferedatmuch', 'similar styles formerly sold and stilt offered at much'), ('Verbatimreportofthespeechesof', 'Verbatim report of the speeches of')]
AmSn18900807-V05-31-page1.txt: [('authoritativelywhat', 'authoritatively what')]
AmSn18900807-V05-31-page7.txt: [('TheSalemWitchcraft', 'The Salem Witchcraft'), ('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION')]
AmSn18900821-V05-33-page7.txt: [('BattleCreekBakeryCo', 'Battle Creek Bakery C o'), ('CIVILGOVERNMENTANDRELIGIONDUPLICATING', 'CIVIL GOVERNMENT AND RELIGION DUPLICATING')]
AmSn18900828-V05-34-page7.txt: [('MERICANINSTITUTIONS', 'MER I CAN INSTITUTIONS'), ('CIVILGOVERNMENTANDRELIGIONDUPLICATING', 'CIVIL GOVERNMENT AND RELIGION DUPLICATING')]
AmSn18900918-V05-37-page1.txt: [('compelleartaLligten', 'compel le art a L l i g t e n')]
AmSn18900918-V05-37-page7.txt: [('Sentbymailonreceiptofprice', 'Sent by mail on receipt of price')]
AmSn18900925-V05-38-page7.txt: [('HEALTHFOODCRACKERS', 'HEALTH FOOD CRACKERS')]
AmSn18901002-V05-39-page1.txt: [('whateverappropriation', 'whatever appropriation')]
AmSn18901002-V05-39-page7.txt: [('Constitutionsoldiery', 'Constitution soldiery')]
AmSn18901009-V05-40-page8.txt: [('weasChristianworkersinthecauseofChristdeem', 'we as Christian workers in the cause of Christ deem'), ('UnitedStatesConstitutionasit', 'United States Constitution as it')]
AmSn18901023-V05-42-page1.txt: [('correspondentadmits', 'correspondent admits')]
AmSn18901023-V05-42-page4.txt: [('ionastotherightOfGovernmenttocorrect', 'ion as to the right Of Government to correct')]
AmSn18901030-V05-43-page7.txt: [('Ioutlinesvividlytherelationthatexistedbetween', 'I outlines vividly the relation that existed between'), ('TheAmericanSabbathUnion', 'The American Sabbath Union')]
AmSn18901113-V05-45-page6.txt: [('Thenriollownumerous', 'Then rio l low numerous')]
AmSn18901113-V05-45-page7.txt: [('THENATIONALSUNDAYLAWbanned', 'THE NATIONAL SUNDAY LAW ban ned')]
AmSn18901218-V05-50-page10.txt: [('revolutionaryresolution', 'revolutionary resolution')]
AmSn18910108-V06-02-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('OfassortedHealthFoodCrackerssentpost', 'Of assorted Health Food Crackers sent post'), ('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910115-V06-03-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910212-V06-07-page3.txt: [('certainunalienable', 'certain unalienable')]
AmSn18910212-V06-07-page7.txt: [('HEALTHFOODCRACKERS', 'HEALTH FOOD CRACKERS')]
AmSn18910226-V06-09-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910312-V06-11-page7.txt: [('furtherparticulars', 'further particulars')]
AmSn18910319-V06-12-page7.txt: [('PEOPLINGOFTHEEARTH', 'PEOPLING OF THE EARTH'), ('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION')]
AmSn18910319-V06-12-page8.txt: [('singlesubscription', 'single subscription')]
AmSn18910402-V06-14-page7.txt: [('PEOPLINGOFTHEEARTH', 'PEOPLING OF THE EARTH')]
AmSn18910409-V06-15-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910409-V06-15-page8.txt: [('specialarrangement', 'special arrangement')]
AmSn18910416-V06-16-page6.txt: [('inscriptionianitig', 'inscription ian it i g'), ('differentreligious', 'different religious')]
AmSn18910416-V06-16-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910430-V06-18-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910507-V06-19-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910521-V06-21-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910723-V06-29-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910730-V06-30-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910827-V06-34-page6.txt: [('ReligiousLibertyAssociation', 'Religious Liberty Association')]
AmSn18910903-V06-35-page7.txt: [('STHEPAPACYINPROPHECY', 'S THE PAPACY IN PROPHECY')]
AmSn18910910-V06-36-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18910917-V06-37-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18910924-V06-38-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18911008-V06-39-page7.txt: [('ThunderingCataract', 'Thundering Cat ar act'), ('TheHouseholdWalterofHealth', 'The Household Walter of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18911015-V06-40-page7.txt: [('BYWILLIAMJACKSONARMSTRONG', 'BY WILLIAM JACKSON ARMSTRONG')]
AmSn18911029-V06-42-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY'), ('LateCommissionerof', 'Late Commissioner of')]
AmSn18911224-V06-50-page7.txt: [('petitionsoftheLongIslandfishermen', 'petitions of the Long Island fishermen')]
AmSn18920107-V07-01-page6.txt: [('denominationalists', 'denomination a lists')]
AmSn18920204-V07-05-page7.txt: [('andYOUshouldreadit', 'and YOU should read it')]
AmSn18920211-V07-06-page1.txt: [('spiritualmindedness', 'spiritual mindedness')]
AmSn18920218-V07-07-page3.txt: [('politicalinstitutions', 'political institutions')]
AmSn18920310-V07-10-page8.txt: [('disestablishmentmay', 'disestablishment may')]
AmSn18920324-V07-12-page6.txt: [('Sundayconcertintheoperahouse', 'Sunday concert in the opera house'), ('theCatholicChurchcannotdoany', 'the Catholic Church cannot do any')]
AmSn18920324-V07-12-page7.txt: [('SAMPLECOPIESMAILED', 'SAMPLE COPIES MAI L ED')]
AmSn18920407-V07-14-page7.txt: [('everypersoninterestedinmissionaryworkathome', 'every person interested in missionary work at home')]
AmSn18920428-V07-17-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18920526-V07-21-page7.txt: [('SAMPLECOPIESMAILED', 'SAMPLE COPIES MAI L ED')]
AmSn18920623-V07-25-page6.txt: [('NATIONALRELIGIOUSLIBERTY', 'NATIONAL RELIGIOUS LIBERTY'), ('representativesfromdifferentpar', 'representatives from different par')]
AmSn18920630-V07-26-page3.txt: [('protectionaccorded', 'protection accorded')]
AmSn18920721-V07-28-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18920728-V07-29-page5.txt: [('represeneverlasting', 'rep res en everlasting')]
AmSn18920811-V07-31-page4.txt: [('therepresentatives', 'the representatives')]
AmSn18920811-V07-31-page7.txt: [('theultrawingofthechurchpeople', 'the ultra wing of the church people'), ('thegroundsandartgalleriesmightbeopen', 'the grounds and art galleries might be open')]
AmSn18921006-V07-39-page7.txt: [('ITISACOMPLETEHISTORICALANDDESCRIPTIVESUMMARY', 'IT IS A COMPLETE HISTORICAL AND DESCRIPTIVE SUMMARY')]
AmSn18921013-V07-40-page8.txt: [('receivingTHEAMERICANSENTINEL', 'receiving THE AMERICAN SENTINEL')]
AmSn18921020-V07-41-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18921027-V07-42-page6.txt: [('nineteentwentieths', 'nineteen twentieth s')]
AmSn18921027-V07-42-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18921117-V07-45-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18921124-V07-46-page6.txt: [('Thereisnosafetyforourcountry', 'There is no safety for our country'), ('andsuperiorgoodness', 'and superior goodness'), ('SecretaryAmericanSecuraUnion', 'Secretary American Sec ur a Union')]
AmSn18921201-V07-47-page2.txt: [('disconnectedherself', 'disconnected herself')]
AmSn18930126-V08-04-page7.txt: [('persecutionbothinRussiaandGermany', 'persecution both in Russia and Germany'), ('notbeopenedonSunday', 'not be opened on Sunday'), ('isunconstitutionaland', 'is unconstitutional and')]
AmSn18930223-V08-08-page7.txt: [('thattheyareinerror', 'that they are in error'), ('andthatthelawsofthe', 'and that the laws of the')]
AmSn18930309-V08-10-page6.txt: [('Associationssentafrom', 'Associations sent a from')]
AmSn18930309-V08-10-page7.txt: [('pointofdisturbance', 'point of disturbance'), ('Itwasshowntohimandto', 'It was shown to him and to')]
AmSn18930323-V08-12-page1.txt: [('Governmentappointed', 'Government appointed')]
AmSn18930608-V08-23-page8.txt: [('JudgeSteinoverruledthepleathat', 'Judge Stein overruled the plea that'), ('Itmakesallthedifferencein', 'It makes all the difference in')]
AmSn18930615-V08-24-page5.txt: [('recentlyinterviewed', 'recently interviewed')]
AmSn18930622-V08-25-page7.txt: [('BibleObjectLessons', 'Bible Object Lessons'), ('SongsforLittleOnes', 'Songs for Little Ones')]
AmSn18930629-V08-26-page7.txt: [('BATTLECREEKBAKERYCO', 'BATTLE CREEK BAKERY C O')]
AmSn18930706-V08-27-page7.txt: [('AReportoftheHearingontheSundayClosingof', 'A Report of the Hearing on the Sunday Closing of'), ('ItEnablesEveryManandWoman', 'It Enables Every Man and Woman')]
AmSn18930713-V08-28-page7.txt: [('BATTLECREEKBAKERYCO', 'BATTLE CREEK BAKERY C O')]
AmSn18930713-V08-28-page8.txt: [('interestingthisweek', 'interesting this week')]
AmSn18930720-V08-29-page6.txt: [('religiouspersecution', 'religious persecution')]
AmSn18930727-V08-30-page6.txt: [('thejudgmentmayfallonanypartofasinfulnation', 'the judgment may fall on any part of a sinful nation')]
AmSn18930803-V08-31-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods'), ('OurbabyisatestimonialtoSanitariumfood', 'Our baby is a testimonial to Sanitarium food'), ('andisasruddyandhealthya', 'and is as ruddy and healthy a')]
AmSn18930810-V08-32-page7.txt: [('AppealandRemonstrance', 'Appeal and Remonstrance'), ('theFlorentineMartyr', 'the Florentine Martyr')]
AmSn18930817-V08-33-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods')]
AmSn18930907-V08-35-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods')]
AmSn18930921-V08-37-page8.txt: [('feWoriiiiiresseTatthe', 'feW or iii i ir esse Tat the')]
AmSn18931116-V08-45-page7.txt: [('adaptthelifeofChristtothe', 'adapt the life of Christ to the')]
AmSn18931123-V08-46-page7.txt: [('SPECIALHOLIDAYOFFER', 'SPECIAL HOLIDAY OFFER')]
AmSn18931221-V08-50-page7.txt: [('Bibleillustrationsandstoriesthatwehavebeen', 'Bible illustrations and stories that we have been')]
AmSn18940125-V09-04-page1.txt: [('thechurchforAmerica', 'the church for America'), ('thenaturalallianceexisting', 'the natural alliance existing')]
AmSn18940215-V09-07-page7.txt: [('andtobringdowndamnationandcursesuponevery', 'and to bring down damnation and curses upon every'), ('Couldfollybegreaterthanpraying', 'Could folly be greater than praying')]
AmSn18940329-V09-13-page7.txt: [('withgeographicalstatisticalnotes', 'with geographical statistical notes')]
AmSn18940412-V09-15-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18940419-V09-16-page5.txt: [('underacknowledgment', 'under acknowledgment')]
AmSn18940524-V09-21-page4.txt: [('addireadytobreakandoverwhelmitinsocial', 'addi ready to break and overwhelm it in social')]
AmSn18940621-V09-25-page7.txt: [('isthesameinallagesoftheworld', 'is the same in all ages of the world')]
AmSn18940802-V09-31-page2.txt: [('toexerciseanycoercionwhatever', 'to exercise any coercion whatever')]
AmSn18940823-V09-33-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18941004-V09-39-page5.txt: [('receivesappropriations', 'receives appropriations')]
AmSn18941018-V09-41-page3.txt: [('anxietyoftheSundaypreachersisto', 'anxiety of the Sunday preachers is to'), ('nomorethanfollowing', 'no more than following')]
AmSn18941018-V09-41-page4.txt: [('carriedoutinthenameofthewholeCatholiccommu', 'carried out in the name of the whole Catholic com m u')]
AmSn18941101-V09-43-page8.txt: [('convertedintoaholiday', 'converted into a holiday'), ('Thelawswhichprotectitare', 'The laws which protect it are')]
AmSn18941122-V09-46-page7.txt: [('ACRITICALHARMONYoftheGOSPELS', 'A CRITICAL HARMONY of the GOSPELS')]
AmSn18941206-V09-48-page8.txt: [('fromtheAdventistprintingofficeistheSchutzenplatz', 'from the Adventist printing office is the S c hut z e n p l a t z')]
AmSn18941213-V09-49-page7.txt: [('comingsolongastheservicesareheld', 'coming so long as the services are held'), ('wanttopraylongandprayearnestlyand', 'want to pray long and pray earnestly and'), ('comeoutopenlyinfavorofreligiousedu', 'come out openly in favor of religio use du'), ('hassoclearlyspoken', 'has so clearly spoken'), ('andhebaselyintimates', 'and he basely intimates'), ('WillpolishastovebetterClumany', 'Will polish a stove better C l u m a n y')]
AmSn18950117-V10-03-page1.txt: [('illustriousvisitor', 'illustrious visitor')]
AmSn18950124-V10-04-page7.txt: [('tenderrestsupontendrive', 'tender rests upon ten drive')]
AmSn18950131-V10-05-page3.txt: [('merieanStatePapers', 'me rie an State Papers')]
AmSn18950307-V10-10-page2.txt: [('notthelawsregardingSundayobservanceaconcession', 'not the laws regarding Sunday observance a concession'), ('wouldbecomeuniversal', 'would become universal')]
AmSn18950425-V10-17-page6.txt: [('GoodSubstantialHighGradeBICY', 'Good Substantial High Grade BIC Y'), ('includingtheologians', 'including theologians')]
AmSn18950509-V10-19-page4.txt: [('whatabouttheindividualwho', 'what about the individual who'), ('Transvaalgovernment', 'Transvaal government')]
AmSn18950620-V10-25-page1.txt: [('Wemustthereforeconcludethatthe', 'We must therefore conclude that the')]
AmSn18950725-V10-30-page7.txt: [('HISGLORIOUSAPPEARING', 'HIS GLORIOUS APPEARING')]
AmSn18950725-V10-30-page8.txt: [('anduncompromisingly', 'and uncompromisingly')]
AmSn18950822-V10-33-page5.txt: [('lieutenantgovernor', 'lieutenant governor')]
AmSn18950822-V10-33-page7.txt: [('dayAdventistsandthecourts', 'day Adventists and the courts')]
AmSn18950926-V10-38-page5.txt: [('domiriatelegislation', 'dom iri ate legislation')]
AmSn18951010-V10-40-page7.txt: [('muchtoitseducationalvalue', 'much to its educational value')]
AmSn18951017-V10-41-page7.txt: [('andtheschemewentthrough', 'and the scheme went through'), ('andthesehavebeenturnedover', 'and these have been turned over'), ('anddraggedhimofftojail', 'and dragged him off to jail'), ('andtoforbideverythingwhichisnot', 'and to forbid everything which is not'), ('theonlydangerwhichthreatenstheliberties', 'the only danger which threatens the liberties')]
AmSn18951031-V10-43-page7.txt: [('butwhetherprophecies', 'but whether prophecies'), ('theyshallbedoneaway', 'they shall be done away')]
AmSn18951121-V10-46-page7.txt: [('andveryseldomevenin', 'and very seldom even in')]
AmSn18951128-V10-47-page3.txt: [('fellowcommissioners', 'fellow commissioners')]
AmSn18951128-V10-47-page8.txt: [('THERIGHTSofthePEOPLE', 'THE RIGHTS of the PEOPLE')]
AmSn18951219-V10-50-page7.txt: [('waspresentatthetrialoutlinestheproceed', 'was present at the trial outlines the proceed')]
AmSn18960109-V11-02-page7.txt: [('andalsooftheDominionof', 'and also of the Dominion of'), ('Othermakesmaybegood', 'Other makes may be good'), ('buttheWATERMANISTHEBEST', 'but the WATERMAN IS THE BEST')]
AmSn18960319-V11-12-page7.txt: [('SweepingEverything', 'Sweeping Everything')]
AmSn18960326-V11-13-page7.txt: [('SweepingEverything', 'Sweeping Everything')]
AmSn18960402-V11-14-page3.txt: [('RELIGIOUSintolerance', 'RELIGIOUS intolerance')]
AmSn18960409-V11-15-page8.txt: [('Itfollowsthatthisisnot', 'It follows that this is not')]
AmSn18960416-V11-16-page8.txt: [('whichisperfectlyproperifthey', 'which is perfectly proper if they')]
AmSn18960430-V11-18-page6.txt: [('Sabbathdesecration', 'Sabbath desecration')]
AmSn18960430-V11-18-page7.txt: [('AttractiveBoardCover', 'Attractive Board Cover')]
AmSn18960514-V11-20-page7.txt: [('andpronouncesthemwith', 'and pronounces them with'), ('tobemightyupontheearth', 'to be mighty upon the earth')]
AmSn18960702-V11-26-page7.txt: [('enablesittopromotethehealth', 'enables it to promote the health')]
AmSn18960730-V11-30-page7.txt: [('ButthecommandmentsofGodapplytothe', 'But the commandments of God apply to the'), ('FormtheGREATTHROUGHLINEtoall', 'Form the GREAT THROUGH LINE to all'), ('TWOCANNIBALARCHIPELAGOES', 'TWO CANNIBAL ARCHIPELAGOES')]
AmSn18960813-V11-32-page4.txt: [('controversieswhich', 'controversies which')]
AmSn18960903-V11-35-page2.txt: [('entitledtoallthebenefitsofcivilsocieyt', 'entitled to all the benefits of civil so c i e y t')]
AmSn18961001-V11-39-page7.txt: [('GeorgeFredWilliams', 'George Fred Williams'), ('ALeaderoftheNewDemocracy', 'A Leader of the New Democracy'), ('Othermakesmaybegood', 'Other makes may be good'), ('buttheWATERMANISTHEBEST', 'but the WATERMAN IS THE BEST')]
AmSn18961015-V11-41-page7.txt: [('SendforCircularandTerms', 'Send for Circular and Terms'), ('Stampswillbeacceptableforsmallremit', 'Stamps will be acceptable for small remit')]
AmSn18961119-V11-46-page2.txt: [('closingdecadeofthenineteenthcentury', 'closing decade of the nineteenth century')]
AmSn18961126-V11-47-page4.txt: [('Religiousestablishments', 'Religious establishments')]
AmSn18961210-V11-49-page7.txt: [('withhandsomedesignincolors', 'with handsome design in colors')]
AmSn18970121-V12-03-page10.txt: [('wouldbeunchristian', 'would be unchristian')]
AmSn18970121-V12-03-page11.txt: [('inseparablerelation', 'inseparable relation')]
AmSn18970121-V12-03-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18970128-V12-04-page16.txt: [('NationalConstitution', 'National Constitution')]
AmSn18970204-V12-05-page13.txt: [('ecclesiasticalinstitution', 'ecclesiastical institution')]
AmSn18970211-V12-06-page3.txt: [('theseextraordinary', 'these extraordinary')]
AmSn18970311-V12-10-page15.txt: [('NationalConstitution', 'National Constitution')]
AmSn18970311-V12-10-page4.txt: [('Congregationalchurch', 'Congregational church')]
AmSn18970318-V12-11-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O'), ('ABookfortheChildren', 'A Book for the Children')]
AmSn18970325-V12-12-page8.txt: [('unalterableprinciple', 'unalterable principle')]
AmSn18970401-V12-13-page16.txt: [('thisbookreadingismadeeasy', 'this book reading is made easy'), ('MyBiblereceivedthis', 'My Bible received this')]
AmSn18970408-V12-14-page5.txt: [('TheItaliangovernment', 'The Italian government')]
AmSn18970415-V12-15-page15.txt: [('gasternPPaasssenger', 'gas tern P P a ass sen ger')]
AmSn18970422-V12-16-page16.txt: [('PracticalReference', 'Practical Reference')]
AmSn18970506-V12-18-page16.txt: [('thoroughlyintroduce', 'thoroughly introduce')]
AmSn18970513-V12-19-page1.txt: [('fundamentalprinciples', 'fundamental principles')]
AmSn18970520-V12-20-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18970520-V12-20-page16.txt: [('Ifyoupreferthehalf', 'If you prefer the half'), ('themonthlypaymentwillbe', 'the monthly payment will be')]
AmSn18970603-V12-22-page12.txt: [('religiousinstruction', 'religious instruction')]
AmSn18970610-V12-23-page5.txt: [('interdenominationalism', 'inter denominationalism')]
AmSn18970624-V12-25-page10.txt: [('approvingconscience', 'approving conscience')]
AmSn18970624-V12-25-page5.txt: [('strictlyeconomical', 'strictly economical')]
AmSn18970701-V12-26-page14.txt: [('PACIFICPRESSPUBLISHINGCP', 'PACIFIC PRESS PUBLISHING C P')]
AmSn18970707-V12-27-page15.txt: [('InvaluabletoBibleandHistoryStudents', 'Invaluable to Bible and History Students')]
AmSn18970722-V12-29-page2.txt: [('advancingevolution', 'advancing evolution')]
AmSn18970722-V12-29-page4.txt: [('endangeringAmerican', 'endangering American')]
AmSn18970805-V12-31-page9.txt: [('gamblingfraternity', 'gambling fraternity')]
AmSn18970819-V12-33-page3.txt: [('ilvyettoAnaximandercreditfortheinventionofathin', 'i lv yet to Anaximander credit for the invention of a thin')]
AmSn18970916-V12-36-page16.txt: [('TheBiblegivenasapresentforsixNEWyearlysubscriptionsat', 'The Bible given as a present for six NEW yearly subscriptions at')]
AmSn18971007-V12-39-page16.txt: [('AstothemeritsoftheBibleweoffer', 'As to the merits of the Bible we offer'), ('IhavereceivedmyBible', 'I have received my Bible'), ('andtosayIamwellpleasedwould', 'and to say I am well pleased would')]
AmSn18971014-V12-40-page14.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18971014-V12-40-page16.txt: [('ItisjustwhatIhavelongwanted', 'It is just what I have long wanted'), ('IreceivedtheBibleingoodcondition', 'I received the Bible in good condition')]
AmSn18971021-V12-41-page14.txt: [('expensesreasonable', 'expenses reasonable')]
AmSn18971028-V12-42-page16.txt: [('ofpageitwillbeseenthattheRevisedsieornreads', 'of page it will be seen that the Revised s i e o r n r e a d s')]
AmSn18971028-V12-42-page2.txt: [('dangerousnegligence', 'dangerous negligence')]
AmSn18971028-V12-42-page9.txt: [('politicalcorruption', 'political corruption')]
AmSn18971111-V12-44-page1.txt: [('democraticgovernment', 'democratic government')]
AmSn18971111-V12-44-page7.txt: [('thegreatestreformer', 'the greatest reformer')]
AmSn18971125-V12-46-page14.txt: [('especiallyforconvertingnutsintobutterfor', 'especially for converting nuts into butter for')]
AmSn18971209-V12-48-page4.txt: [('probablyinfluenced', 'probably influenced')]
AmSn18971230-V12-50-page13.txt: [('differencelbetween', 'difference l between')]
AmSn18980106-V13-01-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18980113-V13-02-page15.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')]
AmSn18980113-V13-02-page16.txt: [('togiveourpatronsabenefit', 'to give our patrons a benefit')]
AmSn18980120-V13-03-page14.txt: [('DameFashionandHerSlaves', 'Dame Fashion and Her Slaves'), ('HealthandTemperanceMiscellany', 'Health and Temperance Miscellany')]
AmSn18980120-V13-03-page3.txt: [('individualChristians', 'individual Christians')]
AmSn18980120-V13-03-page6.txt: [('heavenlycitizenship', 'heavenly citizenship')]
AmSn18980127-V13-04-page14.txt: [('oftheCYCLONECONTINUES', 'of the CYCLONE CONTINUES')]
AmSn18980203-V13-05-page14.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')]
AmSn18980210-V13-06-page8.txt: [('righteousnessright', 'righteousness right')]
AmSn18980210-V13-06-page9.txt: [('Onstratethepropriety', 'On st rate the propriety')]
AmSn18980217-V13-07-page15.txt: [('Anothervaluablefeature', 'Another valuable feature')]
AmSn18980224-V13-08-page6.txt: [('ourChristiancharacter', 'our Christian character')]
AmSn18980303-V13-09-page14.txt: [('bottomwithmetalandready', 'bottom with metal and ready')]
AmSn18980303-V13-09-page7.txt: [('EstablishingReligious', 'Establishing Religious')]
AmSn18980310-V13-10-page14.txt: [('Catalogueofallourstyles', 'Catalogue of all our styles'), ('CatarrhInhalerFree', 'Catarrh Inhaler Free')]
AmSn18980331-V13-13-page14.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')]
AmSn18980331-V13-13-page15.txt: [('Anothervaluablefeature', 'Another valuable feature')]
AmSn18980407-V13-14-page12.txt: [('covetousdisposition', 'covetous disposition')]
AmSn18980414-V13-15-page14.txt: [('Communicationsstrictly', 'Communications strictly')]
AmSn18980512-V13-19-page6.txt: [('intermeddlehimself', 'intermeddle himself')]
AmSn18980602-V13-22-page7.txt: [('Christiansunconsciously', 'Christians unconsciously'), ('Catholicsunderstand', 'Catholics understand')]
AmSn18980609-V13-23-page14.txt: [('SolidVestibuledPullmanDiningandSleepingCarTrains', 'Solid Vestibuled Pullman Dining and Sleeping Car Trains')]
AmSn18980609-V13-23-page15.txt: [('hethatspeakswithatongue', 'he that speaks with a tongue')]
AmSn18980609-V13-23-page9.txt: [('civicrighteousness', 'civic righteousness')]
AmSn18980616-V13-24-page14.txt: [('ManualofParliamentaryRules', 'Manual of Parliamentary Rules')]
AmSn18980616-V13-24-page15.txt: [('weseethroughaglass', 'we see through a glass')]
AmSn18980623-V13-25-page3.txt: [('correctlyrepresent', 'correctly represent')]
AmSn18980623-V13-25-page5.txt: [('questiondestruction', 'question destruction')]
AmSn18980630-V13-26-page14.txt: [('handsomelyillustrated', 'handsomely illustrated')]
AmSn18980811-V13-31-page3.txt: [('PhilippineArchipelago', 'Philippine Archipelago')]
AmSn18980818-V13-32-page8.txt: [('establishingAmerican', 'establishing American')]
AmSn18980915-V13-36-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18980922-V13-37-page11.txt: [('smallconsideration', 'small consideration')]
AmSn18980929-V13-38-page2.txt: [('voluntarilydescended', 'voluntarily descended')]
AmSn18981006-V13-39-page14.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18981110-V13-44-page14.txt: [('reeFeoiredyeiaerrsefe', 'ree Fe o ire dye i a errs e f e')]
AmSn18981110-V13-44-page3.txt: [('ecclesiasticalpowers', 'ecclesiastical powers')]
AmSn18981208-V13-48-page15.txt: [('ScientificAmerican', 'Scientific American')]
AmSn18981215-V13-49-page7.txt: [('otherinsurrectionists', 'other insurrection i sts')]
AmSn18990105-V14-01-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990112-V14-02-page14.txt: [('InterestingandInstructive', 'Interesting and Instructive')]
AmSn18990112-V14-02-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990126-V14-04-page12.txt: [('biblicalChristianity', 'biblical Christianity')]
AmSn18990126-V14-04-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990126-V14-04-page16.txt: [('Americanimperialism', 'American imperialism')]
AmSn18990126-V14-04-page3.txt: [('certainiinalienable', 'certain i inalienable')]
AmSn18990202-V14-05-page5.txt: [('Americandomination', 'American domination')]
AmSn18990209-V14-06-page5.txt: [('thoroughlyfurnished', 'thoroughly furnished')]
AmSn18990316-V14-11-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990316-V14-11-page6.txt: [('earthlygovernments', 'earthly governments')]
AmSn18990330-V14-13-page10.txt: [('injusticeinseparable', 'injustice inseparable')]
AmSn18990330-V14-13-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990511-V14-19-page15.txt: [('Postagestampsaccepted', 'Postage stamps accepted')]
AmSn18990518-V14-20-page12.txt: [('appallingproportions', 'appalling proportions')]
AmSn18990518-V14-20-page13.txt: [('Ordernowandgetthebenefitofasplendidhelpin', 'Order now and get the benefit of a splendid help in')]
AmSn18990518-V14-20-page14.txt: [('InterestingandInstructive', 'Interesting and Instructive')]
AmSn18990525-V14-21-page6.txt: [('indicateverymoderate', 'indicate very moderate')]
AmSn18990608-V14-23-page13.txt: [('grosslyjdiscriminating', 'grossly j discriminating')]
AmSn18990615-V14-24-page11.txt: [('employmentelsewhere', 'employment elsewhere')]
AmSn18990713-V14-27-page13.txt: [('Oneofthemostinterestingvolumeseverpublished', 'One of the most interesting volumes ever published')]
AmSn18990727-V14-29-page11.txt: [('temporalgovernments', 'temporal governments')]
AmSn18990803-V14-30-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990810-V14-31-page10.txt: [('followingresolution', 'following resolution')]
AmSn18990824-V14-33-page7.txt: [('exercisethemselves', 'exercise themselves')]
AmSn18990831-V14-34-page5.txt: [('foreignnationality', 'foreign nationality')]
AmSn18990914-V14-36-page15.txt: [('IINothingBetterPublished', 'II Nothing Better Published')]
AmSn18991005-V14-39-page10.txt: [('louderdemonstrations', 'louder demonstrations')]
AmSn18991005-V14-39-page15.txt: [('Containingadditional', 'Containing additional')]
AmSn18991019-V14-41-page3.txt: [('overworkingthemselves', 'over working themselves')]
AmSn18991102-V14-43-page2.txt: [('secureirecognition', 'secure i recognition')]
AmSn18991116-V14-45-page11.txt: [('alreadyestablished', 'already established')]
AmSn18991130-V14-47-page3.txt: [('objectionableithing', 'objectionable i thing')]
AmSn19000104-V15-01-page5.txt: [('notwithstandingconstitutional', 'notwithstanding constitutional'), ('GenuineChristianity', 'Genuine Christianity')]
AmSn19000111-V15-02-page7.txt: [('ordinaryinstruments', 'ordinary instruments')]
AmSn19000111-V15-02-page8.txt: [('advertisementcaptured', 'advertisement captured')]
AmSn19000118-V15-03-page7.txt: [('independtrespassers', 'in depend trespassers')]
AmSn19000118-V15-03-page8.txt: [('renderingallegiance', 'rendering allegiance')]
AmSn19000125-V15-04-page12.txt: [('advocateindependence', 'advocate independence')]
AmSn19000201-V15-05-page4.txt: [('civilandreligiousfreedom', 'civil and religious freedom')]
AmSn19000208-V15-06-page14.txt: [('bindingforcontinuous', 'binding for continuous')]
AmSn19000208-V15-06-page2.txt: [('developmenthasvaried', 'development has varied')]
AmSn19000215-V15-07-page1.txt: [('principleunderlying', 'principle underlying')]
AmSn19000215-V15-07-page14.txt: [('beautifullystamped', 'beautifully stamped')]
AmSn19000301-V15-09-page3.txt: [('establishingtheRoman', 'establishing the Roman')]
AmSn19000315-V15-11-page10.txt: [('accustomingthemselves', 'a c custom ing themselves')]
AmSn19000315-V15-11-page13.txt: [('literaryentertainments', 'literary entertainments')]
AmSn19000315-V15-11-page6.txt: [('isdiscriminationand', 'is discrimination and'), ('discriminationshall', 'discrimination shall')]
AmSn19000405-V15-14-page3.txt: [('Politicsirepresents', 'Politics i represents')]
AmSn19000405-V15-14-page6.txt: [('languageArchbishop', 'language Archbishop')]
AmSn19000426-V15-17-page14.txt: [('StateDistinguished', 'State Dist ing u i s h e d')]
AmSn19000426-V15-17-page3.txt: [('Sundaylawsclaimthat', 'Sunday laws claim that')]
AmSn19000719-V15-28-page1.txt: [('Sabbathinstitution', 'Sabbath institution')]
AmSn19000719-V15-28-page3.txt: [('eveirtowardiaristocracy', 'eve ir toward i aristocracy'), ('magistratebelieveth', 'magistrate believeth')]
AmSn19000719-V15-28-page8.txt: [('conditionsprevailing', 'conditions prevailing')]
AmSn19000809-V15-31-page12.txt: [('theireleeterafPrOeirtil', 'the ire lee ter a f P r O e i r t i l')]
AmSn19000816-V15-32-page3.txt: [('considerablepolitical', 'considerable political')]
AmSn19000830-V15-34-page10.txt: [('constitutionallimitations', 'constitutional limitations')]
AmSn19000830-V15-34-page11.txt: [('prosecutingattorney', 'prosecuting attorney')]
AmSn19000830-V15-34-page14.txt: [('interestinghistories', 'interesting histories')]
AmSn19000913-V15-36-page14.txt: [('caYearforMedicalFees', 'c a Year for Medical Fees')]
AmSn19000920-V15-37-page8.txt: [('incompetentreligious', 'incompetent religious')]
AmSn19000928-V15-38-page5.txt: [('generacovetousness', 'genera covetousness')]
AmSn19001018-V15-41-page11.txt: [('religiOuscharaeter', 'religiOus char aet er')]
AmSn19001025-V15-42-page2.txt: [('everlastinginheritance', 'everlasting inheritance')]
AmSn19001129-V15-47-page11.txt: [('Christiancitizenship', 'Christian citizenship')]
AmSn19001129-V15-47-page5.txt: [('accomplishstatement', 'accomplish statement')]
AmSn19001206-V15-48-page10.txt: [('betfererigagerfents', 'bet fere rig a ger fe n t s')]
AmSn19001206-V15-48-page6.txt: [('appropriationonSunday', 'appropriation on Sunday')]
In [42]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction9

Average verified rate: 0.9830732455807398

Average of error rates: 0.018591465234258434

Total token count: 8364805

In [43]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[43]:
[("'", 8053),
 ('t', 4433),
 ('e', 3990),
 ('d', 3954),
 ('w', 3754),
 ('co', 3408),
 ('m', 3174),
 ('n', 3056),
 ('f', 2037),
 ('r', 2033),
 ('th', 1645),
 ('g', 1375),
 ('mo', 1160),
 ('u', 930),
 ('x', 864),
 ('ex', 521),
 ('pa', 410),
 ('q', 399),
 ('sunday-law', 334),
 ('k', 315),
 ("the'", 304),
 ('pp', 299),
 ('tion', 276),
 ("conscience'", 260),
 ('ch', 253),
 ('seventhday', 249),
 ('re', 224),
 ('ga', 220),
 ('oc', 218),
 ('z', 217),
 ('wm', 215),
 ('satolli', 210),
 ('employes', 209),
 ('munn', 207),
 ('ti', 200),
 ('id', 181),
 ('un', 173),
 ('ry', 170),
 ('al', 166),
 ('sunday-closing', 160),
 ('ca', 151),
 ('ment', 146),
 ('chain-gang', 136),
 ("to'", 134),
 ('nd', 130),
 ('ll', 128),
 ('lb', 125),
 ('il', 123),
 ('bateham', 122),
 ('cmsar', 121)]
In [ ]: