AmSn-OCR-Evaluation-and-Correction
%load_ext autoreload
%autoreload 2
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
%matplotlib inline
# Location of the word lists and the individual list files that are combined
# into the spelling dictionary used by every report in this notebook.
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"

wordlists = [
    "2016-12-07-SDA-last-names.txt",
    "2016-12-07-SDA-place-names.txt",
    "2016-12-08-SDA-Vocabulary.txt",
    "2017-01-03-place-names.txt",
    "2017-02-14-Base-Word-List-SCOWL&KJV.txt",
    "2017-02-14-Roman-Numerals.txt",
    "2017-03-01-Additional-Approved-Words.txt",
]

spelling_dictionary = utilities.create_spelling_dictionary(
    wordlist_dir,
    wordlists,
)

# Periodical title code; it selects the per-title folder under the corpus root
# and is also passed to the overview reports.
title = "AmSn"
base_dir = (
    "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/"
).format(title)
Baseline¶
# Run the overview report on the untouched "baseline" copy of the corpus.
# `stats` is the input to the error summary computed in the next cell.
cycle = 'baseline'
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/baseline Average verified rate: 0.9417475625771581 Average of error rates: 0.059656669650850494 Total token count: 8534424
# Aggregate the errors from the baseline report and list the most frequent ones.
# NOTE(review): the second argument to top_errors looks like a minimum count
# rather than a row limit (the listing below stops just above 100) -- confirm
# against text2topics.reports.
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 100 )
[('-', 12650),
('ñ', 11855),
("'", 8075),
('tion', 6722),
('con-', 5879),
('re-', 5789),
('¥', 5128),
('t', 4124),
('ment', 4081),
('in-', 3962),
(')', 3929),
('d', 3884),
('e', 3716),
('w', 3705),
('co', 3389),
('m', 3097),
('de-', 2928),
('com-', 2715),
('n', 2658),
('be-', 2447),
('pro-', 2149),
('sun-', 1953),
('f', 1919),
('ex-', 1874),
('chris-', 1869),
('*', 1867),
('r', 1832),
('tions', 1677),
('th', 1673),
('en-', 1576),
('dis-', 1563),
('govern-', 1495),
('(', 1448),
('gov-', 1339),
('g', 1311),
('per-', 1282),
('sab-', 1252),
('tian', 1181),
('mo', 1146),
('un-', 1141),
('na-', 1133),
('ernment', 1060),
('reli-', 1056),
('ance', 1023),
('ob-', 1011),
('pre-', 998),
('ments', 942),
('ad-', 937),
('ity', 935),
('sunday-law', 923),
('ac-', 911),
('tional', 911),
('ñthe', 905),
('u', 881),
(']', 868),
('ligious', 836),
('im-', 815),
('ap-', 807),
('ple', 790),
('sub-', 738),
('x', 730),
('ence', 714),
('an-', 698),
('gious', 675),
('relig-', 665),
('ques-', 625),
('peo-', 623),
('ers', 621),
('at-', 601),
("'the", 596),
('al-', 586),
('as-', 576),
('inter-', 561),
('to-', 554),
('pub-', 546),
('them-', 544),
('fol-', 536),
('prin-', 522),
('constitu-', 520),
('ligion', 516),
('erty', 511),
('_', 511),
('/', 510),
('sup-', 507),
('for-', 498),
('tianity', 490),
('au-', 484),
('stitution', 475),
('coun-', 472),
('ious', 464),
('the-', 460),
('em-', 453),
('pur-', 447),
('observ-', 446),
('cath-', 441),
('any-', 439),
('amer-', 437),
('lib-', 430),
('gress', 425),
('there-', 414),
('sev-', 411),
('legisla-', 410),
('[the', 409),
('with-', 394),
('wor-', 386),
('legis-', 381),
('--', 381),
('sen-', 380),
('or-', 379),
('op-', 375),
('pa', 375),
('ican', 375),
('mis-', 373),
('ent', 372),
('consti-', 366),
('estab-', 365),
('ful', 355),
('servance', 354),
('man-', 353),
('under-', 352),
('cer-', 351),
('lished', 351),
('lation', 351),
('es-', 350),
('q', 350),
('chain-gang', 349),
('lic', 345),
('ble', 344),
('di-', 343),
('pres-', 342),
('tution', 342),
('ber', 341),
('gion', 339),
('can-', 338),
('ña', 338),
('%', 335),
('ar-', 334),
('princi-', 332),
('ture', 326),
('move-', 325),
('tive', 321),
('ous', 316),
('gen-', 316),
('olic', 315),
('sunday-closing', 314),
("the'", 312),
('thority', 312),
('prot-', 311),
('ject', 310),
('command-', 305),
('his-', 303),
('tians', 300),
('ical', 297),
('repre-', 295),
('su-', 295),
('eral', 295),
('se-', 293),
('so-', 292),
('ual', 291),
('=', 291),
('par-', 291),
('mat-', 290),
('ation', 290),
('k', 288),
('state-', 288),
('author-', 287),
('him-', 285),
('-the', 285),
('sunday-', 283),
('amend-', 282),
('sunday-rest', 281),
('pp', 281),
('cor-', 281),
('indi-', 281),
('¥¥', 281),
('ples', 280),
('ciples', 277),
('ñthat', 277),
('ex', 275),
('ameri-', 271),
('institu-', 268),
('mem-', 268),
('every-', 266),
('ch', 266),
('ists', 265),
('ma-', 262),
('neces-', 262),
('ñno', 260),
('ters', 260),
('of-', 259),
('senti-', 259),
('sec-', 258),
('pa-', 257),
('seventh-', 256),
('work-', 256),
('po-', 253),
('some-', 251),
('doc-', 247),
('ciple', 246),
('sim-', 243),
('pos-', 242),
('(the', 240),
('busi-', 238),
('ary', 237),
("'of", 236),
('char-', 235),
('evi-', 233),
('follow-', 233),
('perse-', 231),
('mo-', 230),
('insti-', 229),
('mand', 229),
('lieve', 228),
("conscience'", 228),
('ã', 226),
('recog-', 226),
('bers', 225),
('num-', 221),
('prac-', 221),
('leg-', 219),
('tained', 219),
('ñand', 218),
('mitted', 217),
('suc-', 216),
('what-', 216),
('**', 216),
('ga', 216),
('individ-', 216),
('out-', 214),
('oc', 214),
('sition', 213),
('free-', 213),
('mandment', 212),
('wm', 212),
('accord-', 211),
('how-', 210),
('prop-', 210),
('pel', 210),
('main-', 206),
('munn', 206),
('fellow-citizens', 204),
('satolli', 204),
('ten-', 203),
('rec-', 203),
('preme', 201),
('ab-', 201),
('king-', 200),
('***', 198),
('over-', 198),
('christian-', 197),
('hu-', 196),
('sented', 193),
('yo', 193),
('col-', 192),
('sions', 190),
('noth-', 190),
('illus-', 188),
('ure', 188),
('citi-', 187),
('fur-', 187),
('eng-', 186),
('right-', 185),
('min-', 184),
('tures', 184),
('z', 184),
('associa-', 184),
('stand-', 183),
('sur-', 183),
('exer-', 182),
('ry', 182),
('advent-', 182),
('tainly', 182),
('argu-', 182),
('sible', 182),
('tem-', 180),
('invari-', 179),
('employes', 178),
('catho-', 176),
('protest-', 176),
('thou-', 175),
('car-', 175),
('accom-', 175),
('dred', 174),
('ti', 173),
('judg-', 173),
('persecu-', 173),
("'a", 172),
('self-', 172),
('struction', 172),
('teach-', 171),
('ther', 170),
('inde-', 170),
('conse-', 170),
('ca', 170),
('dence', 170),
('dividual', 169),
('vio-', 168),
('posi-', 167),
('ñthomas', 167),
('co-', 167),
('ro-', 166),
('mittee', 166),
('`', 165),
('sary', 165),
('-of', 164),
('tinel', 164),
('ñi', 162),
('ñit', 162),
('af-', 161),
('hun-', 160),
('rep-', 160),
('(see', 159),
('attorney-general', 159),
('al', 159),
('organ-', 159),
('un-american', 156),
('trans-', 155),
('tary', 155),
('rest-day', 154),
('estant', 154),
('asso-', 153),
('(entered', 153),
('mony', 153),
('law-abiding', 152),
('rea-', 152),
('sys-', 152),
("'to", 152),
('des-', 152),
('uni-', 151),
('ference', 150),
('mc', 149),
("'and", 149),
('ceived', 148),
('enforce-', 148),
('bap-', 148),
('conven-', 148),
('har-', 147),
('presi-', 147),
('cen-', 147),
('law-', 146),
('differ-', 146),
('ñrev', 146),
('acter', 146),
('ciation', 146),
('sa-', 145),
('[from', 145),
('peti-', 145),
('states-', 145),
('vention', 145),
('scrip-', 144),
('fied', 144),
('jus-', 144),
('trol', 144),
('sabbath-breaking', 144),
('dif-', 143),
('clared', 142),
('religio-political', 142),
('cir-', 142),
('representa-', 141),
('protes-', 141),
('ish', 141),
('vidual', 141),
('ventists', 141),
('ular', 141),
('organiza-', 141),
('ist', 140),
('minis-', 140),
('tation', 140),
('duced', 139),
('(and', 139),
('re', 139),
('tains', 138),
('post-', 138),
('instruc-', 137),
('id', 137),
('dren', 137),
('cial', 137),
('terest', 136),
('influ-', 136),
('ished', 136),
('trated', 136),
('non-', 136),
('mands', 136),
('enth-day', 136),
("to'", 134),
('ered', 133),
('chil-', 133),
('cially', 133),
('ô', 132),
('educa-', 132),
('pros-', 132),
('intro-', 132),
('no-', 132),
('trary', 132),
('meet-', 131),
('fa-', 131),
('ica', 130),
('pun-', 130),
('cently', 130),
('mar-', 129),
('quired', 129),
('vest-pocket', 129),
('nessee', 128),
('denomina-', 128),
('ilar', 128),
('se', 128),
('jority', 128),
('manded', 127),
('ll', 127),
('-to', 127),
('tives', 127),
('pelled', 126),
('rian', 126),
('dition', 126),
('thatñ', 126),
('ated', 125),
('¡', 125),
('cise', 125),
('litical', 125),
('nal', 125),
('lish', 125),
('wash-', 124),
('cated', 124),
('mit', 124),
('parlia-', 124),
('consid-', 124),
('sug-', 123),
('olics', 123),
('tem', 122),
('ñnew', 122),
('ures', 122),
('secution', 122),
('cept', 121),
('news-', 121),
('hon-', 121),
('ized', 120),
('establish-', 120),
('eousness', 120),
('il', 119),
('nd', 119),
('oc-', 119),
('advo-', 118),
('cution', 118),
('edu-', 118),
('serv-', 118),
('islation', 118),
('counter-arguments', 117),
('rela-', 117),
('sabbath-', 116),
('mainte-', 116),
('cmsar', 116),
('condi-', 115),
('do-', 115),
('conscien-', 115),
('resolu-', 115),
('chi-', 115),
('ances', 115),
('sat-', 114),
('lb', 114),
('ingly', 114),
('sin-', 114),
('polit-', 114),
('stitutions', 113),
('ington', 113),
('sabbath-day', 113),
('refer-', 113),
('pression', 112),
('stat-', 112),
('nounced', 112),
('gos-', 112),
('fellow-', 112),
('aleck', 112),
('apos-', 112),
("'i", 111),
('spect', 111),
('cerning', 111),
('sunday-keeping', 110),
('circum-', 110),
('\\', 110),
('libertyñchristian', 109),
('ciety', 109),
("'in", 109),
('nel', 109),
('sus-', 109),
('non-sectarian', 108),
("and'", 108),
('bateham', 107),
('reform-', 107),
('appro-', 107),
('tant', 107),
('forcement', 107),
('%x', 107),
('ies', 107),
('deter-', 106),
('dan-', 106),
('politi-', 106),
('sentin', 106),
('mandments', 106),
('ñjesus', 106),
('pe-', 106),
('espe-', 105),
('suf-', 105),
('is-', 105),
('ñto', 105),
('ñin', 104),
('estants', 104),
('eration', 104),
('depart-', 104),
("'is", 104),
('prom-', 104),
('ning', 104),
('meth-', 103),
('well-', 103),
('cc', 103),
('ne-', 103),
('funda-', 103),
('cussion', 103),
('ñnot', 103),
('•', 102),
('milly', 102),
('ern', 102),
('tered', 102),
('neigh-', 102),
('righteous-', 102),
('pol-', 102),
('sity', 102),
('pendence', 102),
('sanc-', 101),
('sionary', 101),
('cago', 101),
('possi-', 101),
('nity', 101)]
Correction 1 -- Special Characters¶
The first common error appears to be dangling line endings. However, in order to best capture and fix those endings, I am first normalizing the line-ending characters and addressing special characters. Before removing them, I will check for regular non-English language use to see if there are particular characters that should be preserved.
# List the error tokens that contain special characters, with their counts, so
# that legitimate non-English characters can be spotted before anything is removed.
reports.tokens_with_special_characters(errors_summary)
[('ñ', 11855),
('¥', 5128),
(')', 3929),
('*', 1867),
('(', 1448),
('ñthe', 905),
(']', 868),
('_', 511),
('/', 510),
('[the', 409),
('ña', 338),
('%', 335),
('=', 291),
('¥¥', 281),
('ñthat', 277),
('ñno', 260),
('(the', 240),
('ã', 226),
('ñand', 218),
('**', 216),
('***', 198),
('ñthomas', 167),
('`', 165),
('ñi', 162),
('ñit', 162),
('(see', 159),
('(entered', 153),
('ñrev', 146),
('[from', 145),
('(and', 139),
('ô', 132),
('thatñ', 126),
('¡', 125),
('ñnew', 122),
('\\', 110),
('libertyñchristian', 109),
('%x', 107),
('ñjesus', 106),
('ñto', 105),
('ñin', 104),
('ñnot', 103),
('•', 102),
('`the', 98),
('ñchristian', 96),
('-¥', 94),
('ñany', 90),
('(a', 85),
('(which', 80),
('[', 80),
('(or', 78),
('(as', 78),
('ñbut', 73),
('¤', 72),
('ñby', 71),
('(new', 68),
('(for', 68),
('(in', 67),
('[of', 67),
('ñis', 66),
('[sunday]', 66),
('ñas', 66),
('ñwhich', 65),
('libertyñchris-', 65),
('<', 64),
('ñif', 64),
('the¥', 64),
('+', 63),
('ñan', 62),
('ñthis', 62),
('ñyes', 61),
('(sunday)', 60),
('[in', 60),
('¥¥¥', 59),
('—', 58),
('(n', 58),
('(i', 57),
('ñfor', 56),
('¥the', 55),
('(rev', 54),
('(to', 52),
('ñwhy', 51),
('(baptist)', 50),
('ñwe', 50),
('ñharvard', 50),
('ñall', 49),
('ñc', 49),
('¥-', 49),
('ñn', 49),
('ñhow', 49),
('(not', 49),
('(mich', 48),
('ñwhen', 48),
('ñwhat', 48),
('sentinel_', 46),
('day)', 46),
('ñhe', 46),
('excepted)', 46),
('(rom', 45),
('ñthey', 45),
('ñsigns', 45),
('(concluded', 45),
('(size', 45),
('(italics', 43),
('~~', 42),
('(john', 42),
('[mr', 42),
('(second', 42),
('#', 42),
('£', 42),
('ó', 41),
('(matt', 40),
('ñthere', 40),
('ñfrom', 39),
('[new', 39),
('[this', 39),
('inches)', 38),
('(acts', 38),
('\ufeff', 37),
('ñbecause', 37),
('¦', 37),
('to¥', 37),
('ñcatholic', 36),
('(with', 36),
('(if', 36),
('[by', 36),
('[or', 36),
('(page', 35),
('(works', 35),
('ñyou', 35),
('sabbathñthe', 35),
('ñoñ', 35),
('¥-¥', 35),
('ñid', 34),
('(p', 34),
('ñone', 34),
('york)', 33),
('ñthen', 33),
('[not', 33),
('ñwho', 33),
('(dyspeptic)', 32),
('worldñto', 32),
('~', 32),
('c)', 32),
('ñpage', 32),
('ñreligious', 31),
('notes)', 31),
('ñcertainly', 31),
('¥of', 31),
('ñor', 31),
('catholic)', 30),
('(saturday)', 30),
('ñpresent', 30),
('ñof', 30),
('ñpresbyterian', 29),
('(continuing', 29),
('}', 29),
('ñmr', 29),
('ñon', 29),
('(tenn', 29),
('(of', 29),
('(roman', 29),
('ñwith', 29),
('(r', 29),
('lawñthe', 29),
('[we', 29),
('church)', 28),
('ñjottings', 28),
('(mass', 28),
('sunday)', 28),
('(pa', 28),
('(except', 28),
('(a)', 27),
('>', 27),
('(no', 27),
('bookñ', 27),
('(exact', 27),
('catholics]', 26),
('ñst', 26),
('¥¥¥¥', 25),
('and¥', 25),
('(ps', 25),
('ñsabbath', 25),
('(that', 25),
('split)', 25),
('(who', 25),
('church]', 25),
('bibleñ', 25),
('(from', 25),
('ñ]', 24),
('(mr', 24),
('(capillary', 24),
('[a', 24),
('^', 23),
('(b)', 23),
('thisñourñour', 23),
('(this', 23),
('(though', 23),
('(cal', 23),
('a¥', 23),
('the_', 23),
('specimen)', 23),
('[that', 23),
('(luke', 23),
('ñjohn', 23),
('attraction)', 23),
('ñwill', 22),
('of¥', 22),
('(late', 22),
('refused)', 22),
('churchñ', 22),
('ñdo', 22),
('a_', 22),
('[italics', 22),
('(heb', 22),
('pages)', 22),
('[to', 22),
('(by', 22),
('(once', 22),
('(col', 21),
('a)', 21),
('[roman', 21),
('ñsimply', 21),
('(london', 21),
('self=pronouncing', 21),
('(dan', 21),
('ñsuch', 21),
('(isa', 21),
('`we', 21),
('mealñ(an)', 21),
('godñthe', 21),
('ñpeoplesñthat', 21),
('_the', 21),
('missionaryñby', 20),
('(c)', 20),
('ñwell', 20),
('(catholic)', 20),
('(over', 20),
('(gen', 20),
('[applause', 20),
('recipesñpost-paid', 20),
('it)', 19),
('ñjames', 19),
('itñ', 19),
('coã', 19),
('plorerñby', 19),
('ñdr', 19),
('¥a', 19),
('`and', 19),
('ñare', 19),
('sentinel¥', 19),
('ñhas', 19),
('ñw', 19),
('[christ]', 19),
('ñsan', 19),
('ñonly', 19),
('guineañby', 18),
('[sunday', 18),
('ñwould', 18),
("'¥", 18),
('_american', 18),
('[and', 18),
('(verse', 18),
('ñeditor', 18),
('ñthese', 18),
('ñthose', 18),
('`i', 18),
('(puritan)', 18),
('(we', 18),
('ñnellie', 18),
('christian(', 18),
('ñat', 18),
('statesñthe', 18),
('[mention', 18),
('chairmanñ', 17),
('is¥', 17),
('churchñthe', 17),
('(having)', 17),
('ñchicago', 17),
('city)', 17),
('(based', 17),
('sentinel)', 17),
('i)', 17),
('congoñby', 17),
('ñspringfield', 17),
('ñmen', 17),
("cushing's)", 17),
('¥and', 17),
('(chicago)', 17),
('ñmrs', 17),
('¥in', 17),
('ñshe', 17),
('ñhistory', 17),
('ñen', 17),
('û', 17),
('edition)', 17),
('sunday]', 17),
('(i)', 16),
('ñso', 16),
("ñman's", 16),
('godñ', 16),
('ñgreat', 16),
('ñhis', 16),
('`it', 16),
('(illustrated)', 16),
('-*', 16),
('(eph', 16),
('ñwas', 16),
('ñoh', 16),
('ñlet', 16),
('(applause', 16),
('ñjust', 16),
('~~¥', 16),
('nigerñ', 16),
('numbers)', 16),
('melanesiañby', 16),
('ñbaptist', 15),
('t)', 15),
('♦', 15),
('missionsñby', 15),
('in¥', 15),
('a¥nd', 15),
('statesñ', 15),
('ñp', 15),
('ñking', 15),
('day]', 15),
('religionñthe', 15),
('*-', 15),
('(mark', 15),
('state]', 15),
('(continued', 15),
('law]', 15),
('(including', 15),
('a_n', 15),
('isñ', 15),
('governmentñthe', 15),
('mangañby', 14),
('(water', 14),
('sundayñ', 14),
('(minn', 14),
("'ñ", 14),
('ñsunday', 14),
('independenceñthe', 14),
('(ind', 14),
('page)', 14),
('ñu', 14),
('ñfood', 14),
('ñandñ', 14),
('l¥', 14),
('¥*', 14),
('nineteenthñwill', 14),
('¥to', 14),
('washington)', 14),
('¥that', 14),
('andñ', 14),
('in_', 14),
('ñsunday-law', 14),
('concordanceña', 14),
('villageñone', 14),
('ñart', 14),
('ñh', 14),
('()', 14),
('ñgod', 14),
('*from', 14),
('ñevangel', 14),
('catholic]', 14),
('for¥', 14),
('sundayñthe', 14),
('ñaddressñ', 14),
('helpsñembracing', 13),
('ñboston', 13),
('powerñthe', 13),
('(sunday', 13),
('manñthe', 13),
('`to', 13),
('truthñthe', 13),
('law)', 13),
('ñeven', 13),
('(london)', 13),
('le/vites', 13),
('landsñby', 13),
('r¡', 13),
('[christian', 13),
('extras)', 13),
("ñ'", 13),
('[catholic]', 13),
('appealñnational', 13),
('ñwhether', 13),
('`¥', 13),
('beñhow', 13),
('invadedñthe', 13),
('dayñthe', 13),
('dayñand', 13),
('(ex', 13),
('(fleming', 13),
('ñmay', 13),
('[laughter', 13),
('government)', 13),
('ñbible', 12),
('(but', 12),
('ãã', 12),
('(about', 12),
('(s', 12),
('empireñwhat', 12),
('_a', 12),
('(gal', 12),
('_of', 12),
('ñgeorge', 12),
('<at', 12),
('christñthe', 12),
('/dominion', 12),
('as¥', 12),
('worldñthe', 12),
('politicalñthomas', 12),
('`for', 12),
('helpsñnumerous', 12),
('(he', 12),
('(neb', 12),
('stateñthe', 12),
('illustrationsñsplendid', 12),
('ñdid', 12),
('i¥', 12),
('uaryña', 12),
('allñthe', 11),
('mapsñconcordanceñsubject', 11),
('{', 11),
('(presbyterian)', 11),
('*the', 11),
('states]', 11),
('ñsamuel', 11),
('ñmoral', 11),
('historyñby', 11),
('state)', 11),
('`this', 11),
("¥'", 11),
('(it', 11),
('or¥', 11),
('them)', 11),
('lawñ', 11),
('ñnamely', 11),
('re_', 11),
('[for', 11),
('¥i', 11),
('thisñthe', 11),
('peopleñthe', 11),
('(ill', 11),
('be)', 11),
('sabbath)', 11),
('ñbishop', 11),
('(delivered', 11),
('ñde', 11),
('therein)', 11),
('worldñ', 11),
('ñnow', 11),
('chinañby', 11),
("(gentleman's", 11),
('cover)', 11),
('ñtheñ', 11),
('(methodist)', 11),
('size)', 11),
('indexñvocabu-', 10),
('(d)', 10),
('/and', 10),
('sentinel]', 10),
('(eze', 10),
('ñfaith', 10),
('ñindependent', 10),
('ñwhere', 10),
('ñwere', 10),
('la*', 10),
('states)', 10),
('/ah', 10),
('ñalsoñ', 10),
('(specimen', 10),
('ñreligion', 10),
('`an', 10),
('ha/rim', 10),
('(an', 10),
('more)', 10),
('(pp', 10),
('churchesñas', 10),
('ñj', 10),
('__', 10),
('(the)ñby', 10),
('religionñand', 10),
('``', 10),
('ñreview', 10),
('bookñthe', 10),
('(alexander', 10),
('[present', 10),
('nomñany', 10),
('this¥', 10),
('postñoffice', 10),
('ç', 10),
('con_', 10),
('`if', 10),
('(such', 10),
('themñ', 10),
('peoplesñ', 10),
('bible]', 10),
('%c', 10),
('others)', 10),
('governmentñ', 10),
("')", 10),
('ñs', 10),
('`a', 10),
('him)', 10),
('godñand', 10),
('`that', 10),
('ñsome', 10),
('[is]', 10),
('(two-thirds', 10),
('(st', 10),
('morality]', 10),
('and_', 10),
('campbell)', 10),
('*this', 9),
('¥be', 9),
('adventist)', 9),
('manñ', 9),
('union)', 9),
('`no', 9),
('(so', 9),
('ñamerican', 9),
('itñand', 9),
('[see', 9),
('e¥', 9),
('ñliberty', 9),
('(at', 9),
('[his', 9),
('`almost', 9),
('ñfrances', 9),
('cents)', 9),
('peopleña', 9),
('¥by', 9),
('ñkatherine', 9),
('ñchurch', 9),
('ñyour', 9),
('ñnothing', 9),
('(one', 9),
('ñsee', 9),
('(art', 9),
('stateñ', 9),
('ñdoes', 9),
('baptist)', 9),
('(iowa)', 9),
('earthñthe', 9),
('be¥', 9),
('(poetry', 9),
('ñsir', 9),
('itñthe', 9),
('was¥', 9),
('ñtheir', 9),
('papacy]', 9),
('ñlondon', 9),
('ñselected', 9),
('pope]', 9),
('[as', 9),
('ñupon', 9),
('ñcan', 9),
('libertyñthe', 9),
('homeñ', 9),
('of_', 9),
('constitution)', 8),
('(signed)', 8),
('gospelñthe', 8),
('ñofficial', 8),
('beñ', 8),
('do)', 8),
('their¥', 8),
('ñjudge', 8),
('oneñthe', 8),
('peopleñ', 8),
('[small]', 8),
('laws)', 8),
('`yes', 8),
('large]', 8),
('watchwordñthe', 8),
('weekñthe', 8),
('=the', 8),
('weightñwithout', 8),
('speedñwonderful', 8),
('keysñthirty', 8),
('sentinelñdear', 8),
('societyñmrs', 8),
('(some', 8),
('wasñ', 8),
('(their', 8),
('offerñby', 8),
('menñ', 8),
('governmentñthat', 8),
('(v', 8),
('(james', 8),
('touchñlight', 8),
('[here', 8),
('(micr', 8),
('constitutionalñfrom', 8),
('keyboardñuniversal', 8),
('lawñthat', 8),
('alvierica_n', 8),
('ñloyal', 8),
('¥¥-¥', 8),
('ñgold', 8),
('ñthough', 8),
('\\ttin', 8),
('ñchris-', 8),
('ñcolorado', 8),
('that_', 8),
('*as', 8),
('ñpublius', 8),
('[large]', 8),
('is)', 8),
('ñour', 8),
('churches)', 8),
('_to', 8),
('ñmelbourne', 8),
('siredñmedium', 8),
('re¥', 8),
('case)', 8),
('cut)', 8),
('°', 8),
('(seventh-day', 8),
('sayñ', 8),
('¥¥¥¥¥', 8),
('copyrighted)', 8),
('constitution]', 8),
('(papacy)', 8),
('alignmentñperfect', 8),
('ñsilver', 8),
('(brown)', 8),
('(without', 8),
('languagesñseven', 8),
('(h', 8),
('sabbath]', 8),
('course)', 8),
('(still', 8),
('(february', 8),
('`one', 8),
('patenteeñto', 8),
('ñevery', 8),
('(bourgeois)', 8),
('godñin', 8),
('\\t', 8),
('week)', 8),
('¥-¥-¥', 8),
('wheelñsteel', 8),
('(eng', 8),
('ñsel', 8),
('sabbathñis', 8),
('christians)', 8),
('governmentña', 8),
('romeñthe', 8),
('[loud', 8),
('[cheers', 8),
('(unless', 8),
('ours)', 7),
('semiteñand', 7),
('ñfree', 7),
('(laughter', 7),
('people)', 7),
('¥-¥¥', 7),
('(wis', 7),
('(according', 7),
('nationñthe', 7),
('dayñ', 7),
('christianityñthe', 7),
('i/', 7),
('country)', 7),
('to_', 7),
('©', 7),
('labor]', 7),
('ñdecline', 7),
('(conclusion', 7),
('god)', 7),
('(boston)', 7),
('we¥', 7),
('beast]', 7),
('ñthree', 7),
('man)', 7),
('(unitarian)', 7),
('¥but', 7),
('ñmy', 7),
('say)', 7),
('[very', 7),
('(hear', 7),
('(-', 7),
('wordsñ', 7),
('all)', 7),
('ñgibbon', 7),
('(working', 7),
('(when', 7),
('_that', 7),
('(although', 7),
('world)', 7),
('self=cleaning', 7),
('historyñthemes', 7),
('libertyñ', 7),
('be_', 7),
('sabbathñ', 7),
('but]', 7),
('ñshall', 7),
('(lev', 7),
('[meaning', 7),
('religionñ', 7),
('(it)', 7),
('(on', 7),
('-_', 7),
('[n', 7),
('*i', 7),
('ñafter', 7),
('nationñwhat', 7),
('iã', 7),
('-)', 7),
('constitutionñthe', 7),
('ñjewish', 7),
('peopleñand', 7),
('theñ', 7),
('(phil', 7),
('have¥', 7),
('(civil', 7),
('bathñthe', 7),
('matterñas', 7),
('ñdetroit', 7),
('ñcol', 7),
('(they', 7),
('*a', 7),
('godña', 7),
('„', 7),
('(chap', 7),
('moral]', 7),
('mcallisterñno', 7),
('ca_n', 7),
('so)', 7),
('ñlessons', 7),
('[which', 7),
('ñpublishers)', 7),
('tionñthe', 7),
('people]', 7),
("te(ichelis'", 7),
('-(', 7),
('stateña', 7),
('lawñto', 7),
('(section', 7),
('themñand', 7),
('ñhave', 7),
('[hear', 7),
('that¥', 7),
('ñhad', 7),
('m¥', 7),
('[special', 7),
('papacyñthe', 7),
('toast)', 7),
('tionñwhat', 7),
('ö', 7),
('[made', 7),
('(t', 7),
('`you', 7),
('`my', 7),
('menñthe', 7),
('_and', 7),
('menña', 7),
('johnsonñ', 7),
('thisñthat', 7),
('[god]', 7),
('✓', 7),
('ñsince', 7),
('organ)', 7),
('dayña', 7),
('(now', 7),
('[cries', 7),
('no*', 7),
('[it', 7),
('(all', 6),
('ñaddress', 6),
('america_n', 6),
('way)', 6),
('ñprice', 6),
('[worship]', 6),
('themñthat', 6),
('christianityñ', 6),
('ñadvent', 6),
('not)', 6),
('badñ(having)', 6),
('g)', 6),
('ñsurroundings', 6),
('ñindeed', 6),
('sundayña', 6),
('(two', 6),
('a\\', 6),
('`our', 6),
('(mo', 6),
('¥or', 6),
('ñcloth', 6),
('stateñby', 6),
('ci)', 6),
('ñpaul', 6),
('born)', 6),
('%/', 6),
('society)', 6),
('done)', 6),
('¥for', 6),
('(vol', 6),
('ñgalen', 6),
('o)', 6),
('tian(', 6),
('e)', 6),
('ñstate', 6),
('ñyea', 6),
('ñshould', 6),
('[sun-', 6),
('stã', 6),
("jonesñ'", 6),
('¥`', 6),
('ñspain', 6),
('[i', 6),
('ñminnie', 6),
('saysñ', 6),
('peopleñthat', 6),
('ñwhile', 6),
('congressñthe', 6),
('`sunday', 6),
('`in', 6),
('[if', 6),
('faithñ', 6),
('(whether', 6),
('/-', 6),
('ñjoaquin', 6),
('rica_n', 6),
('(like', 6),
('margin]', 6),
('ñgood', 6),
('¥is', 6),
('laws]', 6),
('`is', 6),
('ñofñ', 6),
('countryñthe', 6),
('statesñreligious', 6),
('adventists)', 6),
('faithñneeds', 6),
('[st', 6),
('safe]', 6),
('a_ivierican', 6),
('-¥-¥', 6),
('~¥', 6),
('power]', 6),
('countryñ', 6),
('excellentñchristian', 6),
('thingsñthe', 6),
('butñ', 6),
('ñibid', 6),
('(texas)', 6),
('i*', 6),
('(code', 6),
('(may', 6),
('dayñare', 6),
('<<', 6),
('wordsña', 6),
('ñprof', 6),
('lawñis', 6),
('ñdear', 6),
('[under', 6),
('statesñand', 6),
('s\x8eance', 6),
('societyñl', 6),
('ñnever', 6),
('ñunited', 6),
('revealedñthat', 6),
('ñmost', 6),
('libraryñ', 6),
('ñpp', 6),
('r¥', 6),
('worship)', 6),
('crime(', 6),
('government]', 6),
('(sun-', 6),
('ñblind', 6),
('¥with', 6),
('journal==', 6),
('sentinelñ', 6),
('¥¥¥¥¥¥', 6),
('it¥', 6),
('/i', 6),
('caesarñthe', 6),
('churchñto', 6),
('ñcharles', 6),
('ñed', 6),
('ñdayton', 6),
('(even', 6),
('(verses', 6),
('¥are', 6),
('margin)', 6),
('kindñwithin', 6),
('anierica_n', 6),
('peopleñis', 6),
('authorityñthe', 6),
('congress)', 6),
('`god', 6),
('religion)', 6),
("ñd'aubigne", 6),
('addressñ', 6),
('rome]', 6),
('(catholic', 6),
('god]', 6),
('-¥¥', 6),
(')-', 6),
('religion]', 6),
("o'neil]", 6),
('(md', 6),
('useñexcellent', 6),
('itñto', 6),
('ours]', 6),
('ò', 6),
('b¥', 6),
('beña', 6),
(')f', 6),
('`remember', 6),
('¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥', 6),
('ñfair', 6),
('ha¥s', 6),
('(under', 6),
('hear)', 6),
('(ohio)', 6),
('(december', 6),
('raiira¥', 6),
('¥on', 6),
('(kansas)', 6),
('¨', 6),
('ñd', 5),
('(showing', 5),
('[living]', 5),
('de¥', 5),
('adventists]', 5),
('ôc', 5),
('papacyñthat', 5),
('on¥', 5),
('manualñexpert', 5),
('churches]', 5),
('days)', 5),
('calculatorñliterary', 5),
('christian)', 5),
('crafts)', 5),
('(jer', 5),
('ho)', 5),
('should¥', 5),
('geary]', 5),
('i(', 5),
('christñ', 5),
('/s', 5),
('man¥', 5),
('nuisanceñthe', 5),
('reform]', 5),
('asñ', 5),
('father]', 5),
('especiallyñwriters', 5),
('ñeither', 5),
('*¥', 5),
('ñhere', 5),
('ñabout', 5),
('bibleñand', 5),
('ment)', 5),
('addressñpacific', 5),
("(milman's", 5),
('ñsalesmen', 5),
("'ñthe", 5),
('¥as', 5),
('ñpresident', 5),
('`t', 5),
('ñgo', 5),
('keyñthe', 5),
('[at', 5),
('ñharvest', 5),
('(copyrighted)', 5),
('priceñattractive', 5),
('ñmargaret', 5),
('liberty]', 5),
('f¥', 5),
("'`", 5),
('himñ', 5),
('principles¥', 5),
('(order', 5),
('ñdean', 5),
('power)', 5),
('[these', 5),
('societyña', 5),
('principlesñthe', 5),
('ha/nan', 5),
('beastñthe', 5),
('sab_', 5),
('ñmail', 5),
('`as', 5),
('[accounted', 5),
('ñhalf', 5),
('iñ', 5),
('-¥-', 5),
('christ)', 5),
('powerñ', 5),
('his¥', 5),
('(companion', 5),
('firstñthe', 5),
('are¥', 5),
("¡'", 5),
...]
No obvious foreign language character use. "ñ" appears attached to words as an OCR noise element.
# %load shared_elements/normalize_characters.py
# Correction 1: read every file from the previous cycle, map typographic dashes
# and apostrophes onto their ASCII forms, blank out every remaining character
# outside the allowed set, and write the result into this cycle's directory.
prev = "baseline"
cycle = "correction1"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Substitute for all other dashes.
    # This must be a character class: written as a bare sequence, the pattern
    # only matched those characters appearing together in that exact order, so
    # dashes were never normalized and were blanked out by the last step instead.
    # U+2010..U+2015 covers hyphen, non-breaking hyphen, figure dash, en dash,
    # em dash and horizontal bar.
    content = re.sub(r"[\u2010-\u2015]", r"-", content)

    # Substitute formatted apostrophes (same character-class fix as above):
    # U+2018 left quote, U+2019 right quote, U+201B reversed quote, U+00B4 acute accent.
    content = re.sub(r"[\u2018\u2019\u201B\u00B4]", r"'", content)

    # Replace all special characters with a space (as these tend to occur at the end of lines)
    content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# Re-run the overview report on the directory written by this correction cycle.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction1 Average verified rate: 0.9493256707400606 Average of error rates: 0.051667561921814396 Total token count: 8518613
# Rebuild the error summary from the latest report and list the most frequent
# remaining errors, this time with 500 as the second argument to top_errors.
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 500 )
[('-', 13223),
("'", 8384),
('tion', 6765),
('con-', 5889),
('re-', 5797),
('t', 4325),
('ment', 4118),
('in-', 3972),
('d', 3947),
('e', 3885),
('w', 3760),
('co', 3427),
('m', 3164),
('n', 2998),
('de-', 2931),
('com-', 2716),
('be-', 2455),
('pro-', 2155),
('f', 2010),
('r', 1978),
('sun-', 1967),
('chris-', 1947),
('ex-', 1880),
('th', 1703),
('tions', 1683),
('en-', 1577),
('dis-', 1566),
('govern-', 1495),
('g', 1356),
('gov-', 1340),
('per-', 1284),
('sab-', 1254),
('tian', 1190),
('mo', 1156),
('un-', 1145),
('na-', 1136),
('ernment', 1070),
('reli-', 1058),
('ance', 1036),
('ob-', 1011),
('pre-', 999),
('ments', 947),
('ity', 944),
('sunday-law', 940),
('ad-', 939),
('tional', 914),
('u', 913),
('ac-', 913),
('x', 860),
('ligious', 838),
('im-', 816),
('ap-', 811),
('ple', 798),
('sub-', 740),
('ence', 722),
('an-', 700),
('gious', 675),
('relig-', 666),
('ers', 628),
('ques-', 625),
('peo-', 625),
('at-', 603),
("'the", 600),
('al-', 588),
('as-', 578),
('inter-', 562),
('to-', 556),
('pub-', 550),
('them-', 545),
('fol-', 536),
('ligion', 523),
('prin-', 523),
('constitu-', 521),
('erty', 518),
('sup-', 508)]
Correction 2 -- Correct line endings¶
Reconnect words that were split due to line-endings.
# %load shared_elements/correct_line_endings.py
# Correction 2: rejoin words that were hyphenated across a line break.
prev = cycle
cycle = "correction2"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

# Visible regular files from the previous cycle (dot-files are skipped).
corpus = (
    f
    for f in listdir(directories['prev'])
    if not f.startswith('.') and isfile(join(directories['prev'], f))
)

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # word + "-" + whitespace + lowercase continuation -> the two halves joined,
    # dropping both the hyphen and the whitespace.
    content = re.sub(
        r"(\w+)(\-\s{1,})([a-z]+)",
        r"\1\3",
        content,
    )

    target_path = join(directories['cycle'], filename)
    with open(target_path, mode="w") as outfile:
        outfile.write(content)
# %load shared_elements/summary.py
# Re-run the overview report on the directory written by this correction cycle.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction2 Average verified rate: 0.9777065723174374 Average of error rates: 0.024393763055804242 Total token count: 8357037
# %load shared_elements/top_errors.py
# Rebuild the error summary from the latest report and list the most frequent
# remaining errors, with 250 as the second argument to top_errors.
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 250 )
[('-', 13127),
("'", 8384),
('t', 4309),
('d', 3941),
('e', 3874),
('w', 3758),
('co', 3424),
('m', 3155),
('n', 2990),
('f', 1989),
('r', 1971),
('th', 1703),
('g', 1339),
('mo', 1156),
('sunday-law', 976),
('u', 911),
('x', 860),
("'the", 600),
('--', 418),
('pa', 415),
('q', 373),
('chain-gang', 353),
('sunday-closing', 335),
('tion', 327),
("the'", 313),
('k', 304),
('ex', 304),
('sunday-rest', 299),
('pp', 298),
('-the', 289),
("conscience'", 265),
('ch', 257)]
Correction 3 -- Remove extra dashes¶
# %load shared_elements/remove_extra_dashes.py
# Correction 3: for every token that starts or ends with a dash, replace it in
# the file content with the same token minus that dash.
prev = cycle
cycle = "correction3"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Tokenize a copy with digits and punctuation blanked out; the replacements
    # themselves are applied to the untouched `content`.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    replacements = []
    for token in tokens:
        if not token:
            # Guard against an empty token; indexing it would raise IndexError.
            continue
        # Compare by value (==). `is` tests object identity and only appeared
        # to work because CPython happens to intern one-character strings.
        if token[0] == "-":
            replacements.append((token, token[1:]))
        elif token[-1] == "-":
            replacements.append((token, token[:-1]))

    # NOTE(review): how replace_pair matches the token inside `content`
    # (whole word vs. substring) is defined in text2topics.clean -- confirm.
    for replacement in replacements:
        content = clean.replace_pair(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Re-run the overview report on the directory written by this correction cycle.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction3 Average verified rate: 0.9814938307988949 Average of error rates: 0.020385108922709644 Total token count: 8367534
# %load shared_elements/top_errors.py
# Rebuild the error summary from the latest report, then show only the first
# 50 entries of the top-errors listing (the trailing [:50] slice).
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
[("'", 8554),
('t', 4413),
('e', 4034),
('d', 3964),
('w', 3794),
('co', 3624),
('m', 3204),
('n', 3085),
('r', 2060),
('f', 2026),
('th', 1718),
('g', 1378),
('mo', 1162),
('u', 928),
('x', 864),
("'the", 601),
('ex', 539),
('re', 505),
('pa', 423),
('q', 399),
('sunday-law', 334),
('tion', 328),
('k', 319),
("the'", 313),
('pp', 299),
("conscience'", 265),
('ch', 260),
('seventhday', 249),
("'of", 238),
('ti', 228),
('ga', 227),
('oc', 219),
('z', 215),
('wm', 215),
('un', 210),
('satolli', 210),
('employes', 209),
('munn', 206),
('ca', 205),
('al', 204),
('yo', 202),
('mc', 191),
('ry', 185),
('id', 185),
('ment', 183),
("'a", 175),
('sunday-closing', 159),
("'to", 154),
('se', 153),
('nd', 152)]
Correction 4 -- Remove extra quotation marks¶
# %load shared_elements/remove_extra_quotation_marks.py
# Correction pass: strip stray apostrophes from tokens that begin or end
# with one, reading from the previous cycle and writing to "correction4".
prev = cycle
cycle = "correction4"

directories = utilities.define_directories(prev, cycle, base_dir)

# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(directories['cycle'], exist_ok=True)

# Visible regular files in the previous cycle's directory (dotfiles are skipped).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Blank out digits and selected punctuation before tokenizing.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    corrections = []
    for token in tokens:
        if token.endswith("'"):
            # A lone apostrophe is left alone, and so is a trailing apostrophe
            # after s/S (plural possessives such as "citizens'").
            # The old test `token_list[-2] is 's' or 'S'` was always true
            # because the bare 'S' is truthy, so no trailing apostrophe was
            # ever removed from a multi-character token.
            if len(token) > 1 and token[-2] not in ("s", "S"):
                corrections.append((token, token.replace("'", "")))
        elif token.startswith("'"):
            corrections.append((token, token.replace("'", "")))
        # As in the original, the replacement drops every apostrophe in the
        # token, not only the leading or trailing one.

    # Uncomment to log the corrections made in each file:
    # print('{}: {}'.format(filename, corrections))
    for correction in corrections:
        content = clean.replace_pair(correction, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Report on the files just written to this cycle's directory; the printed
# overview lists the directory, average verified/error rates and token count.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction4 Average verified rate: 0.9824621432313876 Average of error rates: 0.01930259623992838 Total token count: 8367328
# %load shared_elements/top_errors.py
# Collect the error tokens from the summary, then display the first 50
# entries of the ranked list.
# NOTE(review): the second argument looks like a minimum-count cutoff -- confirm in reports.top_errors.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)[:50]
[("'", 8053),
('t', 4487),
('e', 4070),
('d', 3979),
('w', 3797),
('co', 3626),
('m', 3209),
('n', 3104),
('r', 2078),
('f', 2046),
('th', 1721),
('g', 1386),
('mo', 1165),
('u', 930),
('x', 865),
('ex', 539),
('re', 508),
('pa', 427),
('q', 401),
('sunday-law', 334),
('tion', 329),
('k', 320),
("the'", 304),
('pp', 299),
("conscience'", 261),
('ch', 260),
('seventhday', 249),
('ti', 230),
('ga', 228),
('oc', 219),
('z', 217),
('wm', 215),
('un', 210),
('satolli', 210),
('employes', 209),
('al', 208),
('munn', 207),
('ca', 206),
('yo', 203),
('mc', 191),
('id', 189),
('ry', 186),
('ment', 183),
('sunday-closing', 160),
('nd', 159),
('se', 157),
('tinel', 147),
('ll', 141),
('il', 137),
('chain-gang', 136)]
Correction 5 -- Rejoin Burst Words¶
Check errors to see if they form verified tokens when joined with the preceding token.
# %load shared_elements/rejoin_burst_words.py
# Correction pass: look for runs of very short tokens and apply whatever
# replacements clean.check_splits proposes, writing results to "correction5".
prev = cycle
cycle = "correction5"

directories = utilities.define_directories(prev, cycle, base_dir)

# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(directories['cycle'], exist_ok=True)

# Visible regular files in the previous cycle's directory (dotfiles are skipped).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

# A whitespace character followed by five or more groups of one or two word
# characters, each followed by whitespace. Written as a raw string so that
# \s and \w are not treated as (invalid) string escapes, and compiled once
# here instead of on every loop iteration.
pattern = re.compile(r"(\s(\w{1,2}\s){5,})")

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # check_splits is handed the list and its return value is ignored;
    # presumably it appends replacement pairs in place -- confirm in clean.check_splits.
    replacements = []
    clean.check_splits(pattern, spelling_dictionary, content, replacements)

    # Uncomment to log the replacements made in each file:
    # print('{}: {}'.format(filename, replacements))
    for replacement in replacements:
        content = clean.replace_pair(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Report on the files just written to this cycle's directory; the printed
# overview lists the directory, average verified/error rates and token count.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction5 Average verified rate: 0.9824669426623706 Average of error rates: 0.01929677708146822 Total token count: 8367280
# %load shared_elements/top_errors.py
# Collect the error tokens from the summary, then display the first 50
# entries of the ranked list.
# NOTE(review): the second argument looks like a minimum-count cutoff -- confirm in reports.top_errors.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)[:50]
[("'", 8053),
('t', 4483),
('e', 4047),
('d', 3973),
('w', 3794),
('co', 3626),
('m', 3202),
('n', 3097),
('r', 2062),
('f', 2043),
('th', 1721),
('g', 1386),
('mo', 1165),
('u', 930),
('x', 865),
('ex', 539),
('re', 508),
('pa', 427),
('q', 401),
('sunday-law', 334),
('tion', 329),
('k', 320),
("the'", 304),
('pp', 299),
("conscience'", 261),
('ch', 260),
('seventhday', 249),
('ti', 230),
('ga', 228),
('oc', 219),
('z', 217),
('wm', 215),
('un', 210),
('satolli', 210),
('employes', 209),
('al', 208),
('munn', 207),
('ca', 206),
('yo', 203),
('mc', 191),
('id', 189),
('ry', 186),
('ment', 183),
('sunday-closing', 160),
('nd', 159),
('se', 157),
('tinel', 147),
('ll', 141),
('il', 137),
('chain-gang', 136)]
Correction 6 -- Rejoin Split Words¶
# %load shared_elements/rejoin_split_words.py
# Correction pass: identify tokens not in the spelling dictionary and apply
# the split-word replacements proposed by clean.check_if_stem, writing the
# results to "correction6".
prev = cycle
cycle = "correction6"

directories = utilities.define_directories(prev, cycle, base_dir)

# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(directories['cycle'], exist_ok=True)

# Visible regular files in the previous cycle's directory (dotfiles are skipped).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Blank out digits and selected punctuation before tokenizing.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    # NOTE(review): get_prior=False presumably pairs each error with the token
    # that follows it rather than the one before -- confirm in clean.check_if_stem.
    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)

    # Uncomment to log the replacements made in each file:
    # print('{}: {}'.format(filename, replacements))
    for replacement in replacements:
        content = clean.replace_split_words(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
# %load shared_elements/summary.py
# Report on the files just written to this cycle's directory; the printed
# overview lists the directory, average verified/error rates and token count.
summary = reports.overview_report(
    directories['cycle'],
    spelling_dictionary,
    title,
)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction6 Average verified rate: 0.9828407009325851 Average of error rates: 0.018864070426738287 Total token count: 8365027
# %load shared_elements/top_errors.py
# Collect the error tokens from the summary and display the full ranked list
# (no slicing here, unlike the earlier cycles).
# NOTE(review): the second argument looks like a minimum-count cutoff -- confirm in reports.top_errors.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)
[("'", 8053),
('t', 4443),
('e', 3991),
('d', 3960),
('w', 3791),
('co', 3415),
('m', 3195),
('n', 3057),
('r', 2043),
('f', 2042),
('th', 1658),
('g', 1377),
('mo', 1161),
('u', 927),
('x', 864),
('ex', 523),
('pa', 410),
('q', 399),
('sunday-law', 334),
('k', 318),
("the'", 304),
('pp', 299),
('tion', 281),
('re', 279),
("conscience'", 260),
('ch', 254),
('seventhday', 249),
('ga', 221),
('oc', 218),
('z', 215),
('wm', 215),
('satolli', 210),
('employes', 209),
('munn', 207),
('ti', 203),
('id', 186),
('ry', 183),
('al', 175),
('un', 174),
('ment', 173),
('ca', 170),
('sunday-closing', 160),
('nd', 158),
('tinel', 147),
('se', 143),
('ll', 141),
('chain-gang', 136),
("to'", 134),
('il', 127),
('lb', 125),
('bateham', 122),
('cmsar', 121),
('aleck', 112),
("and'", 109),
('socalled', 106),
('te', 105),
('sunday-rest', 104),
('cc', 104),
('sentin', 104),
('milly', 103),
('erican', 102),
("of'", 98),
('va', 97),
('cd', 94),
('nt', 93),
('fellow-citizens', 92),
('vt', 92),
('ican', 90),
('op', 90),
('tt', 89),
('ft', 89),
('aa', 89),
('-', 88),
("a'", 86),
('eze', 84),
('attorney-general', 83),
('ma', 83),
('csar', 81),
('stundists', 80),
('cereola', 79),
('lc', 79),
('ay', 78),
('neander', 78),
('ne', 77),
('li', 77),
('religio-political', 76),
('tions', 75),
('mc', 74),
('law-abiding', 74),
('sundaylaw', 74),
('ni', 73),
('rican', 73),
('ra', 73),
('ia', 72),
('edmunds', 71),
('rd', 71),
("crafts's", 70),
('es', 70),
("is'", 69),
('si', 69),
('sr', 69),
('freethought', 68),
("in'", 67),
("crafts'", 66),
("folks'", 66),
("cmsar's", 66),
('un-american', 64),
('sabbath-day', 63),
('rest-day', 62),
('ity', 61),
('ie', 61),
('ic', 60),
('sabbath-breaking', 60),
("''", 59),
('mt', 59),
("'s", 59),
('pr', 58),
('na', 57),
('ac', 57),
('ernment', 56),
('candidus', 56),
("barbers'", 56),
('paeifie', 56),
('ments', 55),
('tregelles', 54),
('geikie', 54),
("that'", 54),
('ri', 53),
("an'", 53),
('ce', 53),
('dred', 53),
('employe', 52),
("it'", 52),
('ky', 51),
('assoeiation', 51),
('ob', 51),
('litt', 51),
('wellknown', 50),
('tional', 50),
('erty', 50),
('coxey', 50),
('ci', 50),
('ofthe', 49),
('ary', 49),
('vo', 48),
('tian', 48),
('aro', 48),
('ph', 48),
('ow', 47),
('pre', 47),
('ple', 47),
('ent', 47),
('fa', 47),
('cathedra', 46),
("jones'", 46),
('ro', 46),
('leiper', 45),
('mi', 45),
('forit', 45),
("citizens'", 44),
('sh', 43),
('sundayclosing', 43),
('judefind', 42),
('wo', 42),
('ei', 42),
('times-herald', 42),
('chaingang', 42),
('ct', 42),
('tischendorf', 42),
('ers', 41),
('bo', 41),
("o'keefe", 41),
('hiberty', 41),
('em', 41),
('cr', 41),
("be'", 41),
('merican', 40),
("law'", 40),
('ica', 40),
('sunday-keeping', 40),
('ea', 40),
('yo', 40),
('chapelle', 40),
('ip', 40),
('ance', 39),
("i'", 39),
('oi', 39),
('keane', 39),
('tv', 39),
('copygraph', 39),
("waterman's", 38),
('lachmann', 38),
('kai', 38),
("cruden's", 38),
('oa', 37),
('non-sectarian', 37),
('adress', 37),
("csar's", 37),
('saye', 37),
('ly', 37),
('ther', 37),
('ta', 37),
('io', 37),
('church-and-state', 37),
("american'", 37),
('terest', 36),
('tr', 36),
('coxe', 36),
('dwyer', 36),
("for'", 35),
("not'", 35),
('da', 35),
('pf', 35),
('tir', 35),
('td', 35),
('mass-meeting', 35),
('swiggart', 35),
("this'", 35),
('stitution', 35),
('ba', 35),
('fi', 35),
('az', 35),
('law-making', 34),
('first-day', 34),
('jagoe', 34),
('inthe', 34),
('godgiven', 34),
('ns', 34),
("church'", 34),
('entinel', 34),
('sa', 33),
('oo', 33),
('brunot', 33),
('ork', 33),
('ful', 33),
('cwsar', 33),
('ts', 33),
('gious', 33),
('eh', 33),
('cl', 33),
('ss', 32),
("as'", 32),
('vice-presidents', 32),
('base-ball', 32),
('ap', 32),
("infants'", 32),
('rt', 32),
('saloon-keepers', 32),
('lt', 31),
('sun-worship', 31),
("liberty'", 31),
('rs', 31),
('one-seventh', 31),
('prayer-meeting', 31),
('slattery', 31),
('colitical', 30),
('efical', 30),
('mn', 30),
('vox', 30),
('fr', 30),
('ith', 30),
('ao', 29),
('os', 29),
('mg', 29),
("are'", 29),
('oe', 29),
("roberts'", 29),
('ge', 29),
('tc', 29),
('rn', 29),
('kauffman', 29),
('fo', 29),
('crowther', 29),
("workingmen's", 29),
('puplishing', 29),
("all'", 29),
('holidayism', 28),
('eral', 28),
('self-preservation', 28),
('street-cars', 28),
("parkhurst's", 28),
('charta', 28),
('dei', 28),
('newyork', 28),
('durborow', 28),
('liberty-loving', 28),
('om', 28),
('zwiebach', 28),
("fathers'", 28),
('non-catholics', 28),
('tl', 28),
('fair-minded', 27),
("krug's", 27),
("sabbath'", 27),
('non-observance', 27),
('iu', 27),
('schurman', 27),
("cushing's", 27),
('ve', 27),
('mccauley', 27),
('ou', 27),
('self-defense', 27),
('theo', 27),
('fellow-man', 27),
("gibbons'", 27),
("or'", 26),
('counter-arguments', 26),
('sabbathkeeping', 26),
('ence', 26),
('ablegate', 26),
('fora', 26),
('fide', 26),
('platt', 26),
('thon', 26),
("with'", 26),
('pany', 26),
('itis', 26),
('hto', 26),
('bula', 26),
("god'", 26),
('ig', 26),
("which'", 26),
('pecci', 26),
('divinely-appointed', 26),
('ae', 26),
('non-religious', 26),
("by'", 26),
('ous', 26),
('selfgovernment', 26),
('loth', 25),
("lions'", 25),
('weakley', 25),
('eferson', 25),
('ab', 25),
('nethinim', 25),
('weyler', 25),
('feligious', 25),
('fah', 25),
("d'aubigne", 25),
('martinelli', 25),
('tn', 25),
("at'", 25),
('sundayschool', 25),
('ws', 25),
('ridpath', 25),
('tne', 25),
('publishinc', 25),
('ut', 24),
('krug', 24),
('ceesar', 24),
('stuttle', 24),
('mehan', 24),
('tothe', 24),
('tiie', 24),
('ligion', 24),
('ee', 24),
("preachers'", 24),
('ber', 24),
("if'", 24),
('phelan', 24),
('ib', 23),
('humbert', 23),
('ious', 23),
('lawabiding', 23),
('twentyfive', 23),
('atterbury', 23),
('sunday-sabbath', 23),
('nn', 23),
('arierican', 23),
('ble', 23),
("saints'", 23),
('tbe', 22),
('anb', 22),
('reli', 22),
('ili', 22),
('ef', 22),
('od', 22),
('bt', 22),
('tb', 22),
('ligious', 22),
("have'", 22),
('sas', 22),
('scudder', 22),
('sel', 22),
('wi', 22),
('gi', 22),
('anti-christian', 22),
("we'", 22),
('jeferson', 22),
('xact', 21),
("pub'rs", 21),
("grocers'", 21),
("e'", 21),
('comegys', 21),
('scovel', 21),
('sevent', 21),
('tianity', 21),
('tivity', 21),
('po', 21),
('ject', 21),
('mee', 21),
('ress', 21),
('witham', 21),
('thos', 21),
('ng', 21),
('yefferson', 21),
('ive', 21),
("sunday'", 21),
('notgive', 21),
('eousness', 21),
('postmaster-general', 20),
("hutchings'", 20),
('washburne', 20),
("he'", 20),
('religiopolitical', 20),
('kellog', 20),
('romer', 20),
('healthgiving', 20),
("soldiers'", 20),
('rr', 20),
("satolli's", 20),
('mcglynn', 20),
('sien', 20),
("from'", 20),
("hours'", 20),
('pt', 20),
('anierican', 20),
('rosemond', 20),
("vick's", 20),
('parens', 20),
('bythe', 20),
("on'", 20),
('eign', 20),
('longnecker', 20),
("was'", 20),
("printers'", 19),
('anierica', 19),
('pressense', 19),
('fbr', 19),
('micr', 19),
('everts', 19),
('rorabacher', 19),
("pastors'", 19),
("gov't", 19),
('tp', 19),
('iti', 19),
('ies', 19),
('ible', 19),
('seelye', 19),
('arther', 19),
('wishart', 19),
("people'", 19),
('cosgrove', 19),
('gt', 19),
('det', 19),
('lery', 19),
('abbe', 19),
('ey', 19),
('ation', 19),
("day'", 19),
('hagans', 19),
('montefiore', 19),
("will'", 19),
('governor-general', 19),
('chain-gangs', 19),
('law-makers', 19),
('stundist', 19),
('sundaykeeping', 18),
('dc', 18),
('tae', 18),
('enright', 18),
('anti-catholic', 18),
('non-interference', 18),
('tht', 18),
('oz', 18),
('efferson', 18),
("th'", 18),
('ol', 18),
('tarawera', 18),
('curlett', 18),
('tii', 18),
('tolstoi', 18),
('self-styled', 18),
('--', 18),
("would'", 18),
('nel', 18),
('um', 18),
('ccesar', 18),
('oity', 18),
('wa', 18),
('etteer', 18),
('avery-stuttle', 18),
('nnw', 17),
('mal', 17),
('bf', 17),
('prin', 17),
("righteousness'", 17),
('jt', 17),
('clingman', 17),
('cedarquist', 17),
('newyorkcity', 17),
('tra', 17),
('ical', 17),
('ricans', 17),
('saloon-keeper', 17),
('rubiana', 17),
('prisot', 17),
('post-offices', 17),
('theunited', 17),
('nu', 17),
("no'", 17),
('fl', 17),
('sabbathbreaking', 17),
("a'nan", 17),
('sommerville', 17),
('church-going', 17),
('vernment', 17),
('cood', 17),
('mullally', 17),
('self-governing', 17),
('ist', 17),
('bondst', 17),
('philpott', 17),
('law-breaker', 17),
('ik', 17),
('senti', 17),
('ul', 17),
('ame', 17),
('leivites', 17),
('pel', 17),
('vites', 17),
("apostles'", 17),
('hy', 17),
("schaff's", 16),
('dieu', 16),
('selfevident', 16),
('ag', 16),
('dayto', 16),
('ioo', 16),
('tf', 16),
('prepartion', 16),
('cp', 16),
("enright's", 16),
("his'", 16),
('mit', 16),
('oth', 16),
('relig', 16),
('thepeople', 16),
('sie', 16),
('alfaro', 16),
('symmachus', 16),
('xl', 16),
('su', 16),
('bi', 16),
('facto', 16),
('ples', 16),
('erromanga', 16),
('sunday-keepers', 16),
('dividual', 16),
('peryear', 16),
('peffer', 16),
('re-enact', 16),
('ish', 16),
('socialpurity', 16),
('tains', 16),
('cs', 16),
('wilkie', 16),
("l'", 16),
('hodgson', 16),
('basle', 16),
('bas', 16),
('zi', 15),
('tem', 15),
("who'", 15),
('ite', 15),
('sabbath-breakers', 15),
('americansentinel', 15),
('rhe', 15),
('nonsuch', 15),
('lieve', 15),
('pepsia', 15),
('co-workers', 15),
('gallinger', 15),
('labberton', 15),
('thatthe', 15),
('intrust', 15),
('lttra', 15),
('aw', 15),
('law-breakers', 15),
('milman', 15),
('plete', 15),
('lished', 15),
('rampolla', 15),
("christian'", 15),
('wellbeing', 15),
("milman's", 15),
('klip', 15),
('ons', 15),
('ctesar', 15),
("their'", 15),
('re-enacted', 15),
('populi', 15),
('governinent', 15),
('wor', 15),
('hach', 15),
('sc', 15),
("miles'", 15),
("ginn's", 15),
('ih', 15),
('janes', 15),
('ov', 15),
('sulus', 15),
('stinday', 15),
('xo', 15),
('ectarian', 15),
("o'gorman", 15),
('tkt', 15),
('ddress', 15),
("they'", 15),
('alvierica', 15),
('gress', 15),
('je', 15),
('birney', 15),
('ny', 15),
("religion'", 15),
('avery-stiittle', 15),
('sf', 15),
('tians', 15),
('np', 14),
('thb', 14),
('ver', 14),
('olic', 14),
('qa', 14),
("pres'ts", 14),
('secker', 14),
('intelligeneer', 14),
("'the", 14),
('yeferson', 14),
('ual', 14),
('self-exaltation', 14),
("tourists'", 14),
('chiniquy', 14),
('rittenhouse', 14),
('ormore', 14),
("moses'", 14),
('peo', 14),
('goverment', 14),
('thp', 14),
("sup'ts", 14),
("gault's", 14),
('cortlandt', 14),
('non-union', 14),
('br', 14),
("ccesar's", 14),
('anglo-saxons', 14),
('christain', 14),
('sp', 14),
('restday', 14),
('nr', 14),
('rv', 14),
('eemperance', 14),
('sabbath-breaker', 14),
('gb', 14),
("mcallister's", 14),
('rian', 14),
('malum', 14),
("williams'", 14),
("neander's", 14),
("adventists'", 14),
('lexow', 14),
('confreres', 14),
('thr', 14),
('ncluding', 14),
('af', 14),
('sient', 14),
('tution', 14),
('gl', 14),
('tennesseeans', 14),
('mu', 14),
("but'", 13),
('fon', 13),
('christ-like', 13),
('aivierican', 13),
('leaguers', 13),
('wu', 13),
("mf'g", 13),
('hoc', 13),
('dibbs', 13),
('anti-religious', 13),
('themies', 13),
('dont', 13),
('ex-president', 13),
('gr', 13),
("one'", 13),
('ine', 13),
('two-horned', 13),
('rp', 13),
("coxey's", 13),
('higinbotham', 13),
("t'", 13),
("protestants'", 13),
('pilman', 13),
('froni', 13),
('foi', 13),
('meeting-house', 13),
('mccourt', 13),
('thd', 13),
('waupon', 13),
("f'", 13),
("has'", 13),
('itt', 13),
('hiscock', 13),
('self-contradictory', 13),
("torry's", 13),
('cif', 13),
('gainst', 13),
("its'", 13),
('dn', 13),
('princi', 13),
('cer', 13),
('thi', 13),
('ec', 13),
('hee', 13),
('sabbathkeepers', 13),
('lelvites', 13),
('one-man', 13),
('tms', 13),
('rundschau', 13),
('tlie', 13),
('tax-payers', 13),
('non-christian', 13),
('self-appointed', 13),
("breeders'", 13),
('kossean', 13),
('olesen', 13),
('botkine', 13),
('ntinel', 13),
('volksraad', 13),
("whaley's", 12),
('constitu', 12),
('ke', 12),
('ets', 12),
('pm', 12),
('ess', 12),
('froin', 12),
('robb', 12),
('theire', 12),
('thein', 12),
('ors', 12),
('chappelle', 12),
("churches'", 12),
('self-constituted', 12),
('wouldbe', 12),
('entin', 12),
('week-day', 12),
('thority', 12),
('fast-day', 12),
("were'", 12),
('sk', 12),
('ex-mayor', 12),
('fortynine', 12),
('ture', 12),
('bok', 12),
('whitall', 12),
("cents'", 12),
('sition', 12),
('tte', 12),
('self-interest', 12),
('croker', 12),
("d'aubigne's", 12),
('merous', 12),
('cai', 12),
('combatting', 12),
('vention', 12),
('observa', 12),
('fp', 12),
('yonx', 12),
("such'", 12),
('ht', 12),
('ds', 12),
('masse', 12),
('self-respecting', 12),
('pc', 12),
('ivierican', 12),
('tobe', 12),
("do'", 12),
("christ'", 12),
('ki', 12),
("neat's", 12),
('twenty-fifth', 12),
('ttin', 12),
('maurer', 12),
('bondstreet', 12),
('inter-state', 12),
('lation', 12),
('ang', 12),
("any'", 12),
('rk', 12),
('gx', 12),
('sunday-observance', 12),
('havergal', 11),
("james'", 11),
('thechurch', 11),
('sm', 11),
('cz', 11),
('df', 11),
("dealers'", 11),
('ex-senator', 11),
('erson', 11),
('uncompromis', 11),
('mm', 11),
('ernments', 11),
('sherk', 11),
('fifty-second', 11),
('selfpreservation', 11),
('derstanding', 11),
('nished', 11),
('naw', 11),
('tre', 11),
("states'", 11),
('theni', 11),
("state'", 11),
('communica', 11),
('ular', 11),
('rose-wood', 11),
('androscoggin', 11),
("bakers'", 11),
('mis', 11),
('dition', 11),
('ure', 11),
('forthe', 11),
('taschereau', 11),
('qt', 11),
('tm', 11),
('griffitts', 11),
('fellow-workers', 11),
('kw', 11),
('bradfield', 11),
('houk', 11),
('fot', 11),
("so'", 11),
("'a", 11),
('amyot', 11),
('muskoka', 11),
('pl', 11),
('aivierica', 11),
('erties', 11),
('qf', 11),
('bl', 11),
('merica', 11),
('haye', 11),
('ost', 11),
('ev', 11),
('isthepapacyinprophecy', 11),
('sev', 11),
('mits', 11),
('notre', 11),
('key-note', 11),
('hirsch', 11),
('sealings', 11),
('rin', 11),
('evil-doers', 11),
('nott', 11),
("civil'", 11),
('ml', 11),
('ete', 11),
('kee', 11),
('rc', 11),
('yr', 11),
('ex-governor', 11),
('cramer', 11),
('lr', 11),
('fs', 11),
('informations', 11),
('paoipio', 11),
('twentyfour', 11),
('ridgetown', 11),
('axact', 11),
('times-democrat', 11)]
Correction 7 -- Rejoin Split Words II¶
# %load shared_elements/rejoin_split_words.py
# Second split-word pass: same procedure as the previous cycle but with
# get_prior=True, writing the results to "correction7". Every file that
# receives replacements is logged.
prev = cycle
cycle = "correction7"

directories = utilities.define_directories(prev, cycle, base_dir)

# exist_ok=True replaces the separate exists() check and its check-then-create race.
os.makedirs(directories['cycle'], exist_ok=True)

# Visible regular files in the previous cycle's directory (dotfiles are skipped).
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Blank out digits and selected punctuation before tokenizing.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    # NOTE(review): get_prior=True presumably pairs each error with the token
    # that precedes it -- confirm in clean.check_if_stem.
    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)

    if replacements:
        # Only files that actually changed are reported.
        print('{}: {}'.format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)

    # The context manager closes the file; no explicit close() is needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
AmSn18860101-V01-01-page3.txt: [('to', 're')]
AmSn18860301-V01-03-page1.txt: [('SEN', 'TINEL')]
AmSn18860301-V01-03-page3.txt: [('in', 'stitutions')]
AmSn18860301-V01-03-page8.txt: [('SEN', 'TINEL')]
AmSn18860401-V01-04-page1.txt: [('am', 'endmentists')]
AmSn18860601-V01-06-page1.txt: [('he', 're'), ('in', 'stitute')]
AmSn18860601-V01-06-page7.txt: [('PAR', 'LIAMENTARY'), ('AMER', 'ICAN')]
AmSn18860701-V01-07-page3.txt: [('fruit', 'ful')]
AmSn18860701-V01-07-page5.txt: [('a', 'ment')]
AmSn18860701-V01-07-page6.txt: [('Ch', 'aldea')]
AmSn18860801-V01-08-page1.txt: [('amen', 'dmentists')]
AmSn18860801-V01-08-page5.txt: [('the', 'Re')]
AmSn18860801-V01-08-page6.txt: [('to', 're')]
AmSn18860901-V01-09-page3.txt: [('Nation', 'al'), ('a', 'Na')]
AmSn18860901-V01-09-page4.txt: [('Amen', 'dmentists'), ('in', 'sistency')]
AmSn18860901-V01-09-page7.txt: [('and', 're')]
AmSn18861001-V01-10-page3.txt: [('when', 'ce')]
AmSn18861001-V01-10-page5.txt: [('be', 'ment')]
AmSn18861101-V01-11-page6.txt: [('the', 're')]
AmSn18861101-V01-11-page8.txt: [('Nation', 'al')]
AmSn18861201-V01-12-page1.txt: [('a', 're')]
AmSn18861201-V01-12-page2.txt: [('do', 'ers'), ('the', 'mis')]
AmSn18861201-V01-12-page6.txt: [('to', 're')]
AmSn18861201-V01-12-page7.txt: [('DE', 'STRUCTION'), ('ap', 'preciated')]
AmSn18870101-V02-01-page1.txt: [('fa', 'vorable')]
AmSn18870101-V02-01-page2.txt: [('Reform', 'ers'), ('SEN', 'TINEL'), ('re', 'sults'), ('in', 'dicated'), ('the', 're')]
AmSn18870101-V02-01-page3.txt: [('SEN', 'TINEL'), ('rep', 'robation')]
AmSn18870101-V02-01-page7.txt: [('sev', 'enty'), ('Constitution', 'al')]
AmSn18870301-V02-03-page2.txt: [('Social', 'istic'), ('in', 'scription')]
AmSn18870301-V02-03-page3.txt: [('la', 'Ws')]
AmSn18870301-V02-03-page4.txt: [('decal', 'ogue'), ('to', 'co'), ('Associ', 'ation')]
AmSn18870301-V02-03-page5.txt: [('f', 'orce')]
AmSn18870301-V02-03-page6.txt: [('Pres', 'byterian')]
AmSn18870301-V02-03-page8.txt: [('AMER', 'ICAN')]
AmSn18870401-V02-04-page6.txt: [('to', 'es'), ('amen', 'dment'), ('es', 'tablish')]
AmSn18870401-V02-04-page8.txt: [('SEN', 'TINEL')]
AmSn18870501-V02-05-page3.txt: [('au', 'thority')]
AmSn18870501-V02-05-page7.txt: [('COM', 'PLETE')]
AmSn18870601-V02-06-page8.txt: [('dimin', 'ished')]
AmSn18870701-V02-07-page3.txt: [('Gov', 'ernment')]
AmSn18870701-V02-07-page8.txt: [('b', 'id')]
AmSn18870801-V02-08-page8.txt: [('a', 'Te')]
AmSn18870901-V02-09-page6.txt: [('in', 'terest')]
AmSn18871001-V02-10-page1.txt: [('SEN', 'TINEL'), ('condi', 'tions')]
AmSn18871101-V02-11-page8.txt: [('Chris', 'tianity')]
AmSn18871201-V02-12-page1.txt: [('Associ', 'ation')]
AmSn18871201-V02-12-page3.txt: [('SEN', 'TINEL')]
AmSn18871201-V02-12-page5.txt: [('De', 'ception')]
AmSn18871201-V02-12-page8.txt: [('SEN', 'TINEL'), ('Con', 'Tention')]
AmSn18880101-V03-01-page1.txt: [('ex', 'actly')]
AmSn18880101-V03-01-page7.txt: [('r', 'oo'), ('sol', 'dier')]
AmSn18880101-V03-01-page8.txt: [('SEN', 'TINEL')]
AmSn18880201-V03-02-page1.txt: [('Sta', 'te')]
AmSn18880301-V03-03-page2.txt: [('conse', 'quently')]
AmSn18880301-V03-03-page8.txt: [('SEN', 'TINEL')]
AmSn18880401-V03-04-page1.txt: [('legis', 'lative')]
AmSn18880501-V03-05-page3.txt: [('a', 'nd'), ('hypo', 'Crites')]
AmSn18880501-V03-05-page7.txt: [('Prot', 'estant')]
AmSn18880601-V03-06-page2.txt: [('lib', 'erty')]
AmSn18880601-V03-06-page7.txt: [('you', 'th'), ('PRES', 'ERVATION')]
AmSn18880701-V03-07-page6.txt: [('ADDI', 'TIONAL')]
AmSn18880701-V03-07-page7.txt: [('ad', 'Vocate'), ('the', 're')]
AmSn18880801-V03-08-page5.txt: [('indus', 'trious')]
AmSn18880801-V03-08-page7.txt: [('biblic', 'al')]
AmSn18880901-V03-09-page4.txt: [('ar', 'rayed')]
AmSn18880901-V03-09-page5.txt: [('perse', 'cuted'), ('ten', 'ets'), ('we', 're')]
AmSn18880901-V03-09-page7.txt: [('con', 'tinuance')]
AmSn18881001-V03-10-page1.txt: [('SEN', 'TINEL')]
AmSn18881001-V03-10-page2.txt: [('fa', 'vor')]
AmSn18881001-V03-10-page6.txt: [('swe', 'eping')]
AmSn18881001-V03-10-page8.txt: [('amend', 'Ment')]
AmSn18881015-V03-10a-page5.txt: [('re', 'ligious'), ('SEN', 'TINEL')]
AmSn18881015-V03-10a-page7.txt: [('PRES', 'ERVATION')]
AmSn18881101-V03-11-page6.txt: [('genera', 'tions')]
AmSn18881201-V03-12-page3.txt: [('no', 'ma')]
AmSn18881201-V03-12-page4.txt: [('or', 'ganization')]
AmSn18881201-V03-12-page5.txt: [('temper', 'ance')]
AmSn18881201-V03-12-page6.txt: [('SEN', 'TINEL')]
AmSn18881201-V03-12-page8.txt: [('SEN', 'TINELS'), ('AMER', 'ICAN'), ('s', 'chool')]
AmSn18881201-V03-12-page9.txt: [('Go', 'dless'), ('Go', 'od'), ('Go', 'vernmental'), ('N', 'ational'), ('Go', 'vernment'), ('Go', 'spel')]
AmSn18890101-V04-01-page8.txt: [('SEN', 'TINEL')]
AmSn18890130-V04-02-page7.txt: [('car', 'ried'), ('C', 'ANVASSERS')]
AmSn18890130-V04-02-page8.txt: [('to', 'ro')]
AmSn18890206-V04-03-page2.txt: [('re', 'spect')]
AmSn18890206-V04-03-page7.txt: [('S', 'ABBATH')]
AmSn18890213-V04-04-page5.txt: [('SEN', 'TINEL'), ('par', 'ies')]
AmSn18890213-V04-04-page6.txt: [('j', 'ust'), ('ref', 'erence')]
AmSn18890213-V04-04-page7.txt: [('s', 'ABBATH')]
AmSn18890213-V04-04-page8.txt: [('insti', 'tuted'), ('AMER', 'ICAN')]
AmSn18890220-V04-05-page4.txt: [('i', 'ndorsement')]
AmSn18890220-V04-05-page7.txt: [('S', 'HORTHAND'), ('a', 'reli'), ('DIS', 'COVERIES')]
AmSn18890220-V04-05-page8.txt: [('SEN', 'TINEL')]
AmSn18890227-V04-06-page2.txt: [('the', 'Ca')]
AmSn18890227-V04-06-page7.txt: [('DIS', 'COVERIES')]
AmSn18890306-V04-07-page1.txt: [('e', 'th')]
AmSn18890306-V04-07-page3.txt: [('SEN', 'TINEL'), ('AMUSE', 'MENTS')]
AmSn18890306-V04-07-page5.txt: [('a', 'nd')]
AmSn18890306-V04-07-page7.txt: [('DIS', 'COVERIES')]
AmSn18890313-V04-08-page2.txt: [('h', 'alf')]
AmSn18890313-V04-08-page3.txt: [('so', 're')]
AmSn18890313-V04-08-page7.txt: [('an', 'Ab')]
AmSn18890320-V04-09-page1.txt: [('SEN', 'TINEL')]
AmSn18890320-V04-09-page7.txt: [('Ab', 'surdity'), ('an', 'Ab')]
AmSn18890327-V04-10-page7.txt: [('Or', 'ders')]
AmSn18890403-V04-11-page8.txt: [('Ber', 'th'), ('at', 'onement')]
AmSn18890410-V04-12-page7.txt: [('to', 'Ca'), ('p', 'OP'), ('Ca', 'sar')]
AmSn18890410-V04-12-page8.txt: [('SEN', 'TINEL'), ('la', 've'), ('AMER', 'ICAN')]
AmSn18890417-V04-13-page1.txt: [('AMER', 'ICAN')]
AmSn18890417-V04-13-page7.txt: [('S', 'OUTH'), ('S', 'HORTHAND'), ('T', 'ATTLE'), ('F', 'AMILY')]
AmSn18890417-V04-13-page8.txt: [('CALIF', 'ORNIA')]
AmSn18890424-V04-14-page7.txt: [('to', 'ri'), ('DIS', 'COVERIES'), ('e', 'CO')]
AmSn18890501-V04-15-page1.txt: [('as', 'sembly')]
AmSn18890501-V04-15-page7.txt: [('LIBRA', 'RY'), ('DIS', 'COVERIES'), ('con', 'tains')]
AmSn18890501-V04-15-page8.txt: [('con', 'tains')]
AmSn18890515-V04-16-page7.txt: [('LIBRA', 'RY')]
AmSn18890529-V04-18-page6.txt: [('AMER', 'ICAN')]
AmSn18890529-V04-18-page7.txt: [('LIBRA', 'RY'), ('A', 'RCHITECTS')]
AmSn18890529-V04-18-page8.txt: [('AMER', 'ICAN')]
AmSn18890605-V04-19-page1.txt: [('Ber', 'th')]
AmSn18890605-V04-19-page3.txt: [('toot', 'hbrushes')]
AmSn18890605-V04-19-page7.txt: [('L', 'ife')]
AmSn18890612-V04-20-page7.txt: [('s', 'ecs')]
AmSn18890612-V04-20-page8.txt: [('SEN', 'TINEL')]
AmSn18890619-V04-21-page1.txt: [('j', 'udicial')]
AmSn18890619-V04-21-page7.txt: [('a', 'nd')]
AmSn18890626-V04-22-page4.txt: [('lib', 'erty')]
AmSn18890626-V04-22-page7.txt: [('o', 'ct'), ('P', 'ENCIL')]
AmSn18890626-V04-22-page8.txt: [('AMER', 'ICAN')]
AmSn18890703-V04-23-page6.txt: [('immo', 'rality')]
AmSn18890703-V04-23-page7.txt: [('DIS', 'COVERIES'), ('to', 'rr')]
AmSn18890710-V04-24-page7.txt: [('DIS', 'COVERIES'), ('P', 'HOTOGRAPH')]
AmSn18890717-V04-25-page5.txt: [('ef', 'fectually')]
AmSn18890717-V04-25-page7.txt: [('L', 'ife'), ('H', 'appy')]
AmSn18890724-V04-26-page5.txt: [('or', 'iginators')]
AmSn18890724-V04-26-page7.txt: [('P', "UBLISHERS'"), ('A', 'lso')]
AmSn18890731-V04-27-page3.txt: [('har', 'mony')]
AmSn18890731-V04-27-page8.txt: [('SEN', 'TINEL')]
AmSn18890807-V04-28-page3.txt: [('an', 'sWerable')]
AmSn18890807-V04-28-page6.txt: [('of', 'fice')]
AmSn18890814-V04-29-page3.txt: [('SE', 'NTINEL'), ('on', 'ly')]
AmSn18890814-V04-29-page7.txt: [('A', 'RCHITECTS')]
AmSn18890821-V04-30-page7.txt: [('R', 'eform'), ('D', 'ress'), ('L', "adies'"), ('P', 'ACIFIC')]
AmSn18890828-V04-31-page2.txt: [('amuse', 'ments'), ('char', 'acter')]
AmSn18890828-V04-31-page4.txt: [('j', 'udiciary')]
AmSn18890828-V04-31-page7.txt: [('T', 'IA'), ('S', 'AFETY')]
AmSn18890905-V04-32-page3.txt: [('a', 'cre')]
AmSn18890911-V04-33-page2.txt: [('follow', 'ers')]
AmSn18890911-V04-33-page4.txt: [('discov', 'ered')]
AmSn18890911-V04-33-page7.txt: [('A', 'RE')]
AmSn18890918-V04-34-page5.txt: [('ac', 'tions'), ('con', 'trary')]
AmSn18890918-V04-34-page7.txt: [('i', 'ns')]
AmSn18890925-V04-35-page1.txt: [('per', 'secutes')]
AmSn18890925-V04-35-page2.txt: [('prov', 'ided')]
AmSn18890925-V04-35-page7.txt: [('cele', 'brated'), ('WIS', 'CONSIN')]
AmSn18891002-V04-36-page1.txt: [('re', 'spect')]
AmSn18891002-V04-36-page4.txt: [('bro', 'ught')]
AmSn18891002-V04-36-page7.txt: [('C', 'ivil'), ('P', 'ACIFIC'), ('W', 'aggoner')]
AmSn18891002-V04-36-page8.txt: [('re', 'spects'), ('who', 're'), ('persist', 'ent')]
AmSn18891009-V04-37-page1.txt: [('leg', 'islatures')]
AmSn18891009-V04-37-page2.txt: [('SEN', 'TINEL')]
AmSn18891009-V04-37-page6.txt: [('ab', 'sence')]
AmSn18891009-V04-37-page8.txt: [('SEN', 'TINEL')]
AmSn18891016-V04-38-page7.txt: [('WIS', 'CONSIN'), ('t', 'ie')]
AmSn18891016-V04-38-page8.txt: [('AMER', 'ICAN')]
AmSn18891023-V04-39-page4.txt: [('to', 'co')]
AmSn18891023-V04-39-page7.txt: [('WIS', 'CONSIN'), ('DIS', 'COVERIES')]
AmSn18891030-V04-40-page3.txt: [('dis', 'tinction')]
AmSn18891030-V04-40-page7.txt: [('WIS', 'CONSIN'), ('DIS', 'COVERIES')]
AmSn18891106-V04-41-page2.txt: [('state', 'ment')]
AmSn18891106-V04-41-page3.txt: [('a', 'll')]
AmSn18891106-V04-41-page8.txt: [('e', 'rr')]
AmSn18891113-V04-42-page1.txt: [('af', 'fections')]
AmSn18891113-V04-42-page2.txt: [('to', 'es')]
AmSn18891113-V04-42-page3.txt: [('a', 'ppointed'), ('a', 'nd'), ('a', 'll')]
AmSn18891113-V04-42-page5.txt: [('Gov', 'ernment')]
AmSn18891113-V04-42-page6.txt: [('re', 'ligious'), ('a', 're')]
AmSn18891113-V04-42-page7.txt: [('with', 'Ee'), ('S', 'cholarship')]
AmSn18891120-V04-43-page3.txt: [('fes', 'ses'), ('fail', 'ure')]
AmSn18891120-V04-43-page6.txt: [('B', 'IC'), ('an', 'noyance')]
AmSn18891120-V04-43-page8.txt: [('be', 'lieve')]
AmSn18891127-V04-44-page2.txt: [('persecu', 'tion')]
AmSn18891127-V04-44-page7.txt: [('to', 'NI'), ('inf', 'orm')]
AmSn18891204-V04-45-page3.txt: [('Massa', 'chusetts')]
AmSn18891204-V04-45-page7.txt: [('polit', 'ical'), ('comp', 'ete'), ('l', 'ino')]
AmSn18891211-V04-46-page8.txt: [('SEN', 'TINEL')]
AmSn18891218-V04-47-page1.txt: [('SEN', 'TINEL'), ('the', 'orize')]
AmSn18891218-V04-47-page5.txt: [('re', 'ligious')]
AmSn18891218-V04-47-page8.txt: [('SEN', 'TINEL'), ('AMER', 'ICAN')]
AmSn18891225-V04-48-page1.txt: [('SEN', 'TINEL'), ('pre', 'vailing')]
AmSn18891225-V04-48-page2.txt: [('Chris', 'tian')]
AmSn18891225-V04-48-page5.txt: [('we', 're')]
AmSn18891225-V04-48-page6.txt: [('danger', 'Ous')]
AmSn18891225-V04-48-page7.txt: [('ER', 'ICA')]
AmSn18891225-V04-48-page8.txt: [('ab', 'ut')]
AmSn18891225-V04-48-page9.txt: [('amend', 'ment')]
AmSn18900102-V05-01-page1.txt: [('SEN', 'TINEL')]
AmSn18900109-V05-02-page3.txt: [('SEN', 'TINEL')]
AmSn18900109-V05-02-page7.txt: [('P', 'IE')]
AmSn18900116-V05-03-page3.txt: [('AMER', 'ICAN')]
AmSn18900116-V05-03-page4.txt: [('Chris', 'tians')]
AmSn18900116-V05-03-page7.txt: [('houseke', 'epers'), ('S', 'ID')]
AmSn18900123-V05-04-page1.txt: [('fo', 'rce')]
AmSn18900123-V05-04-page2.txt: [('per', 'tains')]
AmSn18900123-V05-04-page4.txt: [('Con', 'gress')]
AmSn18900123-V05-04-page6.txt: [('a', 'nd'), ('a', 'rc')]
AmSn18900123-V05-04-page7.txt: [('ma', 'terial')]
AmSn18900123-V05-04-page8.txt: [('mi', 'Ssionary')]
AmSn18900130-V05-05-page1.txt: [('re', 'ceive'), ('to', 're')]
AmSn18900130-V05-05-page4.txt: [('addi', 'tion')]
AmSn18900130-V05-05-page5.txt: [('SEN', 'TINEL')]
AmSn18900130-V05-05-page7.txt: [('a', 'da'), ('the', 're')]
AmSn18900130-V05-05-page8.txt: [('treas', 'ury')]
AmSn18900206-V05-06-page3.txt: [('threat', 'ening')]
AmSn18900206-V05-06-page5.txt: [('fur', 'ther')]
AmSn18900206-V05-06-page7.txt: [('M', 'edicine'), ('P', 'IE'), ('D', 'om')]
AmSn18900206-V05-06-page8.txt: [('Legis', 'lature')]
AmSn18900213-V05-07-page7.txt: [('SIG', 'NS')]
AmSn18900220-V05-08-page1.txt: [('the', 'se')]
AmSn18900220-V05-08-page2.txt: [('utilita', 'rian')]
AmSn18900220-V05-08-page6.txt: [('sic', 'kness')]
AmSn18900227-V05-09-page1.txt: [('origi', 'nated')]
AmSn18900227-V05-09-page2.txt: [('Ken', 'tucky'), ('AMER', 'ICAN'), ('Com', 'mittee')]
AmSn18900227-V05-09-page3.txt: [('SEC', 'TION')]
AmSn18900227-V05-09-page7.txt: [('a', 'nd')]
AmSn18900306-V05-10-page1.txt: [('Pa', 'se')]
AmSn18900306-V05-10-page2.txt: [('Do', 'uay'), ('Wis', 'consin'), ('Roman', 'Ce')]
AmSn18900306-V05-10-page6.txt: [('man', 'IC')]
AmSn18900306-V05-10-page7.txt: [('E', 'RI')]
AmSn18900313-V05-11-page4.txt: [('pa', 'rt')]
AmSn18900313-V05-11-page6.txt: [('evi', 'dently')]
AmSn18900313-V05-11-page7.txt: [('AMER', 'ICAN')]
AmSn18900320-V05-12-page6.txt: [('SEN', 'TINEL')]
AmSn18900327-V05-13-page2.txt: [('o', 'wn')]
AmSn18900327-V05-13-page7.txt: [('Y', 'es')]
AmSn18900403-V05-14-page2.txt: [('syS', 'tem')]
AmSn18900410-V05-15-page3.txt: [('neces', 'sArily')]
AmSn18900410-V05-15-page4.txt: [('par', 'se')]
AmSn18900410-V05-15-page6.txt: [('ques', 'tion')]
AmSn18900410-V05-15-page7.txt: [('M', 'ILLIONS')]
AmSn18900417-V05-16-page1.txt: [('sup', 'pression'), ('V', 'OLUME')]
AmSn18900417-V05-16-page6.txt: [('s', 'chool')]
AmSn18900417-V05-16-page7.txt: [('M', 'ILLIONS')]
AmSn18900417-V05-16-page8.txt: [('Cath', 'olic')]
AmSn18900424-V05-17-page7.txt: [('eve', 'ryone'), ('W', 'orld')]
AmSn18900501-V05-18-page1.txt: [('til', 'th')]
AmSn18900501-V05-18-page2.txt: [('on', 'ly'), ('Chris', 'tians')]
AmSn18900501-V05-18-page6.txt: [('objec', 'tionable')]
AmSn18900508-V05-19-page3.txt: [('S', 'ENTINEL')]
AmSn18900515-V05-20-page1.txt: [('minis', 'ters')]
AmSn18900515-V05-20-page5.txt: [('i', 'mportant')]
AmSn18900515-V05-20-page7.txt: [('eve', 'ryone')]
AmSn18900522-V05-21-page2.txt: [('Chr', 'ist'), ('right', 'eousness')]
AmSn18900522-V05-21-page3.txt: [('con', 'sume')]
AmSn18900529-V05-22-page2.txt: [('speak', 'easies')]
AmSn18900529-V05-22-page7.txt: [('W', 'onderful')]
AmSn18900605-V05-23-page7.txt: [('W', 'orld')]
AmSn18900612-V05-24-page3.txt: [('govern', 'Ment')]
AmSn18900612-V05-24-page7.txt: [('at', 'tached')]
AmSn18900612-V05-24-page8.txt: [('state', 'ment')]
AmSn18900619-V05-25-page2.txt: [('spe', 'cifically')]
AmSn18900619-V05-25-page7.txt: [('M', 'IGHTY')]
AmSn18900626-V05-26-page7.txt: [('hea', 'th'), ('M', 'IGHTY')]
AmSn18900717-V05-28-page6.txt: [('protec', 'tion')]
AmSn18900724-V05-29-page1.txt: [('govern', 'ments')]
AmSn18900724-V05-29-page7.txt: [('the', 'ba')]
AmSn18900731-V05-30-page1.txt: [('SEN', 'TINEL')]
AmSn18900731-V05-30-page2.txt: [('a', 'lso')]
AmSn18900731-V05-30-page6.txt: [('o', 'ne')]
AmSn18900814-V05-32-page4.txt: [('consta', 'bles'), ('a', 'boriginal')]
AmSn18900814-V05-32-page8.txt: [('G', 'overnment')]
AmSn18900821-V05-33-page5.txt: [('beau', 'tiful')]
AmSn18900821-V05-33-page7.txt: [('invest', 'ment'), ('sten', 'cil'), ('at', 'tached')]
AmSn18900828-V05-34-page7.txt: [('Ob', 'ject'), ('at', 'tached')]
AmSn18900904-V05-35-page7.txt: [('P', 'IE')]
AmSn18900911-V05-36-page2.txt: [('in', 'hibiting')]
AmSn18900911-V05-36-page3.txt: [('SEN', 'TINEL')]
AmSn18900918-V05-37-page1.txt: [('B', 'ib'), ('a', 'griC'), ('o', 'ft'), ('m', 'ay'), ('se', 'cy'), ('t', 'Ia')]
AmSn18900918-V05-37-page4.txt: [('i', 'asi'), ('s', 'IL'), ('P', 'EtIt'), ('A', 'te'), ('m', 'Ap'), ('t', 'oi'), ('b', 'ei')]
AmSn18900918-V05-37-page5.txt: [('lA', 'rd')]
AmSn18900918-V05-37-page6.txt: [('o', 'ut')]
AmSn18900918-V05-37-page8.txt: [('era', 'th'), ('n', 'ip'), ('e', 'ying'), ('d', 'id'), ('s', 'op')]
AmSn18900925-V05-38-page8.txt: [('SEN', 'TINEL'), ('over', 'whelmingly')]
AmSn18901009-V05-40-page6.txt: [('AMER', 'ICAN')]
AmSn18901016-V05-41-page4.txt: [('regula', 'tions'), ('legisla', 'tion')]
AmSn18901016-V05-41-page5.txt: [('amuse', 'ments')]
AmSn18901016-V05-41-page6.txt: [('CON', 'SERVATOR')]
AmSn18901016-V05-41-page7.txt: [('W', 'orkings')]
AmSn18901023-V05-42-page1.txt: [('BY', 'TES')]
AmSn18901023-V05-42-page2.txt: [('a', 'nd'), ('stud', 'ies'), ('to', 'tal')]
AmSn18901023-V05-42-page3.txt: [('to', 're')]
AmSn18901023-V05-42-page4.txt: [('a', 'bOVe'), ('a', 'nd')]
AmSn18901023-V05-42-page6.txt: [('cap', 'tured'), ('move', 'ment')]
AmSn18901030-V05-43-page1.txt: [('Living', 'ston')]
AmSn18901030-V05-43-page3.txt: [('Hollow', 'ay'), ('author', 'ities')]
AmSn18901030-V05-43-page5.txt: [('AMER', 'ICA')]
AmSn18901106-V05-44-page3.txt: [('SEN', 'TINEL'), ('in', 'Volves')]
AmSn18901106-V05-44-page7.txt: [('con', 'Stantly')]
AmSn18901113-V05-45-page2.txt: [('P', 'ress'), ('Govern', 'Ment')]
AmSn18901113-V05-45-page7.txt: [('A', 'ND')]
AmSn18901120-V05-46-page3.txt: [('ra', 'pidly')]
AmSn18901127-V05-47-page4.txt: [('me', 'morialize'), ('per', 'fect')]
AmSn18901127-V05-47-page7.txt: [('con', 'stantly'), ('T', 'ams')]
AmSn18901127-V05-47-page8.txt: [('rega', 'rds')]
AmSn18901204-V05-48-page8.txt: [('neces', 'sary')]
AmSn18901211-V05-49-page4.txt: [('repugna', 'nt')]
AmSn18901211-V05-49-page7.txt: [('con', 'stantly')]
AmSn18901218-V05-50-page3.txt: [('minor', 'ity'), ('theol', 'ogy'), ('rewa', 'rds'), ('the', 're')]
AmSn18901218-V05-50-page5.txt: [('the', 'refore')]
AmSn18901218-V05-50-page6.txt: [('d', 'ays')]
AmSn18901218-V05-50-page9.txt: [('con', 'vention'), ('B', 'ible')]
AmSn18910101-V06-01-page1.txt: [('SEN', 'TINEL')]
AmSn18910101-V06-01-page3.txt: [('w', 'ould')]
AmSn18910101-V06-01-page4.txt: [('sup', 'pression')]
AmSn18910101-V06-01-page5.txt: [('tor', 'tures')]
AmSn18910108-V06-02-page6.txt: [('legisla', 'tion')]
AmSn18910115-V06-03-page3.txt: [('SEN', 'TINEL'), ('pros', 'pered')]
AmSn18910115-V06-03-page4.txt: [('A', 'merican')]
AmSn18910115-V06-03-page7.txt: [('pe', 'ns'), ('l', 'eather')]
AmSn18910115-V06-03-page8.txt: [('a', 'nd'), ('AMER', 'ICAN')]
AmSn18910122-V06-04-page2.txt: [('t', 'iti'), ('pro', 'nounced')]
AmSn18910122-V06-04-page6.txt: [('prop', 'erty')]
AmSn18910122-V06-04-page8.txt: [('AMER', 'ICAN')]
AmSn18910129-V06-05-page2.txt: [('de', 'manded')]
AmSn18910129-V06-05-page3.txt: [('per', 'ception'), ('in', 'stinct')]
AmSn18910129-V06-05-page4.txt: [('Govern', 'Ment')]
AmSn18910129-V06-05-page7.txt: [('Y', 'ork'), ('devel', 'opment')]
AmSn18910212-V06-07-page3.txt: [('PE', 'TITIONED'), ('Leg', 'islature')]
AmSn18910212-V06-07-page4.txt: [('An', 'oa'), ('AMER', 'ICAN')]
AmSn18910212-V06-07-page8.txt: [('anniver', 'sary')]
AmSn18910219-V06-08-page3.txt: [('SEN', 'TINEL')]
AmSn18910219-V06-08-page7.txt: [('w', 'ork')]
AmSn18910226-V06-09-page7.txt: [('w', 'ork')]
AmSn18910226-V06-09-page8.txt: [('de', 'Cided')]
AmSn18910305-V06-10-page4.txt: [('num', 'ber')]
AmSn18910305-V06-10-page6.txt: [('mani', 'festation'), ('as', 'sured')]
AmSn18910305-V06-10-page8.txt: [('AMER', 'ICAN')]
AmSn18910319-V06-12-page6.txt: [('A', 'MERICAN')]
AmSn18910319-V06-12-page7.txt: [('W', 'ORSHIP')]
AmSn18910326-V06-13-page8.txt: [('worsh', 'ip')]
AmSn18910402-V06-14-page2.txt: [('SEN', 'TINEL')]
AmSn18910402-V06-14-page4.txt: [('a', 'bl')]
AmSn18910402-V06-14-page8.txt: [('A', 'MERICAN'), ('inter', 'ests')]
AmSn18910409-V06-15-page4.txt: [('the', 'se')]
AmSn18910409-V06-15-page7.txt: [('a', 'ges')]
AmSn18910416-V06-16-page1.txt: [('In', 'ti')]
AmSn18910416-V06-16-page2.txt: [('state', 'ment')]
AmSn18910416-V06-16-page3.txt: [('A', 'nd')]
AmSn18910416-V06-16-page8.txt: [('Y', 'ork')]
AmSn18910423-V06-17-page4.txt: [('infringe', 'ment')]
AmSn18910423-V06-17-page6.txt: [('a', 'nd')]
AmSn18910423-V06-17-page8.txt: [('the', 'se'), ('con', 'gregation')]
AmSn18910430-V06-18-page7.txt: [('THE', 'Ca')]
AmSn18910507-V06-19-page5.txt: [('AM', 'ERICAN')]
AmSn18910507-V06-19-page8.txt: [('A', 'MERICAN')]
AmSn18910514-V06-20-page3.txt: [('op', 'portunity')]
AmSn18910514-V06-20-page7.txt: [('at', 'tached')]
AmSn18910521-V06-21-page4.txt: [('infringe', 'ment')]
AmSn18910521-V06-21-page7.txt: [('F', 'iNe'), ('W', 'ith')]
AmSn18910604-V06-23-page6.txt: [('AMER', 'ICAN')]
AmSn18910604-V06-23-page8.txt: [('for', 'th')]
AmSn18910611-V06-24-page2.txt: [('SEN', 'TINEL'), ('A', 'ugustus')]
AmSn18910618-V06-25-page6.txt: [('Com', "mittee's"), ('AM', 'ERICAN')]
AmSn18910618-V06-25-page7.txt: [('Y', 'ORK')]
AmSn18910625-V06-26-page3.txt: [('pun', 'ished')]
AmSn18910625-V06-26-page4.txt: [('in', 'ti')]
AmSn18910625-V06-26-page5.txt: [('prac', 'tically')]
AmSn18910625-V06-26-page7.txt: [('for', 'te')]
AmSn18910709-V06-27-page3.txt: [('con', 'versant')]
AmSn18910709-V06-27-page6.txt: [('to', 'co'), ('thor', 'oughly')]
AmSn18910709-V06-27-page7.txt: [('t', 'iA')]
AmSn18910709-V06-27-page8.txt: [('frater', 'nity')]
AmSn18910716-V06-28-page1.txt: [('the', 're')]
AmSn18910716-V06-28-page6.txt: [('C', 'urt')]
AmSn18910716-V06-28-page8.txt: [('SEN', 'TINEL')]
AmSn18910806-V06-31-page6.txt: [('Amer', 'ican')]
AmSn18910827-V06-34-page5.txt: [('Chris', 'tian')]
AmSn18910827-V06-34-page7.txt: [('HEN', 'RY')]
AmSn18910827-V06-34-page8.txt: [('min', 'ister')]
AmSn18910903-V06-35-page1.txt: [('Govern', 'ment')]
AmSn18910903-V06-35-page2.txt: [('prin', 'ciple')]
AmSn18910903-V06-35-page5.txt: [('Ex', 'amples')]
AmSn18910903-V06-35-page6.txt: [('de', 'tectives'), ('AMER', 'ICAN')]
AmSn18910903-V06-35-page7.txt: [('Ca', 'Ns'), ('W', 'itchcraft'), ('to', 'ld')]
AmSn18910910-V06-36-page1.txt: [('legisla', 'tion')]
AmSn18910910-V06-36-page4.txt: [('quota', 'tions'), ('ma', 'jority')]
AmSn18910910-V06-36-page5.txt: [('invec', 'tives'), ('per', 'se'), ('char', 'acterizes'), ('con', 'dition'), ('condi', 'tion'), ('Chris', 'tian')]
AmSn18910910-V06-36-page6.txt: [('hun', 'dred')]
AmSn18910910-V06-36-page7.txt: [('P', 'ress')]
AmSn18910917-V06-37-page3.txt: [('be', 'lieves'), ('end', 'ureth'), ('man', 'agers')]
AmSn18910917-V06-37-page6.txt: [('pro', 'tects'), ('Gov', 'ernor'), ('sym', 'pathy'), ('Chris', 'tian')]
AmSn18910917-V06-37-page7.txt: [('Or', 'namental')]
AmSn18910924-V06-38-page2.txt: [('min', 'ister')]
AmSn18910924-V06-38-page5.txt: [('per', 'secution')]
AmSn18910924-V06-38-page7.txt: [('a', 'nd')]
AmSn18911022-V06-41-page1.txt: [('in', 'Sisted')]
AmSn18911022-V06-41-page3.txt: [('SEN', 'TINEL')]
AmSn18911022-V06-41-page4.txt: [('per', 'se')]
AmSn18911022-V06-41-page6.txt: [('agree', 'ment'), ('Camp', 'bellites')]
AmSn18911022-V06-41-page8.txt: [('state', 'ment'), ('a', 'llow')]
AmSn18911112-V06-44-page8.txt: [('T', 'IE')]
AmSn18911126-V06-46-page4.txt: [('expres', 'sed')]
AmSn18911126-V06-46-page8.txt: [('legal', 'ize')]
AmSn18911203-V06-47-page1.txt: [('SEN', 'TINEL')]
AmSn18911203-V06-47-page3.txt: [('IN', 'STITUTION')]
AmSn18911203-V06-47-page7.txt: [('the', 'ba'), ('me', 'chanics')]
AmSn18911210-V06-48-page6.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER')]
AmSn18911217-V06-49-page4.txt: [('christian', 'ized')]
AmSn18911217-V06-49-page5.txt: [('LIV', 'INGSTONE'), ('differ', 'ent')]
AmSn18911224-V06-50-page2.txt: [('persecu', 'tion')]
AmSn18911224-V06-50-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('LIV', 'INGSTONE')]
AmSn18920107-V07-01-page5.txt: [('reli', 'gious')]
AmSn18920107-V07-01-page6.txt: [('S', 'te')]
AmSn18920107-V07-01-page7.txt: [('Pi', 'oneer')]
AmSn18920107-V07-01-page8.txt: [('A', 'ny')]
AmSn18920114-V07-02-page5.txt: [('nefa', 'rious')]
AmSn18920114-V07-02-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('HEN', 'RY')]
AmSn18920121-V07-03-page3.txt: [('see', 'th'), ('elo', 'quently')]
AmSn18920128-V07-04-page5.txt: [('rec', 'ognize')]
AmSn18920128-V07-04-page7.txt: [('HEN', 'RY'), ('diction', 'ary'), ('FOR', 'EIGN')]
AmSn18920204-V07-05-page5.txt: [('P', 'ig'), ('a', 'cc'), ('A', 'CC'), ('o', 'ct'), ('b', 'id')]
AmSn18920204-V07-05-page6.txt: [('SEN', 'TINEL'), ('to', 're')]
AmSn18920204-V07-05-page7.txt: [('ORIGIN', 'AL'), ('HEN', 'RY'), ('GAZ', 'ETTEER')]
AmSn18920211-V07-06-page5.txt: [('tri', 'bunal')]
AmSn18920211-V07-06-page7.txt: [('e', 'LI'), ('HEN', 'RY'), ('diction', 'ary'), ('FOR', 'EIGN')]
AmSn18920218-V07-07-page2.txt: [('be', 'lial'), ('Chris', 'tianity'), ('indict', 'ment')]
AmSn18920218-V07-07-page3.txt: [('fr', 'om')]
AmSn18920218-V07-07-page6.txt: [('pub', 'lished')]
AmSn18920218-V07-07-page7.txt: [('FOR', 'EIGN')]
AmSn18920225-V07-08-page7.txt: [('GAZ', 'ETTEER')]
AmSn18920303-V07-09-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary'), ('miner', 'als'), ('Chris', 'tianity')]
AmSn18920310-V07-10-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18920317-V07-11-page7.txt: [('diction', 'ary')]
AmSn18920324-V07-12-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18920331-V07-13-page1.txt: [('indi', 'cates')]
AmSn18920331-V07-13-page3.txt: [('A', 'fter')]
AmSn18920331-V07-13-page5.txt: [('by', 're')]
AmSn18920407-V07-14-page7.txt: [('GEN', 'ERAL')]
AmSn18920414-V07-15-page2.txt: [('SEN', 'TINEL'), ('the', 'se')]
AmSn18920414-V07-15-page4.txt: [('com', 'mittee')]
AmSn18920414-V07-15-page7.txt: [('Am', 'erican')]
AmSn18920428-V07-17-page7.txt: [('l', 'imes'), ('t', 'ok')]
AmSn18920505-V07-18-page3.txt: [('f', 'oresaw')]
AmSn18920505-V07-18-page4.txt: [('in', 'corporating')]
AmSn18920505-V07-18-page5.txt: [('AME', 'RICAN')]
AmSn18920519-V07-20-page6.txt: [('w', 'hich')]
AmSn18920519-V07-20-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary')]
AmSn18920526-V07-21-page3.txt: [('in', 'asmuch')]
AmSn18920526-V07-21-page4.txt: [('author', 'ities')]
AmSn18920526-V07-21-page5.txt: [('CO', 'CO'), ('In', 'teresting'), ('CO', 'Co'), ('it', 'al'), ('Go', 'vernment')]
AmSn18920526-V07-21-page7.txt: [('FOR', 'EIGN')]
AmSn18920602-V07-22-page4.txt: [('the', 're')]
AmSn18920602-V07-22-page7.txt: [('in', 'hibition')]
AmSn18920609-V07-23-page1.txt: [('Con', 'gress')]
AmSn18920609-V07-23-page3.txt: [('T', 'iE')]
AmSn18920616-V07-24-page7.txt: [('H', 'Id'), ('H', 'UI')]
AmSn18920623-V07-25-page2.txt: [('dis', 'turbed')]
AmSn18920623-V07-25-page4.txt: [('gen', 'erally'), ('w', 'ith')]
AmSn18920630-V07-26-page5.txt: [('AMER', 'ICAN')]
AmSn18920630-V07-26-page8.txt: [('SEN', 'TINEL')]
AmSn18920714-V07-27-page6.txt: [('SEN', 'TINEL'), ('i', 'ncline')]
AmSn18920721-V07-28-page7.txt: [('In', 'ca'), ('FOR', 'EIGN')]
AmSn18920728-V07-29-page3.txt: [('Con', 'stitution')]
AmSn18920728-V07-29-page5.txt: [('to', 'te')]
AmSn18920728-V07-29-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('diction', 'ary')]
AmSn18920728-V07-29-page8.txt: [('men', 'pleasers')]
AmSn18920804-V07-30-page7.txt: [('DICTION', 'ARY'), ('miner', 'als')]
AmSn18920811-V07-31-page3.txt: [('a', 'gainst')]
AmSn18920811-V07-31-page4.txt: [('Expo', 'sition'), ('he', 're')]
AmSn18920811-V07-31-page7.txt: [('GAZ', 'ETTEER'), ('diction', 'ary')]
AmSn18920818-V07-32-page7.txt: [('FOR', 'EIGN')]
AmSn18920818-V07-32-page8.txt: [('win', 'ce')]
AmSn18920825-V07-33-page2.txt: [('to', 're')]
AmSn18920825-V07-33-page3.txt: [('per', 'se')]
AmSn18920825-V07-33-page7.txt: [('r', 'efer')]
AmSn18920825-V07-33-page8.txt: [('annoy', 'ance')]
AmSn18920901-V07-34-page1.txt: [('E', 'qual')]
AmSn18920901-V07-34-page3.txt: [('con', 'nection')]
AmSn18920901-V07-34-page4.txt: [('four', 'teenth'), ('and', 're')]
AmSn18920901-V07-34-page7.txt: [('GAZ', 'ETTEER'), ('miner', 'als')]
AmSn18920908-V07-35-page8.txt: [('in', 'delibly')]
AmSn18920915-V07-36-page7.txt: [('HEN', 'RY')]
AmSn18920915-V07-36-page8.txt: [('per', 'mitted')]
AmSn18920929-V07-38-page6.txt: [('and', 'es')]
AmSn18920929-V07-38-page7.txt: [('FOR', 'EIGN'), ('in', 'vited')]
AmSn18921006-V07-39-page7.txt: [('DICTION', 'ARY'), ('diction', 'ary')]
AmSn18921006-V07-39-page8.txt: [('o', 'ne')]
AmSn18921013-V07-40-page1.txt: [('w', 'hich')]
AmSn18921020-V07-41-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18921027-V07-42-page1.txt: [('AMER', 'ICAN')]
AmSn18921027-V07-42-page7.txt: [('Christian', 'ity'), ('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18921103-V07-43-page4.txt: [('m', 'eeting')]
AmSn18921110-V07-44-page8.txt: [('S', 'tE')]
AmSn18921117-V07-45-page7.txt: [('DICTION', 'ARY'), ('FOR', 'EIGN')]
AmSn18921117-V07-45-page8.txt: [('per', 'centage')]
AmSn18921124-V07-46-page7.txt: [('leg', 'islation')]
AmSn18921201-V07-47-page7.txt: [('GAZ', 'ETTEER'), ('FOR', 'EIGN')]
AmSn18921208-V07-48-page7.txt: [('SEN', 'TINEL')]
AmSn18921208-V07-48-page8.txt: [('to', 'ft')]
AmSn18921215-V07-49-page2.txt: [('AMER', 'ICAN')]
AmSn18921215-V07-49-page7.txt: [('HEN', 'RY')]
AmSn18921215-V07-49-page8.txt: [('St', 'atesman'), ('St', 'evenson'), ('T', 'IE'), ('St', 'ates')]
AmSn18921222-V07-50-page1.txt: [('LIB', 'ERTY')]
AmSn18921222-V07-50-page8.txt: [('SEN', 'TINEL')]
AmSn18930112-V08-02-page2.txt: [('SEN', 'TINEL')]
AmSn18930112-V08-02-page7.txt: [('H', 'AL')]
AmSn18930112-V08-02-page8.txt: [('non', 'feasance'), ('Jan', 'uary'), ('add', 'ress')]
AmSn18930119-V08-03-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER'), ('DICTION', 'ARY')]
AmSn18930126-V08-04-page7.txt: [('C', 'reola'), ('t', 'ome')]
AmSn18930126-V08-04-page8.txt: [('add', 'ress')]
AmSn18930202-V08-05-page6.txt: [('w', 'orld')]
AmSn18930209-V08-06-page4.txt: [('A', 'lk')]
AmSn18930209-V08-06-page6.txt: [('Ines', 'timable')]
AmSn18930209-V08-06-page7.txt: [('hodge', 'podge')]
AmSn18930216-V08-07-page6.txt: [('C', 'hristian'), ('N', 'ation'), ('A', 're')]
AmSn18930216-V08-07-page7.txt: [('AMER', 'ICAN')]
AmSn18930309-V08-10-page7.txt: [('at', 'torney'), ('THE', 'CA')]
AmSn18930316-V08-11-page5.txt: [('per', 'se')]
AmSn18930316-V08-11-page7.txt: [('DICTION', 'ARY')]
AmSn18930406-V08-14-page3.txt: [('exemp', 'tion')]
AmSn18930406-V08-14-page7.txt: [('V', 'IC'), ('i', 'NT'), ('FUR', 'NISHED')]
AmSn18930406-V08-14-page8.txt: [('add', 'ress')]
AmSn18930413-V08-15-page5.txt: [('l', 'aw')]
AmSn18930420-V08-16-page7.txt: [('DICTION', 'ARY')]
AmSn18930427-V08-17-page2.txt: [('JOSE', 'PH')]
AmSn18930504-V08-18-page7.txt: [('DICTION', 'ARY'), ('FUR', 'NISHED')]
AmSn18930518-V08-20-page7.txt: [('DICTION', 'ARY')]
AmSn18930601-V08-22-page7.txt: [('GAZ', 'ETTEER'), ('gaz', 'etteer')]
AmSn18930608-V08-23-page4.txt: [('w', 'orship')]
AmSn18930608-V08-23-page6.txt: [('lib', 'erty')]
AmSn18930615-V08-24-page5.txt: [('con', 'stitutionality')]
AmSn18930615-V08-24-page8.txt: [('SEN', 'TINEL')]
AmSn18930629-V08-26-page8.txt: [('A', 'MI')]
AmSn18930706-V08-27-page2.txt: [('a', 'fferent')]
AmSn18930706-V08-27-page3.txt: [('per', 'secution')]
AmSn18930706-V08-27-page6.txt: [('h', 'eretics')]
AmSn18930706-V08-27-page7.txt: [('DICTION', 'ARY')]
AmSn18930720-V08-29-page4.txt: [('mission', 'ary')]
AmSn18930720-V08-29-page8.txt: [('rep', 'utation'), ('c', 'ato')]
AmSn18930727-V08-30-page2.txt: [('amend', 'ments'), ('AMER', 'ICAN')]
AmSn18930803-V08-31-page7.txt: [('W', 'afers'), ('diction', 'ary')]
AmSn18930810-V08-32-page3.txt: [('Wo', "rld's")]
AmSn18930810-V08-32-page5.txt: [('In', 'terior')]
AmSn18930810-V08-32-page8.txt: [('Haw', 'keye')]
AmSn18930817-V08-33-page6.txt: [('per', 'se')]
AmSn18930831-V08-34-page2.txt: [('van', 'ity')]
AmSn18930831-V08-34-page5.txt: [('per', 'se')]
AmSn18930831-V08-34-page8.txt: [('pro', 'hibited')]
AmSn18930907-V08-35-page2.txt: [('a', 'bstract')]
AmSn18930914-V08-36-page4.txt: [('hi', 'gher'), ('REESTABLISH', 'MENT')]
AmSn18930921-V08-37-page4.txt: [('the', 'Es')]
AmSn18930928-V08-38-page5.txt: [('SEN', 'TINEL')]
AmSn18930928-V08-38-page7.txt: [('r', 'esearches')]
AmSn18930928-V08-38-page8.txt: [('m', 'ade')]
AmSn18931012-V08-40-page2.txt: [('SEN', 'TINEL')]
AmSn18931012-V08-40-page3.txt: [('mod', 'ern'), ('PUN', 'ISHING')]
AmSn18931012-V08-40-page7.txt: [('DICTION', 'ARY'), ('sub', 'stantially')]
AmSn18931019-V08-41-page7.txt: [('P', 'UBLIC'), ('W', 'ork'), ('C', 'oncluding'), ('de', 'scription')]
AmSn18931026-V08-42-page2.txt: [('are', 'Ca'), ('Ca', 'sar')]
AmSn18931026-V08-42-page3.txt: [('dec', 'laration')]
AmSn18931026-V08-42-page6.txt: [('r', 'ather')]
AmSn18931026-V08-42-page7.txt: [('GEN', 'ERAL'), ('HEN', 'RY'), ('GAZ', 'ETTEER')]
AmSn18931026-V08-42-page8.txt: [('o', 'ne')]
AmSn18931102-V08-43-page4.txt: [('hob', 'nobbing')]
AmSn18931102-V08-43-page8.txt: [('to', 'co')]
AmSn18931109-V08-44-page2.txt: [('sub', 'jects')]
AmSn18931109-V08-44-page8.txt: [('j', 'udgment')]
AmSn18931116-V08-45-page1.txt: [('phys', 'ically')]
AmSn18931116-V08-45-page3.txt: [('NO', 'VEMBER')]
AmSn18931116-V08-45-page7.txt: [('p', 'es')]
AmSn18931116-V08-45-page8.txt: [('SEN', 'TINEL')]
AmSn18931123-V08-46-page1.txt: [('are', 'Ca')]
AmSn18931123-V08-46-page2.txt: [('SEN', 'TINEL')]
AmSn18931123-V08-46-page4.txt: [('light', 'heartedness'), ('per', 'se')]
AmSn18931130-V08-47-page1.txt: [('contempo', 'raneous')]
AmSn18931130-V08-47-page6.txt: [('threat', 'ened')]
AmSn18931207-V08-48-page1.txt: [('char', 'acter'), ('and', 're'), ('trans', 'gress')]
AmSn18931207-V08-48-page4.txt: [('pro', 'tected'), ('pro', 'Vides'), ('establish', 'Ment')]
AmSn18931214-V08-49-page5.txt: [('SEN', 'TINEL'), ('B', 'arabbas')]
AmSn18931221-V08-50-page3.txt: [('heart', 'ily')]
AmSn18931221-V08-50-page8.txt: [('ten', 'ts'), ('A', 'ccording')]
AmSn18940104-V09-01-page1.txt: [('SEN', 'TINEL')]
AmSn18940104-V09-01-page2.txt: [('con', 'tinent')]
AmSn18940104-V09-01-page8.txt: [('c', 'id')]
AmSn18940111-V09-02-page4.txt: [('o', 'rder')]
AmSn18940118-V09-03-page2.txt: [('abso', 'lutely')]
AmSn18940118-V09-03-page6.txt: [('Chris', 'tian')]
AmSn18940125-V09-04-page2.txt: [('enthu', 'siastic')]
AmSn18940125-V09-04-page7.txt: [('P', 'UBLIC')]
AmSn18940201-V09-05-page1.txt: [('Y', 'ork')]
AmSn18940201-V09-05-page2.txt: [('bus', 'es')]
AmSn18940201-V09-05-page6.txt: [('d', 'ay')]
AmSn18940208-V09-06-page2.txt: [('cha', 'mpion')]
AmSn18940308-V09-10-page2.txt: [('A', 'NT')]
AmSn18940315-V09-11-page1.txt: [('SEN', 'TINEL')]
AmSn18940315-V09-11-page2.txt: [('to', 'Ca')]
AmSn18940322-V09-12-page6.txt: [('Chris', 'tian')]
AmSn18940322-V09-12-page7.txt: [('diction', 'ary')]
AmSn18940329-V09-13-page3.txt: [('tes', 'tifies')]
AmSn18940329-V09-13-page4.txt: [('A', 'nd')]
AmSn18940329-V09-13-page7.txt: [('GAZ', 'ETTEER')]
AmSn18940405-V09-14-page2.txt: [('CA', 'NT')]
AmSn18940405-V09-14-page7.txt: [('de', 'claring'), ('GAZ', 'ETTEER')]
AmSn18940412-V09-15-page2.txt: [('a', 'll')]
AmSn18940412-V09-15-page6.txt: [('reli', 'gious')]
AmSn18940412-V09-15-page8.txt: [('w', 'ork')]
AmSn18940419-V09-16-page7.txt: [('GAZ', 'ETTEER')]
AmSn18940426-V09-17-page5.txt: [('Con', 'stitutional')]
AmSn18940503-V09-18-page4.txt: [('u', 'nited')]
AmSn18940503-V09-18-page5.txt: [('bay', 'onet')]
AmSn18940503-V09-18-page7.txt: [('Y', 'es')]
AmSn18940510-V09-19-page5.txt: [('SEN', 'TINEL'), ('author', 'ity')]
AmSn18940524-V09-21-page6.txt: [('u', 'pon')]
AmSn18940524-V09-21-page8.txt: [('to', 'co'), ('w', 'ith')]
AmSn18940531-V09-22-page3.txt: [('present', 'ation')]
AmSn18940607-V09-23-page6.txt: [('far', 'cical'), ('per', 'mitted')]
AmSn18940614-V09-24-page2.txt: [('CON', 'STITUTIONAL')]
AmSn18940614-V09-24-page3.txt: [('LIB', 'ERTY'), ('CON', 'TROL'), ('ins', 'tructions'), ('V', 'ulture')]
AmSn18940614-V09-24-page7.txt: [('sub', 'stantial')]
AmSn18940621-V09-25-page5.txt: [('SEN', 'TINEL')]
AmSn18940621-V09-25-page7.txt: [('sub', 'stantial'), ('diction', 'ary')]
AmSn18940628-V09-26-page3.txt: [('polit', 'ical')]
AmSn18940628-V09-26-page5.txt: [('in', 'jects'), ('The', 'osophic')]
AmSn18940712-V09-28-page1.txt: [('pro', 'duction'), ('govern', 'ment')]
AmSn18940719-V09-29-page5.txt: [('he', 're')]
AmSn18940726-V09-30-page1.txt: [('JON', 'ES')]
AmSn18940802-V09-31-page4.txt: [('com', 'plex')]
AmSn18940823-V09-33-page3.txt: [('SEN', 'TINEL')]
AmSn18940823-V09-33-page4.txt: [('Switz', 'erland')]
AmSn18940823-V09-33-page6.txt: [('Ch', 'ristian')]
AmSn18940830-V09-34-page2.txt: [('e', 'ther')]
AmSn18940830-V09-34-page5.txt: [('pro', 'duced')]
AmSn18940830-V09-34-page7.txt: [('PRO', 'PHECY')]
AmSn18940906-V09-35-page3.txt: [('shame', 'ful')]
AmSn18940906-V09-35-page7.txt: [('GEN', 'ERAL')]
AmSn18940913-V09-36-page3.txt: [('contra', 'dictions')]
AmSn18940913-V09-36-page6.txt: [('M', 'inor')]
AmSn18940920-V09-37-page2.txt: [('in', 'vite'), ('contra', 'ry')]
AmSn18940920-V09-37-page3.txt: [('con', 'trol')]
AmSn18940927-V09-38-page6.txt: [('H', 'AL'), ('A', 'NT')]
AmSn18941004-V09-39-page1.txt: [('a', 'reli'), ('reli', 'gious')]
AmSn18941004-V09-39-page4.txt: [('Depart', 'ment')]
AmSn18941004-V09-39-page5.txt: [('s', 'ap'), ('la', 'gs'), ('E', 'gg'), ('P', 'ct')]
AmSn18941004-V09-39-page7.txt: [('por', 'trayal'), ('A', 'tE'), ('congregation', 'al')]
AmSn18941004-V09-39-page8.txt: [('a', 'id')]
AmSn18941011-V09-40-page2.txt: [('bu', 'ilded')]
AmSn18941018-V09-41-page2.txt: [('foun', 'dation')]
AmSn18941025-V09-42-page6.txt: [('C', 'tr')]
AmSn18941025-V09-42-page7.txt: [('regis', 'tered')]
AmSn18941025-V09-42-page8.txt: [('AMER', 'ICAN')]
AmSn18941115-V09-45-page2.txt: [('pro', 'phets')]
AmSn18941115-V09-45-page4.txt: [('gov', 'ernment')]
AmSn18941115-V09-45-page7.txt: [('pro', 'fusely')]
AmSn18941115-V09-45-page8.txt: [('el', 'se'), ('to', 'co')]
AmSn18941122-V09-46-page5.txt: [('well', 'es'), ('com', 'munity')]
AmSn18941129-V09-47-page2.txt: [('enforce', 'th')]
AmSn18941129-V09-47-page7.txt: [('R', 'eligious'), ('W', 'eekly')]
AmSn18941213-V09-49-page3.txt: [('Pa', 'ternoster')]
AmSn18941213-V09-49-page4.txt: [('f', 'ainteth')]
AmSn18941220-V09-50-page10.txt: [('Bap', 'tists')]
AmSn18941220-V09-50-page6.txt: [('to', 'ning'), ('the', 're'), ('sew', 'erage')]
AmSn18941220-V09-50-page7.txt: [('stra', 'nger')]
AmSn18941220-V09-50-page8.txt: [('SEN', 'TINEL')]
AmSn18941220-V09-50-page9.txt: [('TO', 'TEM')]
AmSn18950103-V10-01-page1.txt: [('SEN', 'TINEL'), ('ac', 'complished')]
AmSn18950103-V10-01-page7.txt: [('H', 'ELPFUL')]
AmSn18950110-V10-02-page1.txt: [('contempora', 'ry'), ('e', 'ternally'), ('pun', 'ishable')]
AmSn18950110-V10-02-page2.txt: [('faith', 'ful')]
AmSn18950110-V10-02-page4.txt: [('amend', 'ments')]
AmSn18950117-V10-03-page7.txt: [('in', 'quirer')]
AmSn18950117-V10-03-page8.txt: [('Prot', 'estant'), ('AMER', 'ICAN')]
AmSn18950124-V10-04-page2.txt: [('Rom', 'anists')]
AmSn18950124-V10-04-page3.txt: [('ques', 'tions')]
AmSn18950124-V10-04-page7.txt: [('G', 'raphic'), ('W', 'ith')]
AmSn18950124-V10-04-page8.txt: [('AMER', 'ICAN')]
AmSn18950131-V10-05-page3.txt: [('M', 'KT')]
AmSn18950131-V10-05-page4.txt: [('Le', 'Iter')]
AmSn18950131-V10-05-page7.txt: [('SCRIP', 'TURAL'), ('Y', 'ork')]
AmSn18950214-V10-07-page1.txt: [('SEN', 'TINEL')]
AmSn18950214-V10-07-page4.txt: [('the', 're')]
AmSn18950214-V10-07-page6.txt: [('Govern', 'ment')]
AmSn18950221-V10-08-page1.txt: [('SEN', 'TINEL')]
AmSn18950221-V10-08-page3.txt: [('SEN', 'TINEL')]
AmSn18950221-V10-08-page4.txt: [('SEN', 'TINEL')]
AmSn18950228-V10-09-page2.txt: [('a', 'pplication')]
AmSn18950228-V10-09-page4.txt: [('ex', 'emptions')]
AmSn18950228-V10-09-page6.txt: [('Se', 'dalia')]
AmSn18950228-V10-09-page7.txt: [('I', 'CE')]
AmSn18950307-V10-10-page8.txt: [('SEN', 'TINEL')]
AmSn18950314-V10-11-page2.txt: [('Method', 'ist')]
AmSn18950321-V10-12-page5.txt: [('so', 'ciety')]
AmSn18950328-V10-13-page4.txt: [('improve', 'ments')]
AmSn18950328-V10-13-page6.txt: [('to', 'ke')]
AmSn18950328-V10-13-page7.txt: [('pro', 'fusely')]
AmSn18950404-V10-14-page5.txt: [('peo', 'ple')]
AmSn18950404-V10-14-page6.txt: [('in', 'quirer')]
AmSn18950404-V10-14-page8.txt: [('edit', 'orial')]
AmSn18950411-V10-15-page2.txt: [('de', 'nial')]
AmSn18950411-V10-15-page5.txt: [('et', 'tA')]
AmSn18950418-V10-16-page7.txt: [('pro', 'tem'), ('en', 'couraged')]
AmSn18950425-V10-17-page1.txt: [('minis', 'ters')]
AmSn18950425-V10-17-page3.txt: [('a', 'nd')]
AmSn18950425-V10-17-page6.txt: [('B', 'argain')]
AmSn18950425-V10-17-page7.txt: [('con', 'dition')]
AmSn18950502-V10-18-page1.txt: [('state', 'ments')]
AmSn18950502-V10-18-page5.txt: [('judi', 'cial')]
AmSn18950502-V10-18-page6.txt: [('De', 'mand')]
AmSn18950502-V10-18-page7.txt: [('to', 're')]
AmSn18950509-V10-19-page3.txt: [('AMER', 'ICAN')]
AmSn18950509-V10-19-page4.txt: [('ha', 've')]
AmSn18950509-V10-19-page6.txt: [('we', 'ek')]
AmSn18950516-V10-20-page4.txt: [('chris', 'tening')]
AmSn18950523-V10-21-page1.txt: [('de', 'feated')]
AmSn18950523-V10-21-page3.txt: [('esta', 'blished')]
AmSn18950523-V10-21-page5.txt: [('big', 'otry')]
AmSn18950523-V10-21-page7.txt: [('ToRI', 'ES'), ('diction', 'ary')]
AmSn18950523-V10-21-page8.txt: [('Prot', 'estants'), ('ma', 'jority')]
AmSn18950530-V10-22-page5.txt: [('SEN', 'TINEL')]
AmSn18950530-V10-22-page8.txt: [('con', 'viction')]
AmSn18950606-V10-23-page4.txt: [('LEG', 'ISLATION'), ('PUNISH', 'MENT'), ('PUN', 'ISHED'), ('CHRIS', 'TIAN')]
AmSn18950606-V10-23-page8.txt: [('SEN', 'TINEL')]
AmSn18950613-V10-24-page1.txt: [('Y', 'ork'), ('ques', 'tions')]
AmSn18950613-V10-24-page7.txt: [('N', 'inety')]
AmSn18950620-V10-25-page1.txt: [('an', 'cient')]
AmSn18950620-V10-25-page2.txt: [('for', 'merly')]
AmSn18950620-V10-25-page5.txt: [('in', 'sidious'), ('AMER', 'ICAN')]
AmSn18950620-V10-25-page7.txt: [('A', 'ttractive'), ('A', 'merican'), ('Y', 'ork'), ('V', 'ersion')]
AmSn18950627-V10-26-page3.txt: [('A', 'MI')]
AmSn18950627-V10-26-page6.txt: [('lib', 'erty')]
AmSn18950627-V10-26-page7.txt: [('Ad', 'ventists')]
AmSn18950718-V10-29-page7.txt: [('a', 're')]
AmSn18950718-V10-29-page8.txt: [('SEN', 'TINEL')]
AmSn18950725-V10-30-page5.txt: [('depre', 'cated')]
AmSn18950801-V10-31-page3.txt: [('Judge', 'th')]
AmSn18950801-V10-31-page5.txt: [('REA', 'VIS')]
AmSn18950815-V10-32-page8.txt: [('reli', 'gious')]
AmSn18950829-V10-34-page4.txt: [('MOVE', 'MENT')]
AmSn18950829-V10-34-page8.txt: [('SEN', 'TINEL')]
AmSn18950905-V10-35-page2.txt: [('SEN', 'TINEL')]
AmSn18950905-V10-35-page7.txt: [('B', 'IB')]
AmSn18950919-V10-37-page5.txt: [('Cath', 'olic')]
AmSn18950926-V10-38-page5.txt: [('in', 'fluence'), ('the', 're')]
AmSn18950926-V10-38-page6.txt: [('E', 'LI')]
AmSn18950926-V10-38-page7.txt: [('GEN', 'ERAL'), ('GAZ', 'ETTEER')]
AmSn18951017-V10-41-page8.txt: [('a', 'mis')]
AmSn18951024-V10-42-page7.txt: [('a', 'll'), ('A', 'Il'), ('all', 'ee'), ('T', 'ia')]
AmSn18951031-V10-43-page6.txt: [('Men', 'stealers')]
AmSn18951031-V10-43-page7.txt: [('i', 'ri'), ('n', 'orw'), ('endure', 'th'), ('e', 'lf')]
AmSn18951107-V10-44-page3.txt: [('sal', 'vation')]
AmSn18951107-V10-44-page7.txt: [('pro', 'fusely')]
AmSn18951121-V10-46-page5.txt: [('reg', 'ular'), ('or', 'ganization')]
AmSn18951121-V10-46-page7.txt: [('Y', 'ork')]
AmSn18951128-V10-47-page6.txt: [('S', 'weet')]
AmSn18951128-V10-47-page7.txt: [('o', 'ne')]
AmSn18951128-V10-47-page8.txt: [('W', 'ine'), ('GEN', 'ERAL')]
AmSn18960102-V11-01-page8.txt: [('SEN', 'TINEL')]
AmSn18960109-V11-02-page7.txt: [('St', 'ates'), ('V', 'ery')]
AmSn18960116-V11-03-page2.txt: [('gover', 'nment')]
AmSn18960116-V11-03-page7.txt: [('pro', 'fusely')]
AmSn18960130-V11-05-page6.txt: [('a', 'nd')]
AmSn18960213-V11-07-page1.txt: [('ED', 'ITORS')]
AmSn18960213-V11-07-page3.txt: [('dam', 'es')]
AmSn18960220-V11-08-page2.txt: [('IN', 'SP')]
AmSn18960220-V11-08-page4.txt: [('prop', 'erly')]
AmSn18960220-V11-08-page5.txt: [('au', 'th'), ('of', 'ttimes')]
AmSn18960220-V11-08-page6.txt: [('to', 'wns')]
AmSn18960220-V11-08-page7.txt: [('nan', 'na'), ('sim', 'ple'), ('W', 'ith')]
AmSn18960220-V11-08-page8.txt: [('SEN', 'TINEL'), ('SEN', "TINEL'S")]
AmSn18960227-V11-09-page3.txt: [('attend', 'ance')]
AmSn18960227-V11-09-page8.txt: [('AMER', 'ICAN')]
AmSn18960305-V11-10-page1.txt: [('the', 'Ca')]
AmSn18960305-V11-10-page6.txt: [('CA', 'NT')]
AmSn18960305-V11-10-page7.txt: [('A', 'GA')]
AmSn18960305-V11-10-page8.txt: [('b', 'Ra')]
AmSn18960312-V11-11-page5.txt: [('AMER', 'ICAN')]
AmSn18960312-V11-11-page7.txt: [('W', 'ith')]
AmSn18960319-V11-12-page1.txt: [('e', 'cu')]
AmSn18960319-V11-12-page3.txt: [('Chris', 'tianity')]
AmSn18960326-V11-13-page5.txt: [('in', 'vaded')]
AmSn18960326-V11-13-page7.txt: [('b', 'irders')]
AmSn18960409-V11-15-page3.txt: [('fur', 'therance')]
AmSn18960409-V11-15-page5.txt: [('ON', 'TARIO')]
AmSn18960507-V11-19-page5.txt: [('de', 'mand')]
AmSn18960514-V11-20-page3.txt: [('to', 'wn')]
AmSn18960514-V11-20-page7.txt: [('CLA', 'SS'), ('She', 'lah')]
AmSn18960521-V11-21-page3.txt: [('AD', 'VENTIST')]
AmSn18960528-V11-22-page3.txt: [('pro', 'hibited')]
AmSn18960604-V11-23-page1.txt: [('rec', 'eives')]
AmSn18960604-V11-23-page2.txt: [('See', 'th')]
AmSn18960604-V11-23-page4.txt: [('pro', 'hibition')]
AmSn18960604-V11-23-page7.txt: [('Me', 'shullam'), ('a', 'Ne'), ('t', 'Ex'), ('nah', 'Ma'), ('Le', 'vites'), ('DAn', 'iel')]
AmSn18960611-V11-24-page3.txt: [('En', 'deavor')]
AmSn18960618-V11-25-page7.txt: [('go', 'ld')]
AmSn18960702-V11-26-page1.txt: [('DEC', 'LARATION')]
AmSn18960702-V11-26-page2.txt: [('be', 'lieve')]
AmSn18960702-V11-26-page3.txt: [('Ref', 'orm')]
AmSn18960702-V11-26-page7.txt: [('Pro', 'hibition'), ('perfect', 'ly'), ('Limit', 'ations')]
AmSn18960716-V11-28-page2.txt: [('are', 'Ca')]
AmSn18960716-V11-28-page4.txt: [('the', 're')]
AmSn18960716-V11-28-page7.txt: [('Ha', 'shum'), ('V', 'al'), ('NEH', 'EMIAH'), ('r', 'Ex'), ('Ha', 'shub'), ('nah', 'Ma'), ('E', 'zra'), ('a', 'bi'), ('Le', 'vites')]
AmSn18960723-V11-29-page1.txt: [('J', 'ULY')]
AmSn18960723-V11-29-page5.txt: [('con', 'ventions')]
AmSn18960723-V11-29-page7.txt: [('A', 'lian')]
AmSn18960730-V11-30-page2.txt: [('des', 'tinies')]
AmSn18960730-V11-30-page4.txt: [('Script', 'UreS')]
AmSn18960806-V11-31-page2.txt: [('Dan', 'iel')]
AmSn18960813-V11-32-page7.txt: [('jah', 'Az'), ('Ha', 'shub'), ('a', 'NS')]
AmSn18960820-V11-33-page6.txt: [('now', 'adays')]
AmSn18960820-V11-33-page7.txt: [('Le', 'vites'), ('a', 'Ne')]
AmSn18960827-V11-34-page5.txt: [('T', 'wo')]
AmSn18960827-V11-34-page7.txt: [('A', 'merican')]
AmSn18960827-V11-34-page8.txt: [('a', 'ny')]
AmSn18960903-V11-35-page2.txt: [('to', 're')]
AmSn18960903-V11-35-page3.txt: [('a', 'mo')]
AmSn18960903-V11-35-page5.txt: [('inter', 'esting')]
AmSn18960903-V11-35-page7.txt: [('a', 'sa'), ('Le', 'vItes'), ('a', 'liah'), ('I', 'ra'), ('a', 'Ne'), ('He', 'zir')]
AmSn18960910-V11-36-page3.txt: [('A', 'll'), ('as', 'semblies')]
AmSn18960910-V11-36-page7.txt: [('E', 'zra')]
AmSn18960917-V11-37-page2.txt: [('in', 'dictment'), ('d', 'iscourse')]
AmSn18960917-V11-37-page3.txt: [('Demo', 'crat')]
AmSn18960917-V11-37-page7.txt: [('G', 'aG'), ('nah', 'MA')]
AmSn18960924-V11-38-page5.txt: [('Ruth', 'erford')]
AmSn18960924-V11-38-page7.txt: [('Y', 'ork'), ('Le', 'vites')]
AmSn18961015-V11-41-page1.txt: [('Cardin', 'al')]
AmSn18961015-V11-41-page7.txt: [('t', 'ow')]
AmSn18961015-V11-41-page8.txt: [('Aguas', 'Calientes')]
AmSn18961022-V11-42-page1.txt: [('de', 'manded'), ('dema', 'nd')]
AmSn18961022-V11-42-page3.txt: [('rec', 'ognized'), ('spir', 'itual')]
AmSn18961022-V11-42-page4.txt: [('ME', 'th')]
AmSn18961029-V11-43-page1.txt: [('per', 'se')]
AmSn18961029-V11-43-page3.txt: [('CHRIS', 'TIAN')]
AmSn18961029-V11-43-page7.txt: [('v', 'olumes'), ('W', 'ith')]
AmSn18961029-V11-43-page8.txt: [('of', 'ficio')]
AmSn18961105-V11-44-page2.txt: [('polit', 'ical')]
AmSn18961105-V11-44-page7.txt: [('de', 'scription')]
AmSn18961112-V11-45-page2.txt: [('SEN', 'TINEL'), ('A', 'dventists'), ('CIT', 'IZENSHIP')]
AmSn18961119-V11-46-page3.txt: [('per', 'mit')]
AmSn18961119-V11-46-page7.txt: [('Le', 'vites')]
AmSn18961126-V11-47-page4.txt: [('AMER', 'ICAN'), ('GOVERN', 'MENT')]
AmSn18961126-V11-47-page5.txt: [('SEN', 'TINEL')]
AmSn18961126-V11-47-page6.txt: [('Su', 'nday')]
AmSn18961126-V11-47-page8.txt: [('SEN', 'TINEL')]
AmSn18961203-V11-48-page5.txt: [('Chris', 'tian')]
AmSn18961203-V11-48-page6.txt: [('SEN', 'TINEL'), ('Christian', 'ity')]
AmSn18961203-V11-48-page7.txt: [('N', 'th'), ('a', 'NS')]
AmSn18961217-V11-50-page6.txt: [('e', 'arly')]
AmSn18961217-V11-50-page7.txt: [('K', 'ey'), ('a', 'Ne')]
AmSn18961224-V11-51-page1.txt: [('s', 'ubterfuge')]
AmSn18961224-V11-51-page3.txt: [('de', 'manding')]
AmSn18961224-V11-51-page4.txt: [('storekee', 'pers')]
AmSn18961224-V11-51-page6.txt: [('A', 'Mt')]
AmSn18961224-V11-51-page7.txt: [('import', 'ance'), ('Ba', 'ni'), ('i', 'll'), ('a', 'Ne'), ('HA', 'sh'), ('Ha', 'rIph'), ('Le', 'vites')]
AmSn18961224-V11-51-page8.txt: [('SEN', 'TINEL')]
AmSn18970107-V12-01-page10.txt: [('repudi', 'ated')]
AmSn18970107-V12-01-page16.txt: [('Le', 'vites'), ('A', 'MERICA')]
AmSn18970114-V12-02-page14.txt: [('A', 'MERICAN')]
AmSn18970114-V12-02-page4.txt: [('entertainmen', 'ts')]
AmSn18970121-V12-03-page1.txt: [('A', 'LONZO')]
AmSn18970121-V12-03-page13.txt: [('Govern', 'ment')]
AmSn18970121-V12-03-page7.txt: [('a', 'nd')]
AmSn18970121-V12-03-page8.txt: [('the', 'Ca'), ('Christia', 'nity')]
AmSn18970128-V12-04-page1.txt: [('a', 'nd')]
AmSn18970128-V12-04-page13.txt: [('Pry', "or's")]
AmSn18970128-V12-04-page6.txt: [('work', 'ers'), ('pres', 'ent')]
AmSn18970204-V12-05-page14.txt: [('sub', 'ject')]
AmSn18970204-V12-05-page4.txt: [('P', 'Al')]
AmSn18970204-V12-05-page6.txt: [('sus', 'tained')]
AmSn18970204-V12-05-page9.txt: [('forma', 'tion')]
AmSn18970211-V12-06-page14.txt: [('AMER', 'ICAN')]
AmSn18970211-V12-06-page2.txt: [('DE', 'CISIONS')]
AmSn18970211-V12-06-page6.txt: [('de', 'cided')]
AmSn18970211-V12-06-page8.txt: [('cir', 'cumspection')]
AmSn18970218-V12-07-page15.txt: [('in', 'terest')]
AmSn18970218-V12-07-page16.txt: [('nah', 'MA')]
AmSn18970225-V12-08-page1.txt: [('A', 'LONZO')]
AmSn18970225-V12-08-page11.txt: [('SEC', 'TIONS')]
AmSn18970225-V12-08-page15.txt: [('contin', 'ual')]
AmSn18970225-V12-08-page4.txt: [('End', 'eavorer')]
AmSn18970225-V12-08-page6.txt: [('sub', 'ject'), ('pla', 'ces')]
AmSn18970304-V12-09-page10.txt: [('A', 'MERICAN')]
AmSn18970311-V12-10-page14.txt: [('m', 'uch')]
AmSn18970311-V12-10-page2.txt: [('exam', 'ine')]
AmSn18970311-V12-10-page7.txt: [('E', 'RI')]
AmSn18970318-V12-11-page14.txt: [('Kan', 'sas')]
AmSn18970318-V12-11-page15.txt: [('P', 'ACIFIC'), ('in', 'terest')]
AmSn18970318-V12-11-page7.txt: [('con', 'demned')]
AmSn18970318-V12-11-page8.txt: [('J', 'oash')]
AmSn18970325-V12-12-page10.txt: [('dispe', 'nse')]
AmSn18970325-V12-12-page16.txt: [('W', 'ith'), ('sim', 'ple')]
AmSn18970325-V12-12-page3.txt: [('im', 'portance')]
AmSn18970401-V12-13-page15.txt: [('P', 'olitical')]
AmSn18970401-V12-13-page16.txt: [('W', 'ith')]
AmSn18970401-V12-13-page2.txt: [('con', 'cerning'), ('ChriS', 'tians')]
AmSn18970401-V12-13-page7.txt: [('r', 'um')]
AmSn18970401-V12-13-page8.txt: [('a', 'sk')]
AmSn18970408-V12-14-page10.txt: [('p', 'urely')]
AmSn18970408-V12-14-page11.txt: [('SEN', 'TINEL')]
AmSn18970408-V12-14-page15.txt: [('pro', 'greSSed')]
AmSn18970408-V12-14-page16.txt: [('W', 'ith'), ('M', 'RS')]
AmSn18970408-V12-14-page9.txt: [('fundament', 'al')]
AmSn18970415-V12-15-page2.txt: [('ma', 'jority')]
AmSn18970415-V12-15-page4.txt: [('appoint', 'ment')]
AmSn18970415-V12-15-page9.txt: [('a', 'nd')]
AmSn18970422-V12-16-page13.txt: [('spirit', 'ual')]
AmSn18970422-V12-16-page7.txt: [('t', 'wo')]
AmSn18970429-V12-17-page1.txt: [('a', 'reli')]
AmSn18970429-V12-17-page10.txt: [('Is', 'rael')]
AmSn18970429-V12-17-page11.txt: [('to', 'es')]
AmSn18970429-V12-17-page15.txt: [('in', 'terest')]
AmSn18970429-V12-17-page16.txt: [('N', 'th'), ('Le', 'vites')]
AmSn18970429-V12-17-page4.txt: [('d', 'ay')]
AmSn18970506-V12-18-page1.txt: [('sim', 'plicity')]
AmSn18970506-V12-18-page12.txt: [('en', 'forcement')]
AmSn18970506-V12-18-page13.txt: [('god', 'liness'), ('Chris', 'tian')]
AmSn18970506-V12-18-page4.txt: [('degrada', 'tion')]
AmSn18970513-V12-19-page12.txt: [('as', 'certaining')]
AmSn18970513-V12-19-page13.txt: [('inter', 'rupted')]
AmSn18970513-V12-19-page4.txt: [('pro', 'mulgation')]
AmSn18970513-V12-19-page9.txt: [('hun', 'dreds')]
AmSn18970520-V12-20-page1.txt: [('cor', 'ruptible')]
AmSn18970520-V12-20-page14.txt: [('w', 'ith'), ('m', 'etal')]
AmSn18970520-V12-20-page15.txt: [('in', 'terest'), ('M', 'atthew')]
AmSn18970520-V12-20-page9.txt: [('a', 'ngel'), ('sin', 'gle'), ('a', 'bsolutely')]
AmSn18970527-V12-21-page1.txt: [('k', 'OA')]
AmSn18970527-V12-21-page14.txt: [('f', 'ORK')]
AmSn18970527-V12-21-page15.txt: [('in', 'terest')]
AmSn18970603-V12-22-page11.txt: [('in', 'dulged')]
AmSn18970603-V12-22-page13.txt: [('a', 'nd'), ('con', 'cerned')]
AmSn18970603-V12-22-page14.txt: [('m', 'onths')]
AmSn18970603-V12-22-page16.txt: [('W', 'ith')]
AmSn18970603-V12-22-page6.txt: [('d', 'oors')]
AmSn18970603-V12-22-page7.txt: [('a', 'll')]
AmSn18970603-V12-22-page8.txt: [('In', 'asmuch'), ('m', 'em')]
AmSn18970610-V12-23-page13.txt: [('de', 'nominational')]
AmSn18970610-V12-23-page5.txt: [('in', 'struction')]
AmSn18970610-V12-23-page6.txt: [('pe', 'culiar')]
AmSn18970610-V12-23-page9.txt: [('per', 'se')]
AmSn18970617-V12-24-page3.txt: [('A', 'MERICAN')]
AmSn18970624-V12-25-page4.txt: [('right', 'eousness')]
AmSn18970624-V12-25-page6.txt: [('Fed', 'eral')]
AmSn18970701-V12-26-page11.txt: [('con', 'vention')]
AmSn18970701-V12-26-page14.txt: [('SEN', 'TINEL')]
AmSn18970701-V12-26-page2.txt: [('dis', 'tinctly')]
AmSn18970701-V12-26-page3.txt: [('a', 'nd')]
AmSn18970707-V12-27-page11.txt: [('Hin', 'doos')]
AmSn18970707-V12-27-page14.txt: [('in', 'terest'), ('cal', 'ender')]
AmSn18970707-V12-27-page15.txt: [('G', 'od')]
AmSn18970707-V12-27-page4.txt: [('SEN', 'TINEL')]
AmSn18970707-V12-27-page6.txt: [('govern', 'ments')]
AmSn18970715-V12-28-page13.txt: [('A', 'MERICAN')]
AmSn18970715-V12-28-page14.txt: [('in', 'terest')]
AmSn18970715-V12-28-page7.txt: [('ha', 'th'), ('h', 'ome')]
AmSn18970715-V12-28-page8.txt: [('sig', 'nificant')]
AmSn18970722-V12-29-page1.txt: [('ex', 'cept')]
AmSn18970722-V12-29-page10.txt: [('consti', 'tutional')]
AmSn18970722-V12-29-page14.txt: [('in', 'terest')]
AmSn18970722-V12-29-page5.txt: [('the', 'se')]
AmSn18970729-V12-30-page14.txt: [('in', 'terest')]
AmSn18970729-V12-30-page16.txt: [('W', 'ith'), ('sim', 'ple')]
AmSn18970805-V12-31-page1.txt: [('prof', 'itable')]
AmSn18970805-V12-31-page10.txt: [('a', 'nd')]
AmSn18970805-V12-31-page13.txt: [('SEN', 'TINEL')]
AmSn18970805-V12-31-page14.txt: [('in', 'terest')]
AmSn18970812-V12-32-page15.txt: [('Y', 'ork')]
AmSn18970812-V12-32-page6.txt: [('con', 'fidently')]
AmSn18970812-V12-32-page7.txt: [('SEN', 'TINEL')]
AmSn18970819-V12-33-page14.txt: [('con', 'nected'), ('Y', 'ork')]
AmSn18970819-V12-33-page16.txt: [('W', 'ith'), ('sim', 'ple')]
AmSn18970819-V12-33-page2.txt: [('a', 're'), ('Chris', 'tians')]
AmSn18970819-V12-33-page5.txt: [('Cir', 'cumcision')]
AmSn18970909-V12-35-page14.txt: [('Y', 'ork')]
AmSn18970909-V12-35-page16.txt: [('m', 'oth')]
AmSn18970916-V12-36-page4.txt: [('y', 'ou')]
AmSn18970916-V12-36-page6.txt: [('inte', 'nts')]
AmSn18970923-V12-37-page12.txt: [('disap', 'pointments')]
AmSn18970923-V12-37-page13.txt: [('Y', 'ork')]
AmSn18970923-V12-37-page14.txt: [('pub', 'lished')]
AmSn18970923-V12-37-page15.txt: [('T', 'ao'), ('a', 'ce')]
AmSn18970923-V12-37-page16.txt: [('n', 'ote'), ('n', 'otes')]
AmSn18970923-V12-37-page4.txt: [('be', 'ng'), ('in', 'terests')]
AmSn18970923-V12-37-page6.txt: [('a', 'nd')]
AmSn18970923-V12-37-page8.txt: [('real', 'ization')]
AmSn18970923-V12-37-page9.txt: [('polit', 'ical'), ('de', 'termined')]
AmSn18970930-V12-38-page13.txt: [('SEN', 'TINEL')]
AmSn18970930-V12-38-page16.txt: [('ra', 'ca'), ('ho', 'Ts'), ('g', 'rin')]
AmSn18970930-V12-38-page4.txt: [('con', 'vention'), ('in', 'toxicated')]
AmSn18970930-V12-38-page6.txt: [('A', 'MERICAN')]
AmSn18971007-V12-39-page13.txt: [('ad', 'vocate')]
AmSn18971007-V12-39-page9.txt: [('Amer', 'ican')]
AmSn18971014-V12-40-page16.txt: [('p', 'PM'), ('I', 'ce'), ('i', 'SL'), ('w', 'ei'), ('s', 'Om'), ('I', 're'), ('m', 'id')]
AmSn18971021-V12-41-page12.txt: [('won', 'dered')]
AmSn18971021-V12-41-page13.txt: [('j', 'oy')]
AmSn18971021-V12-41-page14.txt: [('Dic', 'tionary'), ('the', 're')]
AmSn18971021-V12-41-page16.txt: [('a', 'ka'), ('A', 'ddress'), ('R', 'cd'), ('a', 'ft'), ('I', 're')]
AmSn18971021-V12-41-page2.txt: [('attend', 'ance')]
AmSn18971021-V12-41-page5.txt: [('im', 'agined')]
AmSn18971021-V12-41-page6.txt: [('d', 'welleth')]
AmSn18971021-V12-41-page9.txt: [('A', 'VER')]
AmSn18971028-V12-42-page1.txt: [('SEN', 'TINEL')]
AmSn18971028-V12-42-page14.txt: [('in', 'terest')]
AmSn18971028-V12-42-page16.txt: [('a', 'Ne'), ('a', 'li')]
AmSn18971028-V12-42-page2.txt: [('SEN', 'TINEL')]
AmSn18971028-V12-42-page9.txt: [('a', 'nd')]
AmSn18971104-V12-43-page14.txt: [('in', 'terest')]
AmSn18971104-V12-43-page16.txt: [('a', 'Ne')]
AmSn18971104-V12-43-page3.txt: [('as', 'serted'), ('di', 'rection')]
AmSn18971104-V12-43-page9.txt: [('Insp', 'iration')]
AmSn18971111-V12-44-page15.txt: [('in', 'ca')]
AmSn18971111-V12-44-page16.txt: [('Me', 'shullam'), ('a', 'Ne')]
AmSn18971111-V12-44-page2.txt: [('a', 'nd')]
AmSn18971111-V12-44-page8.txt: [('a', 'bundantly')]
AmSn18971118-V12-45-page14.txt: [('DiCtion', 'ary')]
AmSn18971118-V12-45-page15.txt: [('in', 'ti'), ('Kan', 'sas'), ('Com', 'pany')]
AmSn18971118-V12-45-page16.txt: [('Som', 'ething'), ('a', 'Ne')]
AmSn18971125-V12-46-page10.txt: [('im', 'portant')]
AmSn18971125-V12-46-page11.txt: [('We', 'll'), ('We', 're')]
AmSn18971125-V12-46-page14.txt: [('A', 'pril')]
AmSn18971125-V12-46-page15.txt: [('fur', 'nished')]
AmSn18971125-V12-46-page16.txt: [('a', 'Ne')]
AmSn18971125-V12-46-page2.txt: [('En', 'deavor')]
AmSn18971125-V12-46-page3.txt: [('in', 'stitution')]
AmSn18971202-V12-47-page1.txt: [('Chris', 'tian')]
AmSn18971202-V12-47-page12.txt: [('alw', 'ays')]
AmSn18971202-V12-47-page7.txt: [('a', 'nd')]
AmSn18971209-V12-48-page14.txt: [('Qual', 'ity'), ('in', 'ca')]
AmSn18971209-V12-48-page16.txt: [('ah', 'Az'), ('a', 'Ne')]
AmSn18971209-V12-48-page6.txt: [('pro', 'fession')]
AmSn18971209-V12-48-page7.txt: [('P', 'rotestants')]
AmSn18971216-V12-49-page13.txt: [('Com', 'bination')]
AmSn18971216-V12-49-page14.txt: [('de', 'scriptive'), ('in', 'ca')]
AmSn18971216-V12-49-page16.txt: [('H', 'eadband')]
AmSn18971216-V12-49-page6.txt: [('the', 're')]
AmSn18971216-V12-49-page7.txt: [('per', 'se')]
AmSn18971216-V12-49-page9.txt: [('ex', 'pire')]
AmSn18971230-V12-50-page1.txt: [('A', 'licia')]
AmSn18971230-V12-50-page14.txt: [('in', 'terest')]
AmSn18971230-V12-50-page4.txt: [('occa', 'sions')]
AmSn18971230-V12-50-page9.txt: [('b', 'ast')]
AmSn18980106-V13-01-page2.txt: [('ex', 'pected'), ('be', 'lieve')]
AmSn18980113-V13-02-page1.txt: [('l', 'imn'), ('in', 'stincts')]
AmSn18980113-V13-02-page13.txt: [('SEN', 'TINEL')]
AmSn18980113-V13-02-page16.txt: [('a', 'NS')]
AmSn18980113-V13-02-page4.txt: [('Chri', 'stian')]
AmSn18980113-V13-02-page5.txt: [('pos', 'sible')]
AmSn18980113-V13-02-page7.txt: [('a', 'ttending')]
AmSn18980120-V13-03-page15.txt: [('in', 'terest')]
AmSn18980120-V13-03-page16.txt: [('Th', 'ey'), ('s', 'itz')]
AmSn18980120-V13-03-page6.txt: [('de', 'cided')]
AmSn18980127-V13-04-page14.txt: [('in', 'terest'), ('Cat', 'arrh')]
AmSn18980127-V13-04-page16.txt: [('ah', 'Az'), ('T', 'OW')]
AmSn18980127-V13-04-page2.txt: [('Congregation', 'alist')]
AmSn18980127-V13-04-page4.txt: [('per', 'suaded')]
AmSn18980203-V13-05-page14.txt: [('med', 'icine')]
AmSn18980203-V13-05-page5.txt: [('r', 'espect')]
AmSn18980210-V13-06-page12.txt: [('su', 're')]
AmSn18980210-V13-06-page14.txt: [('in', 'terest')]
AmSn18980210-V13-06-page16.txt: [('the', 'se'), ('Con', 'gress')]
AmSn18980210-V13-06-page5.txt: [('Us', 'urped')]
AmSn18980210-V13-06-page9.txt: [('lib', 'erty'), ('estab', 'lished'), ('re', 'gards')]
AmSn18980217-V13-07-page14.txt: [('in', 'terest'), ('in', 'ca')]
AmSn18980217-V13-07-page15.txt: [('a', 'rak')]
AmSn18980217-V13-07-page3.txt: [('pro', 'fession')]
AmSn18980217-V13-07-page7.txt: [('A', 'MERICAN')]
AmSn18980224-V13-08-page14.txt: [('in', 'terest'), ('Wagon', 'ettes'), ('W', 'ig')]
AmSn18980224-V13-08-page15.txt: [('f', 'orgo')]
AmSn18980224-V13-08-page8.txt: [('SEN', 'TINEL')]
AmSn18980224-V13-08-page9.txt: [('fa', 'tuus')]
AmSn18980303-V13-09-page14.txt: [('in', 'terest')]
AmSn18980303-V13-09-page15.txt: [('judg', 'ments')]
AmSn18980303-V13-09-page4.txt: [('mer', 'rily')]
AmSn18980303-V13-09-page6.txt: [('in', 'clined'), ('Gov', 'ernment')]
AmSn18980310-V13-10-page14.txt: [('A', 'ddress'), ('g', 'od')]
AmSn18980310-V13-10-page15.txt: [('a', 'Ne')]
AmSn18980310-V13-10-page16.txt: [('inf', 'ormed')]
AmSn18980310-V13-10-page2.txt: [('sev', 'eral')]
AmSn18980310-V13-10-page7.txt: [('J', 'erusalem'), ('appear', 'ance')]
AmSn18980310-V13-10-page8.txt: [('faith', 'ful')]
AmSn18980317-V13-11-page14.txt: [('A', 'loth')]
AmSn18980317-V13-11-page15.txt: [('for', 'ES'), ('a', 'Ne')]
AmSn18980317-V13-11-page8.txt: [('act', 'uated')]
AmSn18980324-V13-12-page12.txt: [('see', 'th')]
AmSn18980324-V13-12-page15.txt: [('a', 'Ne')]
AmSn18980324-V13-12-page3.txt: [('gov', 'ernment')]
AmSn18980324-V13-12-page8.txt: [('Chris', 'tian')]
AmSn18980331-V13-13-page14.txt: [('in', 'terest')]
AmSn18980331-V13-13-page15.txt: [('a', 'Ne'), ('ill', 'ustrations')]
AmSn18980331-V13-13-page16.txt: [('w', 'ould')]
AmSn18980407-V13-14-page14.txt: [('in', 'terest')]
AmSn18980407-V13-14-page15.txt: [('f', 'orgo')]
AmSn18980414-V13-15-page10.txt: [('like', 'th')]
AmSn18980414-V13-15-page14.txt: [('in', 'terest'), ('in', 'ca')]
AmSn18980414-V13-15-page15.txt: [('A', 'Mt'), ('Le', 'vites'), ('a', 'Ne')]
AmSn18980414-V13-15-page3.txt: [('st', 'atement')]
AmSn18980414-V13-15-page4.txt: [('sac', 'rificed')]
AmSn18980414-V13-15-page6.txt: [('fun', 'damental')]
AmSn18980421-V13-16-page14.txt: [('in', 'terest')]
AmSn18980428-V13-17-page12.txt: [('au', 'thorities')]
AmSn18980428-V13-17-page13.txt: [('a', 'reli'), ('r', 'om'), ('fur', 'nished')]
AmSn18980428-V13-17-page14.txt: [('Kan', 'sas'), ('Com', 'pany'), ('in', 'terest')]
AmSn18980428-V13-17-page16.txt: [('Amer', 'icans')]
AmSn18980428-V13-17-page3.txt: [('exam', 'ple')]
AmSn18980428-V13-17-page5.txt: [('A', 'MERICAN')]
AmSn18980505-V13-18-page13.txt: [('mod', 'ern')]
AmSn18980505-V13-18-page14.txt: [('in', 'terest')]
AmSn18980505-V13-18-page5.txt: [('a', 'nd')]
AmSn18980512-V13-19-page14.txt: [('DI', 'ES')]
AmSn18980519-V13-20-page1.txt: [('Chris', 'tian')]
AmSn18980519-V13-20-page14.txt: [('especial', 'ly')]
AmSn18980519-V13-20-page2.txt: [('a', 'id')]
AmSn18980519-V13-20-page6.txt: [('con', 'cern')]
AmSn18980519-V13-20-page7.txt: [('part', 'nership')]
AmSn18980526-V13-21-page14.txt: [('In', 'terest'), ('especial', 'ly')]
AmSn18980526-V13-21-page5.txt: [('meth', 'ods')]
AmSn18980602-V13-22-page14.txt: [('especial', 'ly')]
AmSn18980602-V13-22-page15.txt: [('Add', 'ress')]
AmSn18980602-V13-22-page3.txt: [('par', 'tial')]
AmSn18980602-V13-22-page7.txt: [('s', 'un')]
AmSn18980609-V13-23-page14.txt: [('Com', 'pany'), ('especial', 'ly'), ('HA', 'YS')]
AmSn18980609-V13-23-page15.txt: [('i', 'ri'), ('a', 're'), ('a', 'il'), ('e', 'gg')]
AmSn18980609-V13-23-page2.txt: [('perma', 'nent'), ('per', 'manent')]
AmSn18980609-V13-23-page3.txt: [('ques', 'tion')]
AmSn18980609-V13-23-page9.txt: [('Minis', "ters'")]
AmSn18980616-V13-24-page10.txt: [('Chris', 'tian')]
AmSn18980616-V13-24-page13.txt: [('w', 'ork'), ('fur', 'nished')]
AmSn18980616-V13-24-page14.txt: [('Com', 'pany'), ('especial', 'ly')]
AmSn18980616-V13-24-page15.txt: [('i', 'Cel'), ('Er', 'ie'), ('m', 'oi')]
AmSn18980616-V13-24-page2.txt: [('Colo', 'nel')]
AmSn18980616-V13-24-page7.txt: [('an', 'swer')]
AmSn18980623-V13-25-page14.txt: [('in', 'terest'), ('V', 'aluable'), ('especial', 'ly')]
AmSn18980623-V13-25-page15.txt: [('a', 'll'), ('b', 'ecome'), ('E', 'ra'), ('not', 'ch')]
AmSn18980623-V13-25-page16.txt: [('Bis', 'marck')]
AmSn18980630-V13-26-page11.txt: [('Eng', 'lish')]
AmSn18980630-V13-26-page14.txt: [('especial', 'ly')]
AmSn18980630-V13-26-page15.txt: [('to', 'Il'), ('H', 'ay'), ('i', 'va')]
AmSn18980630-V13-26-page6.txt: [('cor', 'ruptions')]
AmSn18980630-V13-26-page9.txt: [('a', 'lready')]
AmSn18980714-V13-27-page14.txt: [('especial', 'ly')]
AmSn18980714-V13-27-page15.txt: [('I', 're'), ('to', 'Co'), ('r', 'CD'), ('th', 'ese'), ('O', 'ra')]
AmSn18980714-V13-27-page2.txt: [('evan', 'gelical'), ('be', 'lieve')]
AmSn18980714-V13-27-page3.txt: [('so', 'ul')]
AmSn18980714-V13-27-page7.txt: [('IS', 'TH')]
AmSn18980721-V13-28-page10.txt: [('Preside', 'nt')]
AmSn18980721-V13-28-page17.txt: [('to', 'ry')]
AmSn18980721-V13-28-page19.txt: [('especial', 'ly')]
AmSn18980721-V13-28-page6.txt: [('f', 'ol')]
AmSn18980728-V13-29-page10.txt: [('sub', 'jects')]
AmSn18980728-V13-29-page14.txt: [('especial', 'ly')]
AmSn18980728-V13-29-page7.txt: [('symbol', 'ized')]
AmSn18980804-V13-30-page14.txt: [('especial', 'ly')]
AmSn18980804-V13-30-page4.txt: [('ecclesias', 'tically')]
AmSn18980804-V13-30-page8.txt: [('ac', 'cept')]
AmSn18980811-V13-31-page12.txt: [('belie', 'veth')]
AmSn18980811-V13-31-page14.txt: [('especial', 'ly')]
AmSn18980811-V13-31-page16.txt: [('en', 'gagement')]
AmSn18980811-V13-31-page7.txt: [('the', 'se')]
AmSn18980818-V13-32-page14.txt: [('especial', 'ly')]
AmSn18980818-V13-32-page6.txt: [('crim', 'inal')]
AmSn18980825-V13-33-page1.txt: [('as', 'cend')]
AmSn18980825-V13-33-page7.txt: [('do', 'th')]
AmSn18980825-V13-33-page8.txt: [('a', 're')]
AmSn18980901-V13-34-page4.txt: [('in', 'itiative')]
AmSn18980908-V13-35-page12.txt: [('right', 'ful')]
AmSn18980908-V13-35-page2.txt: [('be', 'seeching')]
AmSn18980908-V13-35-page7.txt: [('Equal', 'ity')]
AmSn18980915-V13-36-page15.txt: [('P', 'ress')]
AmSn18980915-V13-36-page2.txt: [('prop', 'erly')]
AmSn18980922-V13-37-page14.txt: [('especial', 'ly')]
AmSn18980922-V13-37-page15.txt: [('cap', 'tivity')]
AmSn18980922-V13-37-page16.txt: [('for', 'eign'), ('Gov', 'ernment')]
AmSn18980922-V13-37-page5.txt: [('ad', 'venturers')]
AmSn18980922-V13-37-page6.txt: [('Cath', 'olics')]
AmSn18980929-V13-38-page13.txt: [('It', "'s")]
AmSn18980929-V13-38-page14.txt: [('especial', 'ly')]
AmSn18980929-V13-38-page15.txt: [('cap', 'tivity')]
AmSn18980929-V13-38-page2.txt: [('the', 'Ca'), ('Ca', 'tholic')]
AmSn18980929-V13-38-page9.txt: [('SEN', 'TINEL')]
AmSn18981006-V13-39-page1.txt: [('GOVERN', 'MENT')]
AmSn18981006-V13-39-page14.txt: [('cap', 'tivity')]
AmSn18981006-V13-39-page15.txt: [('especial', 'ly')]
AmSn18981013-V13-40-page14.txt: [('M', 'OD')]
AmSn18981013-V13-40-page15.txt: [('cap', 'tivity')]
AmSn18981013-V13-40-page16.txt: [('ten', 'dency')]
AmSn18981020-V13-41-page11.txt: [('enforce', 'ment')]
AmSn18981020-V13-41-page14.txt: [('cap', 'tivity'), ('In', 'terpretation'), ('especial', 'ly')]
AmSn18981020-V13-41-page3.txt: [('familiar', 'ity')]
AmSn18981027-V13-42-page15.txt: [('cap', 'tivity'), ('r', 'om'), ('especial', 'ly')]
AmSn18981103-V13-43-page11.txt: [('con', 'flict')]
AmSn18981103-V13-43-page14.txt: [('cap', 'tivity'), ('Kan', 'sas'), ('especial', 'ly')]
AmSn18981110-V13-44-page15.txt: [('cap', 'tivity'), ('especial', 'ly')]
AmSn18981110-V13-44-page6.txt: [('politi', 'cian')]
AmSn18981110-V13-44-page9.txt: [('author', 'ity')]
AmSn18981117-V13-45-page1.txt: [('spirit', 'ual')]
AmSn18981117-V13-45-page14.txt: [('cap', 'tivity'), ('especial', 'ly')]
AmSn18981124-V13-46-page15.txt: [('cap', 'tivity'), ('in', 'terpretation'), ('especial', 'ly')]
AmSn18981201-V13-47-page14.txt: [('cap', 'tivity'), ('In', 'terpretation'), ('especial', 'ly')]
AmSn18981201-V13-47-page16.txt: [('the', 'es'), ('reg', 'ular')]
AmSn18981201-V13-47-page3.txt: [('Cath', 'olic'), ('dig', 'nitaries')]
AmSn18981201-V13-47-page5.txt: [('inter', 'ests')]
AmSn18981201-V13-47-page9.txt: [('in', 'genious')]
AmSn18981208-V13-48-page13.txt: [('engr', 'avings')]
AmSn18981208-V13-48-page14.txt: [('r', 'io')]
AmSn18981208-V13-48-page15.txt: [('cap', 'tivity')]
AmSn18981208-V13-48-page7.txt: [('hap', 'piness')]
AmSn18981215-V13-49-page1.txt: [('GOVERN', 'MENT'), ('gov', 'ernment')]
AmSn18981215-V13-49-page14.txt: [('cap', 'tivity'), ('especial', 'ly')]
AmSn18981215-V13-49-page4.txt: [('ques', 'tion')]
AmSn18981215-V13-49-page5.txt: [('dis', 'seminate')]
AmSn18981229-V13-50-page14.txt: [('cap', 'tivity'), ('c', 'onn'), ('In', 'terpretation'), ('especial', 'ly')]
AmSn18981229-V13-50-page15.txt: [('C', 'itation')]
AmSn18981229-V13-50-page16.txt: [('ha', 'ndier')]
AmSn18981229-V13-50-page2.txt: [('go', 'vernment')]
AmSn18981229-V13-50-page3.txt: [('imperial', 'ist')]
AmSn18981229-V13-50-page7.txt: [('and', 're')]
AmSn18990105-V14-01-page15.txt: [('fascin', 'ating')]
AmSn18990105-V14-01-page2.txt: [('Con', 'gress')]
AmSn18990105-V14-01-page5.txt: [('cen', 'tury')]
AmSn18990112-V14-02-page14.txt: [('engr', 'avings')]
AmSn18990112-V14-02-page15.txt: [('fa', 'mily')]
AmSn18990119-V14-03-page10.txt: [('p', 'urpose')]
AmSn18990119-V14-03-page14.txt: [('un', 'derlying')]
AmSn18990126-V14-04-page11.txt: [('a', 'nd')]
AmSn18990126-V14-04-page13.txt: [('TO', 'OtER')]
AmSn18990126-V14-04-page15.txt: [('fa', 'mily'), ('C', 'itation')]
AmSn18990202-V14-05-page1.txt: [('PRO', 'FESSION')]
AmSn18990202-V14-05-page14.txt: [('r', 'avings')]
AmSn18990202-V14-05-page15.txt: [('t', 'itles'), ('N', 'ev'), ('C', 'itation')]
AmSn18990202-V14-05-page16.txt: [('SEN', 'TINEL')]
AmSn18990202-V14-05-page9.txt: [('We', 'll')]
AmSn18990209-V14-06-page12.txt: [('out', 'generaled')]
AmSn18990209-V14-06-page14.txt: [('cap', 'tivity')]
AmSn18990209-V14-06-page8.txt: [('to', 'ne')]
AmSn18990216-V14-07-page14.txt: [('cap', 'tivity')]
AmSn18990223-V14-08-page14.txt: [('cap', 'tivity')]
AmSn18990302-V14-09-page16.txt: [('SEN', 'TINEL')]
AmSn18990302-V14-09-page6.txt: [('ver', 'acity')]
AmSn18990302-V14-09-page9.txt: [('w', 'hich')]
AmSn18990309-V14-10-page1.txt: [('voice', 'ful')]
AmSn18990309-V14-10-page15.txt: [('P', 'RESS')]
AmSn18990309-V14-10-page16.txt: [('SEN', 'TINEL')]
AmSn18990309-V14-10-page6.txt: [('de', 'clared')]
AmSn18990316-V14-11-page15.txt: [('g', 'oo')]
AmSn18990316-V14-11-page5.txt: [('and', 're')]
AmSn18990316-V14-11-page6.txt: [('lib', 'erty')]
AmSn18990323-V14-12-page11.txt: [('en', 'gaged')]
AmSn18990323-V14-12-page15.txt: [('fa', 'mily'), ('O', 'LD')]
AmSn18990323-V14-12-page2.txt: [('gov', 'ernment')]
AmSn18990323-V14-12-page5.txt: [('Christian', 'ity')]
AmSn18990330-V14-13-page10.txt: [('sold', 'iers')]
AmSn18990406-V14-14-page14.txt: [('A', 'bby')]
AmSn18990406-V14-14-page8.txt: [('in', 'vestigations')]
AmSn18990413-V14-15-page14.txt: [('at', 'Li')]
AmSn18990413-V14-15-page16.txt: [('who', 'se')]
AmSn18990420-V14-16-page4.txt: [('min', 'ister')]
AmSn18990427-V14-17-page13.txt: [('chap', 'lains')]
AmSn18990427-V14-17-page8.txt: [('f', 'ollowing')]
AmSn18990504-V14-18-page15.txt: [('scien', 'tific')]
AmSn18990511-V14-19-page14.txt: [('Sanct', 'uary'), ('phys', 'ical'), ('FOR', 'EIGN')]
AmSn18990518-V14-20-page10.txt: [('pro', 'hibit')]
AmSn18990518-V14-20-page14.txt: [('A', 'li'), ('FOR', 'EIGN')]
AmSn18990525-V14-21-page1.txt: [('p', 'erson')]
AmSn18990525-V14-21-page15.txt: [('T', 'iP')]
AmSn18990601-V14-22-page1.txt: [('A', 'verted')]
AmSn18990601-V14-22-page11.txt: [('com', 'pelled')]
AmSn18990601-V14-22-page14.txt: [('tim', 'es'), ('mission', 'ary')]
AmSn18990601-V14-22-page15.txt: [('A', 'rIAN')]
AmSn18990608-V14-23-page13.txt: [('Sanct', 'uary')]
AmSn18990608-V14-23-page15.txt: [('AM', 'ERICAN')]
AmSn18990608-V14-23-page2.txt: [('Ha', 'tley')]
AmSn18990608-V14-23-page3.txt: [('a', 'nd')]
AmSn18990615-V14-24-page13.txt: [('l', 'ee'), ('FOR', 'EIGN'), ('Boa', 'rd')]
AmSn18990615-V14-24-page14.txt: [('A', 'ttention'), ('revo', 'lutions')]
AmSn18990615-V14-24-page6.txt: [('per', 'sonality')]
AmSn18990615-V14-24-page7.txt: [('confer', 'ence'), ('unright', 'eous')]
AmSn18990622-V14-25-page11.txt: [('Chi', 'cago')]
AmSn18990622-V14-25-page15.txt: [('fa', 'ther')]
AmSn18990622-V14-25-page16.txt: [('appear', 'ance')]
AmSn18990706-V14-26-page14.txt: [('A', 'rIAN')]
AmSn18990706-V14-26-page15.txt: [('O', 'ther'), ('SAVONA', 'ROLA')]
AmSn18990706-V14-26-page16.txt: [('SEN', 'TINEL'), ('Chi', 'cago')]
AmSn18990706-V14-26-page4.txt: [('author', 'ity')]
AmSn18990713-V14-27-page15.txt: [('cap', 'tivity')]
AmSn18990720-V14-28-page11.txt: [('c', 'onsideration')]
AmSn18990720-V14-28-page15.txt: [('cap', 'tivity')]
AmSn18990727-V14-29-page13.txt: [('O', 'ver')]
AmSn18990727-V14-29-page14.txt: [('t', 'aken')]
AmSn18990727-V14-29-page15.txt: [('cap', 'tivity'), ('W', 'orld')]
AmSn18990727-V14-29-page3.txt: [('di', 'recting')]
AmSn18990803-V14-30-page15.txt: [('C', 'itation')]
AmSn18990803-V14-30-page8.txt: [('a', 'nd')]
AmSn18990803-V14-30-page9.txt: [('de', 'livered')]
AmSn18990810-V14-31-page15.txt: [('inven', 'tions')]
AmSn18990810-V14-31-page16.txt: [('un', 'derlying')]
AmSn18990810-V14-31-page4.txt: [('Evi', 'dently')]
AmSn18990817-V14-32-page15.txt: [('A', 'lAN')]
AmSn18990824-V14-33-page13.txt: [('le', 'ft')]
AmSn18990824-V14-33-page14.txt: [('C', 'oth')]
AmSn18990824-V14-33-page7.txt: [('pro', 'posals')]
AmSn18990831-V14-34-page12.txt: [('dis', 'tinguished')]
AmSn18990831-V14-34-page14.txt: [('C', 'oth')]
AmSn18990831-V14-34-page15.txt: [('A', 'rIAN')]
AmSn18990907-V14-35-page12.txt: [('Ascend', 'ancy')]
AmSn18990907-V14-35-page3.txt: [('fur', 'ther')]
AmSn18990914-V14-36-page2.txt: [('Roma', 'nism'), ('Phil', 'ippines')]
AmSn18990921-V14-37-page6.txt: [('to', 'iled')]
AmSn18990928-V14-38-page13.txt: [('or', 'cein')]
AmSn18990928-V14-38-page6.txt: [('con', 'nected')]
AmSn18991005-V14-39-page11.txt: [('Com', 'mitting')]
AmSn18991005-V14-39-page4.txt: [('a', 'nd')]
AmSn18991012-V14-40-page14.txt: [('K', 'ANSAS')]
AmSn18991012-V14-40-page15.txt: [('e', 're')]
AmSn18991012-V14-40-page2.txt: [('SEN', 'TINEL')]
AmSn18991019-V14-41-page14.txt: [('b', 'aptist')]
AmSn18991019-V14-41-page6.txt: [('earn', 'estness')]
AmSn18991026-V14-42-page11.txt: [('equal', 'ity')]
AmSn18991026-V14-42-page15.txt: [('In', 'terpretation')]
AmSn18991026-V14-42-page2.txt: [('move', 'ment')]
AmSn18991026-V14-42-page4.txt: [('la', 'xer')]
AmSn18991026-V14-42-page6.txt: [('cat', 'echisms'), ('d', 'ay')]
AmSn18991026-V14-42-page9.txt: [('a', 'nd')]
AmSn18991102-V14-43-page11.txt: [('cir', 'culated')]
AmSn18991102-V14-43-page14.txt: [('Y', 'OE')]
AmSn18991102-V14-43-page16.txt: [('Eng', 'lish')]
AmSn18991102-V14-43-page2.txt: [('con', 'gress')]
AmSn18991102-V14-43-page3.txt: [('and', 're')]
AmSn18991102-V14-43-page9.txt: [('and', 'rE')]
AmSn18991109-V14-44-page2.txt: [('j', 'ournal')]
AmSn18991116-V14-45-page16.txt: [('e', 're')]
AmSn18991116-V14-45-page17.txt: [('Chris', 'tians')]
AmSn18991123-V14-46-page13.txt: [('E', 'xamination')]
AmSn18991123-V14-46-page2.txt: [('com', 'manded')]
AmSn18991130-V14-47-page5.txt: [('t', 'hese')]
AmSn18991130-V14-47-page7.txt: [('AME', 'RICAN')]
AmSn18991207-V14-48-page12.txt: [('be', 'na')]
AmSn18991214-V14-49-page2.txt: [('Govern', 'ment')]
AmSn18991228-V14-50-page12.txt: [('We', 'll')]
AmSn18991228-V14-50-page16.txt: [('SEN', 'TINEL')]
AmSn19000104-V15-01-page10.txt: [('j', 'ustification')]
AmSn19000104-V15-01-page14.txt: [('Bo', 'nd')]
AmSn19000104-V15-01-page2.txt: [('t', 'ent')]
AmSn19000104-V15-01-page7.txt: [('aggrand', 'izement')]
AmSn19000104-V15-01-page9.txt: [('Amend', 'ment')]
AmSn19000111-V15-02-page11.txt: [('inter', 'fering')]
AmSn19000118-V15-03-page13.txt: [('C', 'loth')]
AmSn19000118-V15-03-page2.txt: [('the', 're')]
AmSn19000125-V15-04-page10.txt: [('Con', 'trary')]
AmSn19000201-V15-05-page1.txt: [('f', 'ollows')]
AmSn19000208-V15-06-page10.txt: [('repre', 'sented')]
AmSn19000208-V15-06-page11.txt: [('Pro', 'Tem')]
AmSn19000208-V15-06-page2.txt: [('a', 'ny')]
AmSn19000208-V15-06-page3.txt: [('sym', 'pathy')]
AmSn19000215-V15-07-page14.txt: [('Work', 'ers')]
AmSn19000215-V15-07-page15.txt: [('O', 'kie')]
AmSn19000215-V15-07-page9.txt: [('c', 'om')]
AmSn19000222-V15-08-page16.txt: [('SEN', 'TINEL')]
AmSn19000301-V15-09-page10.txt: [('Gov', 'ERNOR')]
AmSn19000301-V15-09-page13.txt: [('d', 'iet')]
AmSn19000308-V15-10-page14.txt: [('A', 'IL')]
AmSn19000308-V15-10-page2.txt: [('in', 'struction'), ('fr', 'ee')]
AmSn19000315-V15-11-page16.txt: [('differ', 'ences')]
AmSn19000322-V15-12-page14.txt: [('phys', 'ical')]
AmSn19000322-V15-12-page5.txt: [('en', 'forcement')]
AmSn19000329-V15-13-page9.txt: [('Shan', 'Tung')]
AmSn19000405-V15-14-page2.txt: [('Bap', 'tist')]
AmSn19000405-V15-14-page5.txt: [('nigh', 'tly')]
AmSn19000412-V15-15-page12.txt: [('con', 'cern')]
AmSn19000419-V15-16-page15.txt: [('right', 'eousness')]
AmSn19000426-V15-17-page8.txt: [('command', 'Ment')]
AmSn19000426-V15-17-page9.txt: [('sen', 'timent')]
AmSn19000510-V15-18-page10.txt: [('for', 'th')]
AmSn19000510-V15-18-page14.txt: [('lib', 'erty')]
AmSn19000510-V15-18-page16.txt: [('SEN', 'TINEL'), ('E', 'TC')]
AmSn19000510-V15-18-page2.txt: [('or', 'ganization')]
AmSn19000524-V15-20-page9.txt: [('con', 'quering')]
AmSn19000531-V15-21-page12.txt: [('Fur', 'ther')]
AmSn19000607-V15-22-page1.txt: [('t', 'ee')]
AmSn19000607-V15-22-page5.txt: [('Is', 'rael')]
AmSn19000607-V15-22-page8.txt: [('r', 'oo')]
AmSn19000614-V15-23-page6.txt: [('a', 'pparently')]
AmSn19000628-V15-25-page5.txt: [('PRO', 'GRESSIVE')]
AmSn19000712-V15-27-page12.txt: [('Kiang', 'Si'), ('Kiang', 'Su')]
AmSn19000712-V15-27-page13.txt: [('E', 'RT')]
AmSn19000719-V15-28-page12.txt: [('u', 'nwarranted')]
AmSn19000719-V15-28-page2.txt: [('Refor', 'mation'), ('the', 'Refor')]
AmSn19000719-V15-28-page6.txt: [('men', 'tioned'), ('in', 'dividuals'), ('relin', 'quishes'), ('and', 're'), ('ac', 'quired')]
AmSn19000719-V15-28-page7.txt: [('lib', 'erty'), ('Chris', 'tian')]
AmSn19000719-V15-28-page8.txt: [('perse', 'cutions')]
AmSn19000726-V15-29-page2.txt: [('em', 'inent'), ('an', 'em')]
AmSn19000726-V15-29-page9.txt: [('a', 'ppointed')]
AmSn19000802-V15-30-page13.txt: [('th', 'eir')]
AmSn19000802-V15-30-page16.txt: [('Chris', 'tendom')]
AmSn19000802-V15-30-page5.txt: [('mon', 'archy')]
AmSn19000809-V15-31-page10.txt: [('relig', 'ious')]
AmSn19000809-V15-31-page12.txt: [('on', 'es'), ('g', 'od'), ('con', 'gregation')]
AmSn19000809-V15-31-page16.txt: [('I', 'ndependence')]
AmSn19000809-V15-31-page8.txt: [('Pres', 'ent')]
AmSn19000816-V15-32-page1.txt: [('e', 'ra')]
AmSn19000816-V15-32-page16.txt: [('LIB', 'ERTY')]
AmSn19000816-V15-32-page6.txt: [('utter', 'ance')]
AmSn19000823-V15-33-page13.txt: [('au', 'thorizing')]
AmSn19000823-V15-33-page14.txt: [('val', 'uable')]
AmSn19000823-V15-33-page16.txt: [('ob', 'liged')]
AmSn19000823-V15-33-page6.txt: [('Meth', 'odists')]
AmSn19000830-V15-34-page12.txt: [('or', 'dered')]
AmSn19000830-V15-34-page2.txt: [('r', 'ights')]
AmSn19000906-V15-35-page10.txt: [('con', 'cerned')]
AmSn19000906-V15-35-page11.txt: [('an', 'ything'), ('car', 'ried'), ('the', 're')]
AmSn19000906-V15-35-page12.txt: [('in', 'terpOsed')]
AmSn19000906-V15-35-page15.txt: [('THE', 'SE')]
AmSn19000906-V15-35-page16.txt: [('dis', 'tinguish')]
AmSn19000906-V15-35-page2.txt: [('e', 'xistence')]
AmSn19000906-V15-35-page6.txt: [('live', 'th')]
AmSn19000913-V15-36-page14.txt: [('Nor', 'theastern')]
AmSn19000920-V15-37-page16.txt: [('THE', 'SE')]
AmSn19000928-V15-38-page15.txt: [('W', 'IZ')]
AmSn19000928-V15-38-page16.txt: [('institu', 'tions')]
AmSn19000928-V15-38-page6.txt: [('Conscien', 'ce')]
AmSn19001004-V15-39-page10.txt: [('vici', 'ous')]
AmSn19001004-V15-39-page14.txt: [('subscrip', 'tion')]
AmSn19001004-V15-39-page2.txt: [('and', 'es'), ('es', 'pecially')]
AmSn19001004-V15-39-page5.txt: [('cru', 'cified')]
AmSn19001011-V15-40-page11.txt: [('gov', 'ernment')]
AmSn19001011-V15-40-page15.txt: [('Y', 'ou')]
AmSn19001011-V15-40-page6.txt: [('r', 'esult')]
AmSn19001018-V15-41-page10.txt: [('in', 'TI'), ('gro', 'und')]
AmSn19001018-V15-41-page14.txt: [('SEN', 'TINEL')]
AmSn19001018-V15-41-page16.txt: [('the', 'se')]
AmSn19001018-V15-41-page2.txt: [('on', 'es'), ('govern', 'ment')]
AmSn19001018-V15-41-page4.txt: [('ad', 'justed'), ('a', 'lWays'), ('suc', 'ceeding')]
AmSn19001018-V15-41-page5.txt: [('mo', 'rality'), ('phi', 'losophy')]
AmSn19001018-V15-41-page8.txt: [('exer', 'cise')]
AmSn19001025-V15-42-page10.txt: [('condi', 'tions')]
AmSn19001025-V15-42-page11.txt: [('interfer', 'ence'), ('settle', 'ment')]
AmSn19001025-V15-42-page12.txt: [('be', 'lieve')]
AmSn19001025-V15-42-page13.txt: [('men', 'tion')]
AmSn19001025-V15-42-page14.txt: [('SEN', 'TINEL'), ('subscrip', 'tion')]
AmSn19001025-V15-42-page16.txt: [('Sund', 'ay'), ('Nu', 'NN')]
AmSn19001025-V15-42-page4.txt: [('ob', 'viously'), ('per', 'sisted')]
AmSn19001025-V15-42-page7.txt: [('guar', 'antee')]
AmSn19001101-V15-43-page12.txt: [('frequent', 'ers'), ('BaP', 'tists')]
AmSn19001101-V15-43-page13.txt: [('disC', 'ouraged')]
AmSn19001101-V15-43-page14.txt: [('L', 'iberty')]
AmSn19001101-V15-43-page16.txt: [('de', 'mand')]
AmSn19001101-V15-43-page5.txt: [('wor', 'shiping'), ('dic', 'tates')]
AmSn19001101-V15-43-page6.txt: [('mission', 'ary')]
AmSn19001108-V15-44-page15.txt: [('SEN', 'TINEL'), ('A', 'DDRESS')]
AmSn19001108-V15-44-page16.txt: [('enforce', 'ment')]
AmSn19001108-V15-44-page5.txt: [('re', 'joicings'), ('the', 're')]
AmSn19001115-V15-45-page1.txt: [('e', 'CO'), ('a', 'll'), ('a', 'ssumed')]
AmSn19001115-V15-45-page13.txt: [('federa', 'tion')]
AmSn19001115-V15-45-page14.txt: [('hand', 'somely')]
AmSn19001115-V15-45-page16.txt: [('ad', 'vocateS')]
AmSn19001115-V15-45-page6.txt: [('gov', 'ernments'), ('Cath', 'olics')]
AmSn19001122-V15-46-page10.txt: [('N', 'eVertheless')]
AmSn19001122-V15-46-page11.txt: [('depart', 'ment'), ('in', 'dicated')]
AmSn19001122-V15-46-page13.txt: [('ques', 'tion')]
AmSn19001122-V15-46-page16.txt: [('m', 'ab')]
AmSn19001122-V15-46-page2.txt: [('gov', 'erned')]
AmSn19001122-V15-46-page7.txt: [('h', 'oy')]
AmSn19001129-V15-47-page11.txt: [('to', 're')]
AmSn19001129-V15-47-page13.txt: [('pene', 'trated'), ('per', 'sonal')]
AmSn19001129-V15-47-page15.txt: [('n', 'eeds')]
AmSn19001129-V15-47-page16.txt: [('a', 're')]
AmSn19001129-V15-47-page4.txt: [('en', 'largeth')]
AmSn19001206-V15-48-page3.txt: [('the', 'Es')]
AmSn19001206-V15-48-page6.txt: [('Cong', 'resSman'), ('Con', 'gress')]
AmSn19001206-V15-48-page7.txt: [('pri', 'marily'), ('cer', 'tainly'), ('no', 'es')]
AmSn19001206-V15-48-page8.txt: [('Vir', 'ginia')]
AmSn19001220-V15-50-page12.txt: [('c', 'urch'), ('or', 'ganized')]
AmSn19001220-V15-50-page6.txt: [('Massa', 'chusetts'), ('colo', 'nies'), ('state', 'ment')]
# %load shared_elements/summary.py
# Recompute the overview report for the directory stored under
# directories['cycle']. The call prints the directory, the average verified
# rate, the average of the error rates, and the total token count; its return
# value is kept in `summary` and feeds the error reports in the cells below.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction7 Average verified rate: 0.9830117359134304 Average of error rates: 0.018671590569979114 Total token count: 8363303
# %load shared_elements/top_errors.py
# Build the errors summary from `summary`, then display the most frequent
# error tokens together with their counts.
# NOTE(review): the second argument to top_errors appears to act as a minimum
# count rather than a top-N limit -- the printed list holds hundreds of
# entries and runs down to a count of 11. Confirm against text2topics.reports.
errors_summary = reports.get_errors_summary(summary)
reports.top_errors(errors_summary, 10)
[("'", 8053),
('t', 4426),
('e', 3978),
('d', 3950),
('w', 3754),
('co', 3408),
('m', 3176),
('n', 3046),
('f', 2035),
('r', 2028),
('th', 1645),
('g', 1371),
('mo', 1160),
('u', 926),
('x', 864),
('ex', 521),
('pa', 410),
('q', 399),
('sunday-law', 334),
('k', 315),
("the'", 304),
('pp', 299),
('tion', 276),
("conscience'", 260),
('ch', 253),
('seventhday', 249),
('re', 224),
('ga', 220),
('oc', 218),
('z', 215),
('wm', 215),
('satolli', 210),
('employes', 209),
('munn', 207),
('ti', 200),
('id', 181),
('un', 173),
('ry', 170),
('al', 166),
('sunday-closing', 160),
('ca', 151),
('ment', 146),
('chain-gang', 136),
("to'", 134),
('nd', 130),
('ll', 128),
('lb', 125),
('il', 123),
('bateham', 122),
('cmsar', 121),
('se', 120),
('aleck', 112),
("and'", 109),
('socalled', 106),
('sunday-rest', 104),
('sentin', 104),
('milly', 103),
('cc', 101),
('te', 101),
('erican', 99),
("of'", 98),
('va', 95),
('nt', 92),
('fellow-citizens', 92),
('vt', 92),
('cd', 92),
('tt', 89),
('aa', 89),
('op', 89),
('ft', 88),
('-', 88),
("a'", 86),
('eze', 84),
('attorney-general', 83),
('ma', 82),
('csar', 81),
('stundists', 80),
('cereola', 79),
('lc', 79),
('neander', 78),
('religio-political', 76),
('mc', 74),
('law-abiding', 74),
('sundaylaw', 74),
('rican', 73),
('tions', 72),
('ay', 72),
('li', 72),
('edmunds', 71),
('ni', 71),
('ra', 71),
("crafts's", 70),
('rd', 70),
('ia', 70),
("is'", 69),
('sr', 69),
('freethought', 68),
("in'", 67),
('si', 67),
("crafts'", 66),
("folks'", 66),
("cmsar's", 66),
('un-american', 64),
('sabbath-day', 63),
('rest-day', 62),
('sabbath-breaking', 60),
("''", 59),
('pr', 58),
('mt', 58),
("'s", 58),
('ic', 57),
('ac', 57),
('ne', 56),
('candidus', 56),
("barbers'", 56),
('paeifie', 56),
('na', 55),
('tregelles', 54),
('geikie', 54),
('ie', 54),
("that'", 54),
('ican', 53),
("an'", 53),
('ity', 53),
('dred', 53),
('employe', 52),
("it'", 52),
('ky', 51),
('assoeiation', 51),
('ob', 51),
('litt', 51),
('ri', 50),
('ernment', 50),
('wellknown', 50),
('coxey', 50),
('ci', 50),
('ofthe', 49),
('tional', 49),
('ce', 49),
('es', 49),
('ments', 49),
('vo', 48),
('aro', 48),
('pre', 47),
('fa', 47),
('ph', 47),
('cathedra', 46),
("jones'", 46),
('ow', 45),
('leiper', 45),
('ro', 45),
('forit', 45),
('ent', 45),
("citizens'", 44),
('mi', 43),
('sundayclosing', 43),
('judefind', 42),
('ple', 42),
('sh', 42),
('erty', 42),
('times-herald', 42),
('chaingang', 42),
('tischendorf', 42),
('ei', 41),
('bo', 41),
("o'keefe", 41),
('hiberty', 41),
('cr', 41),
("be'", 41),
("law'", 40),
('wo', 40),
('sunday-keeping', 40),
('ea', 40),
('yo', 40),
('chapelle', 40),
('ct', 40),
('tinel', 40),
('em', 40),
("i'", 39),
('ip', 39),
('keane', 39),
('tv', 39),
('copygraph', 39),
("waterman's", 38),
('lachmann', 38),
('tian', 38),
('oi', 38),
('kai', 38),
('ica', 38),
("cruden's", 38),
('ers', 37),
('non-sectarian', 37),
('adress', 37),
("csar's", 37),
('saye', 37),
('church-and-state', 37),
("american'", 37),
('coxe', 36),
('ta', 36),
('io', 36),
('tr', 36),
('dwyer', 36),
('oa', 35),
("for'", 35),
("not'", 35),
('pf', 35),
('tir', 35),
('td', 35),
('mass-meeting', 35),
('swiggart', 35),
("this'", 35),
('fi', 35),
('az', 35),
('law-making', 34),
('ance', 34),
('da', 34),
('first-day', 34),
('jagoe', 34),
('inthe', 34),
('godgiven', 34),
("church'", 34),
('brunot', 33),
('ther', 33),
('cwsar', 33),
('gious', 33),
('entinel', 33),
('eh', 33),
('cl', 33),
('sa', 32),
('ss', 32),
("as'", 32),
('vice-presidents', 32),
('base-ball', 32),
('ap', 32),
('stitution', 32),
("infants'", 32),
('ba', 32),
('saloon-keepers', 32),
('oo', 31),
('lt', 31),
('sun-worship', 31),
("liberty'", 31),
('ts', 31),
('one-seventh', 31),
('rt', 31),
('prayer-meeting', 31),
('slattery', 31),
('colitical', 30),
('efical', 30),
('mn', 30),
('rs', 30),
('vox', 30),
('fr', 30),
('ao', 29),
('os', 29),
('mg', 29),
("are'", 29),
("roberts'", 29),
('ge', 29),
('tc', 29),
('rn', 29),
('kauffman', 29),
('fo', 29),
('ns', 29),
('crowther', 29),
("workingmen's", 29),
('puplishing', 29),
("all'", 29),
('holidayism', 28),
('oe', 28),
('self-preservation', 28),
('street-cars', 28),
("parkhurst's", 28),
('charta', 28),
('dei', 28),
('newyork', 28),
('durborow', 28),
('liberty-loving', 28),
('zwiebach', 28),
("fathers'", 28),
('non-catholics', 28),
('tl', 28),
('fair-minded', 27),
("krug's", 27),
('merican', 27),
("sabbath'", 27),
('non-observance', 27),
('ful', 27),
('iu', 27),
('schurman', 27),
("cushing's", 27),
('mccauley', 27),
('self-defense', 27),
('theo', 27),
('fellow-man', 27),
("gibbons'", 27),
("or'", 26),
('counter-arguments', 26),
('sabbathkeeping', 26),
('ablegate', 26),
('fora', 26),
('fide', 26),
('platt', 26),
('thon', 26),
("with'", 26),
('itis', 26),
('hto', 26),
('bula', 26),
("god'", 26),
('ve', 26),
("which'", 26),
('pecci', 26),
('divinely-appointed', 26),
('ae', 26),
('non-religious', 26),
("by'", 26),
('selfgovernment', 26),
('ence', 25),
("lions'", 25),
('weakley', 25),
('eferson', 25),
('nethinim', 25),
('weyler', 25),
('feligious', 25),
('ig', 25),
('ou', 25),
('fah', 25),
("d'aubigne", 25),
('martinelli', 25),
('tn', 25),
("at'", 25),
('sundayschool', 25),
('ous', 25),
('ridpath', 25),
('tne', 25),
('publishinc', 25),
('loth', 24),
('krug', 24),
('ceesar', 24),
('stuttle', 24),
('mehan', 24),
('tothe', 24),
('tiie', 24),
('ligion', 24),
("preachers'", 24),
("if'", 24),
('phelan', 24),
('ws', 24),
('ut', 23),
('humbert', 23),
('lawabiding', 23),
('twentyfive', 23),
('atterbury', 23),
('sunday-sabbath', 23),
('nn', 23),
('arierican', 23),
('ble', 23),
("saints'", 23),
('ber', 23),
('om', 23),
('ious', 23),
('tbe', 22),
('anb', 22),
('ili', 22),
('ef', 22),
('ib', 22),
('bt', 22),
('tb', 22),
('ligious', 22),
("have'", 22),
('ab', 22),
('scudder', 22),
('pany', 22),
('sel', 22),
('wi', 22),
('gi', 22),
('anti-christian', 22),
("we'", 22),
('jeferson', 22),
('xact', 21),
("pub'rs", 21),
("grocers'", 21),
("e'", 21),
('comegys', 21),
('scovel', 21),
('sevent', 21),
('po', 21),
('mee', 21),
('witham', 21),
('thos', 21),
('ng', 21),
('yefferson', 21),
('ive', 21),
("sunday'", 21),
('notgive', 21),
('postmaster-general', 20),
("hutchings'", 20),
('washburne', 20),
("he'", 20),
('religiopolitical', 20),
('kellog', 20),
('romer', 20),
('healthgiving', 20),
("soldiers'", 20),
("satolli's", 20),
('mcglynn', 20),
('sien', 20),
('ject', 20),
("from'", 20),
('ee', 20),
("hours'", 20),
('ary', 20),
('pt', 20),
('anierican', 20),
('rosemond', 20),
("vick's", 20),
('parens', 20),
('bythe', 20),
("on'", 20),
('longnecker', 20),
("was'", 20),
("printers'", 19),
('governor-general', 19),
('anierica', 19),
('pressense', 19),
('fbr', 19),
('micr', 19),
('everts', 19),
('rorabacher', 19),
("pastors'", 19),
("gov't", 19),
('tp', 19),
('iti', 19),
('rr', 19),
('seelye', 19),
('arther', 19),
('wishart', 19),
("people'", 19),
('cosgrove', 19),
('gt', 19),
('det', 19),
('lery', 19),
('abbe', 19),
('stundist', 19),
("day'", 19),
('hagans', 19),
('montefiore', 19),
("will'", 19),
('chain-gangs', 19),
('law-makers', 19),
('sundaykeeping', 18),
('dc', 18),
('reli', 18),
('tae', 18),
('od', 18),
('enright', 18),
('anti-catholic', 18),
('non-interference', 18),
('tht', 18),
('sas', 18),
('oz', 18),
('efferson', 18),
('ible', 18),
("th'", 18),
('tianity', 18),
('tarawera', 18),
('curlett', 18),
('tii', 18),
('ey', 18),
('tolstoi', 18),
('wa', 18),
('self-styled', 18),
('--', 18),
("would'", 18),
('ccesar', 18),
('oity', 18),
('avery-stuttle', 18),
('nnw', 17),
('mal', 17),
('bf', 17),
('prin', 17),
("righteousness'", 17),
('jt', 17),
('clingman', 17),
('cedarquist', 17),
('newyorkcity', 17),
('tra', 17),
('ricans', 17),
('saloon-keeper', 17),
('rubiana', 17),
('eral', 17),
('prisot', 17),
('post-offices', 17),
('theunited', 17),
('ies', 17),
('nu', 17),
('ol', 17),
("no'", 17),
('fl', 17),
('sabbathbreaking', 17),
("a'nan", 17),
('ress', 17),
('sommerville', 17),
('ation', 17),
('church-going', 17),
('cood', 17),
('mullally', 17),
('self-governing', 17),
('nel', 17),
('um', 17),
('bondst', 17),
('philpott', 17),
('law-breaker', 17),
('ik', 17),
('senti', 17),
('ame', 17),
('leivites', 17),
('pel', 17),
("apostles'", 17),
('hy', 17),
("schaff's", 16),
('dieu', 16),
('selfevident', 16),
('dayto', 16),
('ioo', 16),
('tf', 16),
('prepartion', 16),
('cp', 16),
("enright's", 16),
("his'", 16),
('mit', 16),
('relig', 16),
('thepeople', 16),
('sie', 16),
('alfaro', 16),
('symmachus', 16),
('xl', 16),
('ples', 16),
('facto', 16),
('erromanga', 16),
('sunday-keepers', 16),
('dividual', 16),
('peryear', 16),
('peffer', 16),
('re-enact', 16),
('ish', 16),
('socialpurity', 16),
('ith', 16),
('cs', 16),
('wilkie', 16),
("l'", 16),
('ul', 16),
('hodgson', 16),
('basle', 16),
('bas', 16),
('eousness', 16),
('zi', 15),
("who'", 15),
('ite', 15),
('sabbath-breakers', 15),
('americansentinel', 15),
('ag', 15),
('rhe', 15),
('nonsuch', 15),
('pepsia', 15),
('co-workers', 15),
('gallinger', 15),
('labberton', 15),
('thatthe', 15),
('intrust', 15),
('lttra', 15),
('ork', 15),
('aw', 15),
('law-breakers', 15),
('milman', 15),
('rampolla', 15),
("christian'", 15),
('wellbeing', 15),
("milman's", 15),
('klip', 15),
('bi', 15),
('ons', 15),
('ctesar', 15),
("their'", 15),
('re-enacted', 15),
('populi', 15),
('governinent', 15),
('wor', 15),
('hach', 15),
('sc', 15),
("miles'", 15),
("ginn's", 15),
('ih', 15),
('janes', 15),
('ov', 15),
('sulus', 15),
('stinday', 15),
('xo', 15),
('ist', 15),
('ectarian', 15),
("o'gorman", 15),
('tkt', 15),
("they'", 15),
('alvierica', 15),
('je', 15),
('birney', 15),
("religion'", 15),
('avery-stiittle', 15),
('sf', 15),
('np', 14),
('thb', 14),
('qa', 14),
("pres'ts", 14),
('secker', 14),
('intelligeneer', 14),
("'the", 14),
('yeferson', 14),
('self-exaltation', 14),
("tourists'", 14),
('chiniquy', 14),
('rittenhouse', 14),
('ormore', 14),
("moses'", 14),
('peo', 14),
('goverment', 14),
('plete', 14),
('lished', 14),
('thp', 14),
("sup'ts", 14),
("gault's", 14),
('oth', 14),
('cortlandt', 14),
('non-union', 14),
('br', 14),
("ccesar's", 14),
('anglo-saxons', 14),
('christain', 14),
('restday', 14),
('su', 14),
('nr', 14),
('rv', 14),
('eemperance', 14),
('sabbath-breaker', 14),
('gb', 14),
('tains', 14),
("mcallister's", 14),
('malum', 14),
("williams'", 14),
("neander's", 14),
("adventists'", 14),
('lexow', 14),
('confreres', 14),
('thr', 14),
('ncluding', 14),
('af', 14),
('sient', 14),
('tution', 14),
('gl', 14),
('tennesseeans', 14),
('mu', 14),
("but'", 13),
('fon', 13),
('ver', 13),
('christ-like', 13),
('aivierican', 13),
('leaguers', 13),
('wu', 13),
("mf'g", 13),
('hoc', 13),
('dibbs', 13),
('anti-religious', 13),
('ntinel', 13),
('ual', 13),
('themies', 13),
('dont', 13),
('ex-president', 13),
('gr', 13),
("one'", 13),
('two-horned', 13),
('rp', 13),
("coxey's", 13),
('higinbotham', 13),
("t'", 13),
("protestants'", 13),
('pilman', 13),
('froni', 13),
('foi', 13),
('meeting-house', 13),
('mccourt', 13),
('thd', 13),
('waupon', 13),
("f'", 13),
("has'", 13),
('forthe', 13),
('itt', 13),
('hiscock', 13),
('sp', 13),
('self-contradictory', 13),
("torry's", 13),
('cif', 13),
("its'", 13),
('dn', 13),
('princi', 13),
('cer', 13),
('thi', 13),
('ec', 13),
('hee', 13),
('sabbathkeepers', 13),
('lelvites', 13),
('one-man', 13),
('tms', 13),
('rundschau', 13),
('tlie', 13),
('tax-payers', 13),
('non-christian', 13),
('self-appointed', 13),
("breeders'", 13),
('kossean', 13),
('olesen', 13),
('botkine', 13),
('ny', 13),
('volksraad', 13),
("whaley's", 12),
('tem', 12),
('constitu', 12),
('ess', 12),
('froin', 12),
('robb', 12),
('theire', 12),
('thein', 12),
('ors', 12),
('ical', 12),
('chappelle', 12),
("churches'", 12),
('self-constituted', 12),
('wouldbe', 12),
('entin', 12),
('week-day', 12),
('thority', 12),
('fast-day', 12),
("were'", 12),
('ex-mayor', 12),
('fortynine', 12),
('ture', 12),
('bok', 12),
('whitall', 12),
("cents'", 12),
('sition', 12),
('tte', 12),
('self-interest', 12),
('croker', 12),
("d'aubigne's", 12),
('merous', 12),
('cai', 12),
('combatting', 12),
('observa', 12),
('fp', 12),
('yonx', 12),
('gainst', 12),
("such'", 12),
('ht', 12),
('ds', 12),
('masse', 12),
('self-respecting', 12),
('pc', 12),
('ivierican', 12),
('tobe', 12),
("do'", 12),
("christ'", 12),
('ki', 12),
('ddress', 12),
("neat's", 12),
('twenty-fifth', 12),
('ttin', 12),
('maurer', 12),
('bondstreet', 12),
('inter-state', 12),
('lation', 12),
('ang', 12),
("any'", 12),
('rk', 12),
('gx', 12),
('sunday-observance', 12),
('havergal', 11),
("james'", 11),
('olic', 11),
('thechurch', 11),
('sm', 11),
('cz', 11),
('df', 11),
("dealers'", 11),
('ke', 11),
('ets', 11),
('pm', 11),
('ex-senator', 11),
('lieve', 11),
('uncompromis', 11),
('mm', 11),
('ine', 11),
('sherk', 11),
('fifty-second', 11),
('selfpreservation', 11),
('derstanding', 11),
('naw', 11),
('tre', 11),
("states'", 11),
('theni', 11),
("state'", 11),
('communica', 11),
('rose-wood', 11),
('androscoggin', 11),
("bakers'", 11),
('sk', 11),
('taschereau', 11),
('qt', 11),
('tm', 11),
('griffitts', 11),
('fellow-workers', 11),
('kw', 11),
('bradfield', 11),
('houk', 11),
('fot', 11),
("so'", 11),
("'a", 11),
('amyot', 11),
('muskoka', 11),
('pl', 11),
('aivierica', 11),
('erties', 11),
('qf', 11),
('haye', 11),
('ost', 11),
('isthepapacyinprophecy', 11),
('sev', 11),
('rian', 11),
('mits', 11),
('notre', 11),
('key-note', 11),
('hirsch', 11),
('sealings', 11),
('rin', 11),
('evil-doers', 11),
('nott', 11),
("civil'", 11),
('theseventh', 11),
('ml', 11),
('kee', 11),
('yr', 11),
('gress', 11),
('ex-governor', 11),
('cramer', 11),
('lr', 11),
('fs', 11),
('informations', 11),
('paoipio', 11),
('twentyfour', 11),
('ridgetown', 11),
('axact', 11),
('times-democrat', 11),
('tians', 11)]
Review Remaining Errors¶
# Display the documents flagged at the 0.2 error-rate threshold as
# (filename, error rate) pairs.
reports.docs_with_high_error_rate(summary, min_error_rate=0.2)
[('AmSn18900918-V05-37-page4.txt', 0.495),
('AmSn18900918-V05-37-page1.txt', 0.472),
('AmSn18900918-V05-37-page8.txt', 0.459),
('AmSn18900918-V05-37-page5.txt', 0.413),
('AmSn18970701-V12-26-page1.txt', 0.404),
('AmSn18980616-V13-24-page15.txt', 0.371),
('AmSn18980630-V13-26-page15.txt', 0.356),
('AmSn18980609-V13-23-page15.txt', 0.351),
('AmSn18980623-V13-25-page15.txt', 0.335),
('AmSn18980714-V13-27-page15.txt', 0.325),
('AmSn18960220-V11-08-page7.txt', 0.262),
('AmSn18971007-V12-39-page16.txt', 0.246),
('AmSn18971014-V12-40-page16.txt', 0.238),
('AmSn18951031-V10-43-page7.txt', 0.224),
('AmSn18951219-V10-50-page7.txt', 0.215),
('AmSn18951024-V10-42-page7.txt', 0.204)]
# Keep only the filenames of the flagged documents so they can be reviewed
# by hand. The report yields (filename, error rate) pairs.
docs_2_check = [
    doc_name
    for doc_name, error_rate in reports.docs_with_high_error_rate(summary, min_error_rate=0.2)
    if error_rate > 0.2
]
# Left disabled; presumably opens the listed files from the cycle directory
# for manual inspection -- enable when reviewing.
# utilities.open_original_docs(docs_2_check, directories['cycle'])
There are two main drivers of the remaining OCR errors. First, the original scans of AmSn18900918-V05-37 captured either the layer behind the page or the shadow of the text on the next page, which makes clean OCR all but impossible from that scan. Second, the advertisement sections of the publication are a major source of errors, particularly the ads for an Interlinear Greek New Testament.
Check Long Errors¶
# List the error tokens selected by length (min_length=15). The output
# surfaces run-together words, hyphenated compounds, and OCR noise strings.
reports.long_errors(errors_summary, min_length=15)
(['intensely-orthodox', 'rezteoxfebeiloenir', 'virreasetiabwayi', 'pleasant-spirited', 'eheapserviceable', 'poreversepalialf', 'estabtablishment', 'pfopositionbliat', 'christiancitizenship', 'cliiynirtreczenanmouesna', 'themonthlypaymentwillbe', 'choochee-choochee', "sup'ercalendered", 'laicciohuasrmaniangstsrtsltyittzfhefrir', 'massachusetts--mr', 'malrithinrinodths', 'gamblingfraternity', 'theoriesodfisease', 'sabbath-profanation', 'subscripmountains', 'forty-eight-page', 'couldfollybegreaterthanpraying', 'increasing-favor', "considerable'part", 'non-establishment', 'commonwealthsaid', 'theseprosecutions', 'poll-parrot-wise', "administered'that", 'theamericansabbathunion', "notwithstanding'it", 'stylesofvehicles', 'themselvesandffarrless', 'anti-prohibition', 'theflorentinemartyr', 'poverty-stricken', 'duetonasalcatarrh', 'supportingbustles', 'zarassewmesseepmaimpaw', 'petitionsofthelongislandfishermen', 'christianstatesman', 'icarapriociaovrat', 'statute-intrenched', 'theyshallbedoneaway', 'office-worshiping', 'elfqpronocincing', "prornittetitpapets'are", 'andwasonlywaiting', 'ouriettidrofbthe', 'elattliimiiiiimi', 'counterpetitions', 'ioutlinesvividlytherelationthatexistedbetween', 'languagearchbishop', "orton'simproveddrenchinggil", 'notwiamstannfrfo', 'catholicsunderstand', "jitdge'pennypacker", 'nationalreligiousliberty', 'sunday-observance', "and'unmistakably", 'beaphjseabrighklong', 'gold-from-sea-water', 'andtheschemewentthrough', 'peorepresentative', 'iiiiiiiiiiiiiiiii', 'statuteintrenched', 'foreignnationality', 'mmsmwtimmmuummlimmw', 'associationssentafrom', 'iniquity-steeped', 'nineieeathncentury', 'commissioner-general', 'politisentiments', 'anti-imperialists', 'well-constructed', 'chattanoogadaily', 'secretary--foster', 'fellow-petitioners', 'andsugarbeetland', 'penny-in-the-slot', "remarkable'importation", 'stalwart-looking', 'magistratebelieveth', 'printedongoodpaper', 'incompetentreligious', 'advertisementcaptured', 'quasi-partnership', 
'indisputabletruth', 'forgivethesetears', 'self-contradiction', 'physicalnecessity', 'ailliliilliiiiiiimiiiiii', 'corporaexplained', 'ailopteclasnytbratedl', 'owisosossorramomontr', 'smallconsideration', 'anxietyofthesundaypreachersisto', 'butthenishallknow', 'self-stultifying', 'stevensonandhisconstituents', 'thebookisneatlygottenup', 'self-stultification', 'ecclesiastisustain', 'imprisonmenttomakemenwiserandtteer', 'following-conclusions', 'beenurgohtintothechurchbythe', 'fellow-religionists', 'questiondestruction', 'karaprieicrovrai', 'comprehensiveview', 'revolutionarywar', 'conversation-published', 'conditionsprevailing', 'anddiseasesofthe', 'condignpunishment', 'anti-imperialism', 'americanimperialism', 'perfectionshould', 'reefeoivredyratrsefeinreqnucierietso', 'alreadyfarniliar', 'therightsofthepeople', 'accordpreservation', 'icldiisoienaawary', 'andhebaselyintimates', 'willianimckinley', 'commanderin-chief', 'extreme-distress-of', 'thesecommunities', 'selfregeneration', 'austria-hungarian', 'non-commissioned', 'vanymoohearrnramedneorastonninneljadyek', 'non-interference', 'publishingcompany', 'national-reform-sabbath-union-sundayclosing', 'iknowevenasalsoi', 'notwithstandingconstitutional', 'actotjejtotactat', 'isfullofhappysur', "worlsd'exposition", 'alnericanbentiuel', "embedding'itself", 'rapidlyincreasing', 'establishredemption', 'liraitedlerritorrreads', 'karapyllobaovrat', 'anti-constitutional', 'determinationforesaid', 'nomorethanfollowing', 'imbibingreligious', 'minister-secretary', 'government-without-the', 'twasintrafalgarsbay', 'comingsolongastheservicesareheld', 'inter-communication', 'weligtonsiliberty', 'constructionsupon', 'iiiiiiiiiiiiiiii', 'politicalatheism', 'idnfvtleilubageiok', 'iiiiimiiiiimmiiiiini', 'religious-persecution', 'tennesseeforbids', 'ever-threatening', 'non-communicating', 'stampswillbeacceptableforsmallremit', 'bibleobjectlessons', "don'ttakeyaonufaingrocuytwuhnatitl", 'refceoivreydeisarsinir', 'religfundamental', 
'successfuyllreosssecuthd', 'imiiiiimiiiiimiiiiim', 'gasternppaasssenger', 'uponllegislation', 'supisillustrated', 'socially-degenerating', 'convertedintoaholiday', 'long-anticipated', 'conimissioicfrem', 'andadvanceordershavebeenreceivedforhundredsofcopies', 'sorely-persecuted', "it'diaerithinates", 'includingtheologians', 'madeinstitutions', 'toanythingtending', 'sundaylawsclaimthat', 'venerablespioneer', "theworl'd-fathed", 'dark-superstitions', 'caramminambemimennommirom', 'faceteytvlaoliat', 'ttttiiiiiityttttttttttf', 'american-sabbath-union', 'whichweresosuddenlyconvertedintoreligiopolitical', 'theehouseholdmoellerofhealth', 'non-preservative', 'suppressordinance', 'pleasure-seekers', 'namesofscripture', 'sientripientimea', 'interestingthisweek', 'songsforlittleones', 'ittttttttttttttttttti', 'enforcepolitical', "aseeuted'jjaammees", 'toexerciseanycoercionwhatever', "l'itite'situcat'", "heading'paragraphs", "that'association", 'office-distributing', 'excitement-loving', 'thereligicairiberty', 'exereiseithereofv', 'liberty-lovingstatesmen', 'inresponsibility', 'photo-electrotyping', 'thoroughlyfurnished', 'theirconvictions', 'judgesteinoverruledthepleathat', 'bwaltztatoyeatort', 'bibleillustrationsandstoriesthatwehavebeen', 'religiouscharaeter', 'consideraminister', "religiously'observe", 'heavenlycitizenship', 'ongregationaliistychluich', 'singlesubscription', 'smokeof-burning-', 'heavyto-be-borne', 'compelleartalligten', 'seventyfive-dollar', 'carefully-guarded', 'adaptthelifeofchristtothe', 'nationalpridethat', 'zondaysschencling', 'pseudo-christianity', 'prisonconsecrated', 'church-and-state', 'protectionaccorded', 'postmaster-general', "salisbury'sgearlesscorset", 'cannot-regard-their', 'fellow-countrymen', 'especiallysuited', 'smintrifilivmila', 'thoseinstruments', "governmentsgod's", 'importantpiestion', 'ckinciickieseuonf', 'ivilerlicelvajle', 'appallingproportions', 'great-grandfather', 'sixtymile-an-hour', 'romanismandcivilliberty', 
'compulsory-idleness', 'constistitutional', 'correctlyrepresent', 'constitution--legislation', 'democratic-republican', 'gtilttertisenteitth', 'employmentelsewhere', 'idouwillixtnintrcuicstoru', 'sseellrfonouncing', 'ffitymityytyymyytyymtv', 'andthysicalvigor', "the'difficulties", 'ihavereceivedmybible', 'religio-politicians', 'democraticgovernment', 'counter-memorials', 'familygovernment', 'racravripyvioatv', 'atatatatatatatatatat', 'ifwiththetongues', 'sundayconcertintheoperahouse', 'commandment-keepers', 'allworkingpeople', "apartmentbuilding's", 'disconnectedherself', 'florencejarizona', 'practicepersecutionfor', 'intentionallyignore', 'miiiiimiiiiiimiiiiimiimmiiiiim', 'certainconditions', 'religioueliberty', 'lieutenant-colonel', 'scatteredthrough', 'religionsithings', 'appealandremonstrance', 'itigillihwililljaiiira', 'accomplishstatement', 'furtherexpressed', 'practicalreference', 'sendittoyourfriends', 'theargumentwhich', "money-gatherers'", 'seventh-day-keeping', 'amusement-loving', 'divinely-imposed', 'nviaenikensommewniegoe', 'asgoodassellsfor', 'agnosticsperhaps', 'saturday-sabbath', 'mmerrimmilummummulimmmummillunmil', 'sacredychronology', 'semi-reitschensk', 'perrnariehipolitieo', 'heaven-descended', 'ttttttttttttttttttttttttttttli', 'missiourielected', 'pseudo-religious', "superintendents'", 'writefordescebtlyocattuegue', 'sunday-legislation', 'self-opinionated', 'protestant-jesuit', 'permissibleunder', 'weaschristianworkersinthecauseofchristdeem', 'ofassortedhealthfoodcrackerssentpost', 'cenacliolieeoelpe', 'beuncompromisingiyoppesedto', 'priziateyinterviews', 'politicsirepresents', 'divinely-ordained', 'independencelies', 'kaitnyrdieixicseuonf', 'theyosemitevalley', 'everybodylaughed', 'counter-revolution', 'the-mediterranean', 'sheepskin-covered', "and'spiritualists", 'scientificamerican', 'otherinsurrectionists', 'dayadventistsandthecourts', 'nationshilthighty', 'thesafeligiousintolerancefromwhichallreligious', 'zntsthxtrealgcterxwc', 
'social-amusement-loving', 'enough-punishment', 'insurpassability', 'ritualhealtheltreat', "arrested'c-harged", 'merieanstatepapers', 'quarter-centennial', "teifige'znegivgtig", 'diosthnontoptuhfci', 'civilgovernmentandreligion', 'breckinridge-morse', 'amazingprevalence', 'christiansunconsciously', 'amtrintcarkilong', 'thegiairoraffitiliw', 'etianprinciplesof', 'importancethanthe', 'sundaylawmovement', "calieds'aisealpt", 'court-martialled', "xrcavolitio'obegbhuingezanra", 'kirchengeschichte', 'american-philippines', 'inquisitor-general', 'church-fellowship', 'thenaturalallianceexisting', 'independencovhich', 'anti-evangelical', 'quickly-discovered', 'self-preservation', 'carriedoutinthenameofthewholecatholiccommu', "the''''anierican", 'nineteenthcentury', 'correspondentadmits', 'advertisement-writers', 'undervitalizatiom', 'pagancounterfeits', 'counter-petition', 'christianity-with', 'larciestiiedical', "attorney-general's", 'overwhelminglyin', 're-enteringfields', 'decently-dressed', "national'apostasy", 'andtobringdowndamnationandcursesuponevery', "ruted'jamestanner", 'compelattendance', 'othermakesmaybegood', 'specialarrangement', 'indifferentiated', 'theunitedstatessenate', 'assurriptionists', 'amitricanininelo', 'itisjustwhatihavelongwanted', 'selfpreservation', 'ittenmtlettmmtrimitilm', 'uticompromisingly', 'certainlynoeffort', 'inconsistencythe', 'presbyterianbrother', 'aviorousandtisrrinad', 'revolutionaryresolution', 'fdiesthnontoptubfef', 'subscriptionprice', 'commercial-appeal', 'unitedstatesconstitutionasit', 'rapidly-increasing', 'ecclesiasticocivil', 'evidenceattachecl', 'thebiblegivenasapresentforsixnewyearlysubscriptionsat', "our-times-'toward", 'fellow-believers', 'brigadier-general', 'religio-philosophic', 'mmiimiiiiiiimiiiiiimiiiimm', 'icarapyrieliaovrat', 'entiremembership', "pgafria'nb'tatif", 'reuaftintofbeeticed', 'ftillieratillteminnimiennisiiiiiiwangmwo', 'papacywasfullydeveloped', "se'whatetherssay", "administration'was", 
'fifteenth-century', 'icfaytoaulhoagveoafnoyuirdepaerobflpiucracthiasoin', 'politicaldiseussions', "oriall'ittiseiprer", 'pointofdisturbance', 'would-bereformers', 'ten-thousand-mile', 'whichgovernments', 'thedowadelegationrand', 'tobemightyupontheearth', 'prohibitionblasphemy', 'church-cherished', 'temporalexpediency', 'thenationalsundaylawbanned', 'incomprehensibilities', 'belieftprqbrship', 'astothemeritsofthebibleweoffer', 'amendmentthought', 'onstratethepropriety', 'catholiestandard', 'educationaljathe', 'wemustthereforeconcludethatthe', 'includedwhatever', 'whatabouttheindividualwho', 'pageillustrations', 'tenderrestsupontendrive', 'precipitatelyfrom', 'goodsubstantialhighgradebicy', 'thedifferentstates', 'mueontoutlhninfg', 'penalties-enacted', 'furtherinformation', 'thesentinellibrary', 'antipedo-baptists', 'soul-crushingcorporations', "smitli'sdialraifi", 'religiousinstruction', 'mixiimiiiiimiiiiiinniiiiime', 'gttittertistinents', 'ordinaryinstruments', "teachers'fecieration", 'church-instituted', 'defendthemselves', 'theamericansentinel', 'righteousnessright', 'fourtlybommandment', 're-establishment', "will-o'-the-wisp", 'sanctimoniouspolitical', "students'library", 'forashorttimeonly', 'ever-compassionate', 'aravarimiiiiininisruninisimigivar', 'hethatspeakswithatongue', 'socialist-catholic', 'imomenzipipimmiiisim', 'foreigncountries', 'muchtoitseducationalvalue', 'luinrdeorstparnod', 'addireadytobreakandoverwhelmitinsocial', "atnerican'exposition", "will-o'-thewisps", 'miiiiimiiitimomi', 'individualchristians', 'andpronouncesthemwith', 'leaderoftheaceinpracticalimprovements', 'sthepapacyinprophecy', 'underacknowledgment', 'nationalconstitution', 'pilateunderstood', 'samplecopiesmailed', 'commander-in-chief', 'neofthelargestsanitariumin', 'containingadditional', 'selfstultification', 'caveatsjrademarks', 'cross-questioning', 'waspresentatthetrialoutlinestheproceed', 'admininistration', 'statedistinguished', "fox'sbookofmartyrs", 'labor-protecting', 
'nineteenth-century', 'presbytericvnism', 'itfollowsthatthisisnot', 'ex-attorney-general', 'pacificpressmussingco', "proipnhepsayri'npanardt", 'ourbabyisatestimonialtosanitariumfood', 'self-sufficiency', 'tdivinitycircuit', 'idsimpleconstruction', 'whichisperfectlyproperifthey', 'presspublishingco', 'statcesonstitution', 'knowledge-disseminating', 'politicalcorruption', 'inventioncertain', 'sergeant-at-arms', 'inseparablerelation', 'thecounselforthe', 'civicrighteousness', 'religious--observance', 'densely-populated', 'prohibitspriests', 'afitritifiralneviran', 'observinstitution', 'physical-necessity', 'religio-political', 'interdenominationalism', 'opportunitiesfor', 'uncompromisinglyopposed', 'evaseparate-isfr', 'non-intervention', 'information-seekers', 'half-century-old', "wetfavatwarseletertese'letetesetew", 'thecatholicchurchcannotdoany', 'papacyinprophecy', 'politicalreligious', 'billiard-playing', 'unfpracarpanmici', 'persecutionbothinrussiaandgermany', 'theresponsibility', 'hisgloriousappearing', 'importantito-day', 'religious-liberty', 'karapynolicrerat', 'cynosureindorses', 'instanceexpansion', 'evangelical-lutheran', 'thegreatestreformer', 'pago-christianity', 'unctralitioraitiftkly', 'postagestampsaccepted', 'thtshbeoonikahnauscbriethfohrder', 'trance-mediumship', 'act-of-parliament', 'titmitmmitimmvitimmvimimim', 'constitutional-principles', 'morally-instructed', 'ithethobjecickainnaidig', 'attorney-general', 'spiritualmindedness', 'latecommissionerof', 'awnfloaiiiavhmasct', 'straight-jacketed', 'tailtstkibitiontiottitallp', 'christianisabbath', 'prayer-meetmeeting', "ictrliot'ocoteuhi", 'pseudo-millennium', 'vanderbilt-rockefeller', "themselves'damnation", 'piearksetooasdas', 'politicalspeeches', 'anotherinfluentialcommittee', 'independtrespassers', 'tax-gatherportant', "be'liarticulatif", 'direcmanufacture', 'vssbiatotiaysord', 'religioustraining', 'ednimittaeappointed', 'sabbathdesecration', 'systeinisthestate', 'carefullyselected', 
'theseintroductory', 'brecorrespondent', 'sikteefitlfeentuty', 'perfectlycertain', 'reprefientatives', 'concordance--subject', 'concernministers', 'universally-binding', 'its-constitutionality', 'anti-sunday-work', 'alvjetriiezica-int', 'abookforthechildren', 'rapidly-approaching', 'coinmuniccations', 'notuhnesreeomwlny', 'singlosubsoription', 'state--possesses', "frow'massachusetts", 'iipiiibsbirreinin', 'self-justification', 'gitvtriistattnts', "mechanicar'processes", 'papillaryattradtion', 'ireceivedthebibleingoodcondition', 'tttttttttttttttttttttttttttti', 'self-righteousness', 'uncompromisimily', 'fundamentalprinciples', 'frommassachusetts', "diligently'instruct", 'criminalzofficer', 'iubocnidcmsatlrae', 'vastexpenditures', 'noresponsibility', 'instructiongiven', 'forbearingoneanotherandforgivingoneanon', 'rightfullypossess', 'waterburyamerican', 'occasionally-found', 'whichhasforitsobjectaunionofchurchand', 'itisacompletehistoricalanddescriptivesummary', 'spirit-wrestlers', 'aiviericansentinele', 'thecommonwealths', 'illustriousvisitor', 'teodmyuapratliyving', 'responsibilflicted', 'non-professional', 'chrisrequirements', 'payingsecurities', 'unitedstatesconstitution', 'humorist-philosopher', 'thereligpossible', "politically'included", 'itsotsvesisssiti', "l'atrztomiwil'illf", "'reconcentrados'", 'm-hintthyltsfftr', 'immobility--that', 'bestandbiggestnewspaper', 'government-endowed', 'commandment-keeping', 'the-incipleneyof', 'exemption-appendix', 'itlymnvmmninntvivirmlnyrninnyvvrtfummrmymvmmyy', 'lieutenant-general', 'this-communication', "tourists'edition", 'uneompromisingly', 'overorganization', 'lspeadebilitated', 'nationalreligion', 'civilgovernineat', 'interferencemust', 'religious-legislation', 'isdiscriminationand', 'handsomelyillustrated', 'distribution-win', 'eternally-enduring', 'accuratepronuncia', 'pulpit-reverencing', 'counter-arguments', 'damefashionandherslaves', 'certainunalienable', 'andalsoofthedominionof', 'temporalgovernments', 
'respectable-looking', 'differencelbetween', 'totouristsandallclassesofinvalids', 'ofpageitwillbeseenthattherevisedsieornreads', 'andyoushouldreadit', 'presenting-popish', 'righteousnessthat', 'citerdreilediettlith', 'interferencewith', 'these-dissenting', "american'executive", 'suchanti-christian', 'union--embracing', 'thisencouragement', 'god-in-the-constitution', 'repudiconscience', 'congressman-elect', 'elfavpronouncing', '------------------------', 'fellow-passenger', "the'''onventional", 'circumstancesithe', 'concerningthismuch', 'theresponsibilities', 'miiiiimiiiiiimiiiiimiiiiimiiiiim', 'totouristsandall', 'multi-millionaire', 'infaithfulwarning', 'xpositionbnildings', 'issofarasregards', 'trans-continental', 'tleeeeeseeeemeetreeeeeeleeeeoweeek', "negoweenalkogee'ree'lkowee'ftielieiegeseilielelereennellege", 'witgibettbacription', 'arrestediprpvided', 'icarapynoicrovray', 'vrecrlanrrsefeinretinuelerietso', 'iaicificpresspublishing', 'qualificaproperly', 'andsuperiorgoodness', 'bois-de-boulogne', "pernicious'effects", 'endangeringamerican', 'sundaymuseum-closing', "alldenominations'", 'half-disheartened', 'corncommandments', 'dishonuncivilized', 'receivingtheamericansentinel', 'catarrhinhalerfree', 'ex-congregationalist', 'christian-civilization', 'practicestouching', 'b-uc-h-a-d-n-e-z-z-a-r', 'betfererigagerfents', 'isthepapacyinprophecy', 'nearly-co-extensive', 'iimmuttimtesetstliumilimosillumetuilmtounntimmilitemttlirmillotmultm', 'declaratiorrstates', 'looselegislation', 'withhandsomedesignincolors', 'igshallwbheicdiiot', 'ameeicansentinel', 'threedollar-a-day', 'fellow-clergymen', 'covxaxalkaifrtil', 'beulicompromisinglyopposedto', 'gospelredemption', 'thatzwouhaveilaws', "legislativ'fhalls", 'irreparabledamage', "world'sfairinchicago", 'sparsely-settled', 'school-inspector', 'sundaynewspapers', 'blood-guiltiness', 'thoroughnational', 'unparlianientary', 'postmastergeneral', 'anti-reformation', 'theirconfinement', 'rougotrikitgeusp', 
'pliiiilloototiollipimpiiitilligill', 'turbulently-inclined', 'followingpropositions', 'definitelylocated', 'articlesfrformom', 'representativesfromdifferentpar', 'caytoaulhoavgeoafnyouindepuaobflpicuraethiaosinngs', 'amsterdampleyden', 'andtoforbideverythingwhichisnot', 'unrrecardenpraid', 'politicalteligionists', 'containinghotiseholdand', 'aleaderofthenewdemocracy', 'whatever-standpoint', 'selfcontradictory', 'director-general', 'anti-expansionists', 'tilitakgilowledg', 'inspectorgeneral', 'llitttitittittilltja', 'well-proportioned', 'receiver-general', 'overwfieliningaria', 'ptillsfilielesddigrallrgt', 'loverofcivilandreligiousliberty', 'appropriationonsunday', 'cannot-buy-or-sell', 'constideclaration', 'civilrgovernment', 'successfullyused', 'super-calendered', 'oliwethoebbjercetekaindr', 'imidinovosillyisp', 'governmentappointed', "'self-government", 'butthecommandmentsofgodapplytothe', 'successfullydprosecuted', 'widely-different', 'nagwordsabandebelhievaeqnat', 'self-gratification', 'fellowcommissioners', 'thelawswhichprotectitare', 'counter-political', 'thicklyinhabited', 'bbelebnleunsienagrl', 'touitrrehinelieteitnhi', 'robber-chieftans', 'isdtointctreasekthesubscriptitoinlisltoffthe', 'thegroundsandartgalleriesmightbeopen', 'foritseinipirneeaitteiothertaltssittistninuitsrreistleilice', 'thelliibbeerralliitty', 'iiinssaerksetooasaas', 'exercisethemselves', 'pagancounterfeitsinbtyhe', 'antipedobaptists', 'notthelawsregardingsundayobservanceaconcession', 'prohibitszfreedom', 'iinothingbetterpublished', 'thfiftlestidifiblimitirlitif', 'religiousliberty', 'fiftyonethousand', 'protestantseatholics', 'twenty-four-hour', 'sanctificationist', 'developmenthasvaried', 'itmakesallthedifferencein', 'ltoobothforemote', 'christianitywould', 'highly-civilized', 'elementarycdaution', 'recfeoivreydeianrrsefeirnegnucieriteso', 'appeal-avalanche', 'verbatimreportofthespeechesof', 'austro-hungarian', 'ileustrationsare', 'ifyoupreferthehalf', 'legislaincorporation', 
'consciencewillbe', 'scientifically-proved', 'thtishbeoomhahnauscbreipsst', 'websterdictionary', 'non-church-goers', 'politico-ethical', 'amendmentproposed', 'sectioh-rdeclared', 'isunconstitutionaland', 'selfinterpreting', 'willbesenttoanyaddressonre', 'itwasshowntohimandto', 'churchmembership', 'thanksgiving-days', 'thatscivilization', 'no-day-in-particular', "religiopolitical'", 'street-preaching', 'eieleeriieseceix', 'american-catholic', 'wanttopraylongandprayearnestlyand', 'personal-liberty', 'lengtlireonimunication', 'pacificbainescollege', "'representative'", 'sciatriameagency', 'elkhartcaizeiage', 'theseextraordinary', 'butwhetherprophecies', 'writefordeseriptivecatalogue', 'previousviotation', 'divinely-conferred', 'dynasty-stricken', 'sunday-journalism', 'yodfamaegxedceolfflereednatt', 'theweightofoneof', 'spiritually-minded', 'sabbath-observance', 'renderingallegiance', 'miiiiimiiiiimitlin', "correspondent'of", 'seriously-minded', "ithe'iinpossibillw", 'sitoorrioraltity', 'sthvatlusaobtreuafohrte', 'dearlyunderstand', 'interestsaffected', 'gmakeasurecovenant', 'receivesappropriations', 'interffeerreennccee', 'sunday-closinglaws', 'liberty-exemplifying', 'circularsandfull', 'beuncompromisinglyopposed', 'policemanization', 'reefeoiredyeiaerrsefe', 'thanksgiving-day', 'discriminationshall', 'bottomwithmetalandready', 'thtishbeomokahnauscbreipent', 'libertypossessed', "'putratherthayte", 'theissueswhichthispaperdiscussesarethe', 'non-ecclesiastical', 'theauthorhassoughttomakethisbookone', 'civilly-enforced', 'whiicchhppiicture', 'toultknlitttlftten', 'present-president', 'divisiondvizithenndividual', 'no-entanglingalliance', 'these-jealous-minded', 'congresstoopposetheadoptionofthejointresolution', "didn'tiresigniand", 'especiallyforconvertingnutsintobutterfor', 'fikerfilneakatist', 'otherimprovements', 'thattheyareinerror', "liberty'association", 'seventh-partof-time', 'butthewatermanisthebest', 'kblifibrbatatelto', 'alutroroxitiphroheil', 
'historyofamerica', 'slagglezatattogiveitv', 'lottelltrzoistax', 'wehaveaselectstockofthisbeautifulandinstructive', 'lduidcircorouuss', 'lewatedboyerlifer', 'imchangeableness', 'foracieidanpraco', 'establishingamerican', 'exciteadmiration', 'oublisheitquarterly', "constitufiou'and", 'sliiiiimiliinneliiiiim', "'constitutional'", "missionary'incitiful", 'an-impossibility', 'renderacceptible', 'beenlrecommended', 'straight-fromthe-shoulder', 'mudthestreasoning', 'narrowest-minded', 'half-consolidated', 'thwompromisingly', 'sabbath--desecration', 'suclitsltyithuetaelfthofrutle', "profess'clitistianity", 'thoroughlyintroduce', "resolution'adopted", 'harmlesstmeeting', "self-government'", 'areportofthehearingonthesundayclosingof', 'pauobflpicuarcthiaosins', 'long-established', "wew'ilrleceivepostagestampsinsmallquantitiesandanykindofgood", 'bishop-assistant', 'sabbathobservance', 'seventh-dayadventist', 'self-pronunciation', 'possessinterposes', 'puritan-american', 'unscripturalalso', 'self-aggrandizement', 'alifornimiligsts', "missionary's'work", 'significaquestion', 'individualfreedom', 'andveryseldomevenin', 'misunderdulgence', "german'missionaries", 'miiiiimiiiiiimiiiiim', 'beingtaughtinourschools', 'breckhriagesundaybill', 'communityseventy-five', 'carefully-gleaned', 'plainly-apparent', 'wtgeantseacteirvye', "legislative'halls", 'iljniprieciidienirbe', 'ahatihntenaarvelai', 'healthandtemperancemiscellany', "imperial'government", 'german-americans', 'daintily-prepared', 'divinelyappointed', 'curiosity-seeking', 'practiceintosmall', 'treasurer-general', 'widelyrecognized', 'dfamaegxedceolfielreednatt', 'non-parishioners', 'civilgovernments', 'non-interruption', 'prevailingneglect', 'post-intelligencer', 'thunderingcataract', 'underconsumption', 'miiiiimiiiiimiciiiimiiimmiiiiim', 'counsellor-at-law', 'andthesehavebeenturnedover', 'constitutionallimitations', 'civilandreligiousfreedom', 'aecyteiryyewhere', 'rvsepvitittauarltsic', ...], 15)
Correction 8 -- Remove long error tokens¶
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
# Correction cycle 8: find OCR-noise tokens flagged by
# clean.check_for_repeating_characters (e.g. "MIIIIIMIIIIIMIIIIIM"), replace
# them in each page, and write every page -- changed or not -- into the new
# cycle directory.
prev = cycle
cycle = "correction8"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and the cell can be re-run safely.
os.makedirs(directories['cycle'], exist_ok=True)

# Regular files only, skipping dot-files in the previous cycle's directory.
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

# One upper/lower-case alternation per character to check; the list does not
# depend on the file, so it is built once instead of once per page.
sub_list = ["m|M", "e|E", "f|F", "l|L", "i|I", "t|T"]

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Digits and some punctuation are blanked only in the copy used for
    # tokenizing; replacements are applied to the untouched `content`.
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)

    # Collect the results for every pattern into one flat list, in pattern
    # order. A token may appear more than once if several patterns flag it.
    # NOTE(review): judging by the printed output each item is a
    # (token, replacement) pair -- confirm against clean.check_for_repeating_characters.
    replacements = [
        pair
        for sub in sub_list
        for pair in clean.check_for_repeating_characters(tokens, sub)
    ]

    if replacements:
        print('{}: {}'.format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)

    # The context manager closes the file on exit; no explicit close() needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
AmSn18890710-V04-24-page7.txt: [('PACIFICBilliollEoll', ' ')]
AmSn18911126-V06-46-page1.txt: [('PliiiilloototiollIPIMPiiitilligill', ' ')]
AmSn18921020-V07-41-page1.txt: [('iiiiiiiiiiiiii', ' ')]
AmSn18960402-V11-14-page3.txt: [('IIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIII', ' ')]
AmSn18960924-V11-38-page5.txt: [('INTERFFEERREENNCCEE', ' ')]
AmSn18980113-V13-02-page1.txt: [('ifigiiiiiiiiiiii', ' ')]
AmSn18980120-V13-03-page1.txt: [('iiiiiiiiiiiiiiii', ' '), ('AilliliilliiiiIIIMIIIIII', ' '), ('MENIIIiiiiiiii', ' ')]
AmSn18990202-V14-05-page13.txt: [('tleeeeeseeeemeetreeeeeeleeeeoweeek', ' ')]
AmSn18990810-V14-31-page14.txt: [('MIIIIIMIIIIIIMIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18990817-V14-32-page14.txt: [('MMVIMAIMUMMIAMIAMIIIMMINEMIIMMIIIMMOM.', ' '), ('MIIIIIiiIIIIMS', ' '), ('MMVIMAIMUMMIAMIAMIIIMMINEMIIMMIIIMMOM.', ' '), ('MMIIMIIIIIIIMIIIIIIMIIIIMM', ' ')]
AmSn18990824-V14-33-page14.txt: [('IIIIIMIIIIIMMIIIIINI', ' '), ('MIXIIMIIIIIMIIIIIINNIIIIIME', ' ')]
AmSn18990831-V14-34-page14.txt: [('MIIIIIIMAIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIIMmimMliiiIM', ' ')]
AmSn18990907-V14-35-page14.txt: [('MIIIIIMMIIIIIIMIIIIIM', ' ')]
AmSn18990914-V14-36-page14.txt: [('SliiiiiMIliinneliiiiim', ' ')]
AmSn18990914-V14-36-page15.txt: [('eitifiltWiffeffalliallill', ' ')]
AmSn18990928-V14-38-page14.txt: [('militiMIIIIIMIllirMIIIIIMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18991019-V14-41-page14.txt: [('IMIIIIIMIIIIIMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIMIN', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18991102-V14-43-page14.txt: [('MIIIIIIMMIIIIIM', ' '), ('MIIIIIMIIIIIMIIIIII.', ' ')]
AmSn18991109-V14-44-page14.txt: [('MItttIMIIIIIIIIIIMIIIIIMIIIII', ' ')]
AmSn18991207-V14-48-page15.txt: [('M.IIIIIMIIIIIMIIIIIMIIIIMICIIIIM', ' '), ('MIIIIIMIIIIIMIIIIIM', ' ')]
AmSn18991214-V14-49-page15.txt: [('iiiiiiiiiiiiiiii', ' ')]
AmSn18991228-V14-50-page15.txt: [('ImmummtimmomOmmumMommON.MMI.O.Wilimm.MMERRIMMiluMMummuliMMmummillunmil.MminmMuummunmmummismimmil.mmlimmmulimmili.mmsmWtimMmuummlimmw.m.m.ft.mammW.M', ' '), ('mimmummulimmOUmunnmOluimmmumm.m.Mumnaum.mlimmmummumilMi.Mmuimft', ' ')]
AmSn19000104-V15-01-page15.txt: [('MIIIIIMIIIIIMIIIIIMIIIIIMIIIIIM', ' ')]
AmSn19000111-V15-02-page14.txt: [('MIIIIIMIIIIIMICIIIIMIIIMMIIIIIm', ' ')]
AmSn19000118-V15-03-page14.txt: [('MIITIIMIIIIIM', ' '), ('MIIIIIMIIIIIIMIIIIIMIIMMIIIIIM', ' ')]
AmSn19000215-V15-07-page13.txt: [('IIIIIIMIIIIIMaiiiiimulimM', ' ')]
AmSn19000301-V15-09-page13.txt: [('ImprimmommwmimmoVum', ' ')]
AmSn19000308-V15-10-page14.txt: [('MIIIIIMIIIIIIMIIIIIM', ' ')]
AmSn19000322-V15-12-page14.txt: [("NegoweeNalkogee'Ree'lkowee'ftielieiegeseilielelereeNnellege", ' ')]
AmSn19000329-V15-13-page14.txt: [('eeeeeeeeeeeeeeee', ' ')]
AmSn19000517-V15-19-page11.txt: [('TYMMITIMMITIVIIMMIIMMIll', ' ')]
AmSn19000621-V15-24-page15.txt: [('TTTTIIIIIITYTTTTTTTTTTF', ' '), ('LLITTTITITTITTILLTJa', ' ')]
AmSn19000705-V15-26-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTTTI', ' ')]
AmSn19000719-V15-28-page10.txt: [('TITMITMMITIMMVITIMMVIMIMIM', ' ')]
AmSn19000823-V15-33-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTTTLI', ' '), ('ITTTTTTTTTTTTTTTTTITTTT', ' ')]
AmSn19000823-V15-33-page8.txt: [('itlymnvmmninntvivirmlnyrninnyvvrtfummrmymvmmyy', ' ')]
AmSn19000906-V15-35-page15.txt: [("TTTTTTTTIII'TTTTI", ' '), ('TTTTTTTTTTTTTTT', ' ')]
AmSn19000920-V15-37-page15.txt: [('ITTTTTTTTTTTTTTTTTTTI', ' ')]
# %load shared_elements/summary.py
# Run the overview report on the directory written by the current correction
# cycle and keep the result in `summary` for the error listing that follows.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction8 Average verified rate: 0.9830200792014474 Average of error rates: 0.01865562518651149 Total token count: 8363231
# %load shared_elements/top_errors.py
# Build the error summary from the overview report, then display at most the
# first 50 entries of the top-errors listing (the bare expression on the last
# line is what the notebook renders as the cell output).
# NOTE(review): the second argument (10) looks like a minimum-count cutoff --
# confirm against reports.top_errors.
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
[("'", 8053),
('t', 4426),
('e', 3978),
('d', 3950),
('w', 3754),
('co', 3408),
('m', 3171),
('n', 3046),
('f', 2035),
('r', 2028),
('th', 1645),
('g', 1371),
('mo', 1160),
('u', 926),
('x', 864),
('ex', 521),
('pa', 410),
('q', 399),
('sunday-law', 334),
('k', 315),
("the'", 304),
('pp', 299),
('tion', 276),
("conscience'", 260),
('ch', 253),
('seventhday', 249),
('re', 224),
('ga', 220),
('oc', 218),
('z', 215),
('wm', 215),
('satolli', 210),
('employes', 209),
('munn', 207),
('ti', 200),
('id', 181),
('un', 173),
('ry', 170),
('al', 166),
('sunday-closing', 160),
('ca', 151),
('ment', 146),
('chain-gang', 136),
("to'", 134),
('nd', 130),
('ll', 128),
('lb', 125),
('il', 123),
('bateham', 122),
('cmsar', 121)]
Correction 9 -- Separate Squashed Words¶
# %load shared_elements/separate_squashed_words.py
# Correction cycle 9: split long unrecognized tokens that are several words
# run together (e.g. "accordpreservation" -> "accord preservation").
# Pass 1 builds a frequency-ranked vocabulary from the verified tokens of the
# previous cycle; pass 2 uses it to propose splits and writes every page --
# changed or not -- into the new cycle directory.
import pandas as pd
from math import log

prev = cycle
cycle = "correction9"

directories = utilities.define_directories(prev, cycle, base_dir)
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and the cell can be re-run safely.
os.makedirs(directories['cycle'], exist_ok=True)

# Regular files only, skipping dot-files. Materialized as a list so both
# passes below walk exactly the same files without listing the directory twice.
corpus = [f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f))]

# --- Pass 1: collect verified tokens and rank them by frequency -------------
verified_tokens = []
for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    # get_approved_tokens is called for its effect on `verified_tokens`
    # (its return value is not used here).
    clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)

tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
# Keep only tokens seen more than twice.
words_sorted_short = words_sorted[words_sorted.freq > 2]

sorted_list_of_words = list(words_sorted_short['token'])

# Cost of a word is log((rank + 1) * log(vocabulary size)), so more frequent
# words are cheaper. log(vocabulary size) is the same for every word and is
# computed once.
log_vocab_size = log(len(sorted_list_of_words))
wordcost = {k: log((i + 1) * log_vocab_size) for i, k in enumerate(sorted_list_of_words)}
maxword = max(len(x) for x in sorted_list_of_words)

# --- Pass 2: propose and apply splits ---------------------------------------
for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = utilities.strip_punct(content)
    tokens = utilities.tokenize_text(text)

    replacements = []
    for token in tokens:
        # Already a known word: nothing to fix.
        if token.lower() in spelling_dictionary:
            continue
        # Only tokens longer than 17 characters are candidates.
        if len(token) <= 17:
            continue
        # Tokens containing a hyphen or a quote character are left alone.
        # (The duplicated "\-" in the character class is redundant but harmless.)
        if re.search(r"[\-\-\'\"]", token):
            continue

        split_string = clean.infer_spaces(token, wordcost, maxword)
        list_split_string = split_string.split()

        # Accept the split only if clean.verify_split_string approves the
        # pieces against the spelling dictionary.
        if clean.verify_split_string(list_split_string, spelling_dictionary):
            replacements.append((token, split_string))

    if replacements:
        print("{}: {}".format(filename, replacements))
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)

    # The context manager closes the file on exit; no explicit close() needed.
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
AmSn18860101-V01-01-page3.txt: [('accordpreservation', 'accord preservation')]
AmSn18860301-V01-03-page6.txt: [('indifferenumeration', 'in differ enumeration')]
AmSn18860301-V01-03-page8.txt: [('whichhasforitsobjectaunionofchurchand', 'which has for its object a union of church and'), ('whichweresosuddenlyconvertedintoreligiopolitical', 'which were so suddenly converted into religio political')]
AmSn18860501-V01-05-page1.txt: [('establishredemption', 'establish redemption')]
AmSn18860601-V01-06-page1.txt: [('theresponsibilities', 'the responsibilities')]
AmSn18860601-V01-06-page6.txt: [('legislaincorporation', 'leg is lain corporation')]
AmSn18860901-V01-09-page6.txt: [('Presbyterianbrother', 'Presbyterian brother')]
AmSn18861001-V01-10-page7.txt: [('elementaryprinciples', 'elementary principles')]
AmSn18861201-V01-12-page8.txt: [('WEhaveaselectstockofthisbeautifulandinstructive', 'WE have a select stock of this beautiful and instructive'), ('Thebookisneatlygottenup', 'The book is neatly gotten up'), ('printedongoodpaper', 'printed on good paper')]
AmSn18870101-V02-01-page3.txt: [('practicepersecutionfor', 'practice persecution for')]
AmSn18870301-V02-03-page2.txt: [('heartyacknowledgnaont', 'hearty a c know led g n a o n t')]
AmSn18870501-V02-05-page8.txt: [('willeverbeuncompromisingly', 'will ever be uncompromisingly')]
AmSn18870701-V02-07-page4.txt: [('Christianinstitutions', 'Christian institutions')]
AmSn18870801-V02-08-page2.txt: [('determinationforesaid', 'determination foresaid')]
AmSn18871001-V02-10-page8.txt: [('sanctimoniouspolitical', 'sanctimonious political')]
AmSn18880401-V03-04-page5.txt: [('followingpropositions', 'following propositions')]
AmSn18880601-V03-06-page7.txt: [('Stevensonandhisconstituents', 'Stevenson and his constituents')]
AmSn18881001-V03-10-page7.txt: [('havebeennoneofourbusiness', 'have been none of our business')]
AmSn18881015-V03-10a-page7.txt: [('beingtaughtinourschools', 'being taught in our schools'), ('andevenherecognizesinthe', 'and even he recognizes in the')]
AmSn18881201-V03-12-page4.txt: [('ProhibitionBlasphemy', 'Prohibition Blasphemy')]
AmSn18890213-V04-04-page3.txt: [('brieflycomprehended', 'briefly comprehended')]
AmSn18890320-V04-09-page7.txt: [('recuperatingqualities', 'recuperating qualities')]
AmSn18890327-V04-10-page7.txt: [('LOVEROFCIVILANDRELIGIOUSLIBERTY', 'LOVER OF CIVIL AND RELIGIOUS LIBERTY')]
AmSn18890417-V04-13-page7.txt: [('Avigorousandstirringad', 'A vigorous and stirring ad')]
AmSn18890424-V04-14-page7.txt: [('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')]
AmSn18890515-V04-16-page7.txt: [('sufferingswhichtimelyattentionmighteasilyhaveprevented', 'sufferings which timely attention might easily have prevented')]
AmSn18890522-V04-17-page7.txt: [('Theissueswhichthispaperdiscussesarethe', 'The issues which this paper discusses are the')]
AmSn18890522-V04-17-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18890605-V04-19-page7.txt: [('ButaGoodOneisaJoyForever', 'But a Good One is a Joy Forever'), ('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')]
AmSn18890612-V04-20-page7.txt: [('LEADEROFTHEACEINPRACTICALIMPROVEMENTS', 'LEADER OF THE ACE IN PRACTICAL IMPROVEMENTS')]
AmSn18890703-V04-23-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18890710-V04-24-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION')]
AmSn18890724-V04-26-page7.txt: [('PACIFICBainesCollege', 'PACIFIC B a ines College'), ('yousupposethatthegovernment', 'you suppose that the government')]
AmSn18890807-V04-28-page2.txt: [('brieflycomprehended', 'briefly comprehended')]
AmSn18890807-V04-28-page3.txt: [('Earthlygovernments', 'Earthly governments')]
AmSn18890807-V04-28-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18890821-V04-30-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18890828-V04-31-page2.txt: [('temporalexpediency', 'temporal expediency')]
AmSn18890828-V04-31-page8.txt: [('Allthereformswhichtheassociationregard', 'All the reforms which the association regard')]
AmSn18890905-V04-32-page1.txt: [('afterconsideration', 'after consideration')]
AmSn18890905-V04-32-page2.txt: [('theUnitedStatesSenate', 'the United States Senate')]
AmSn18890918-V04-34-page4.txt: [('ChristianStatesman', 'Christian Statesman')]
AmSn18890918-V04-34-page8.txt: [('profitableemployment', 'profitable employment')]
AmSn18890925-V04-35-page7.txt: [('THEMACHINEHASNEVERBEENUSED', 'THE MACHINE HAS NEVER BEEN USED'), ('THEWEEKLYWISCONSIN', 'THE WEEKLY WISCONSIN'), ('THEWEEKLYWISCONSIN', 'THE WEEKLY WISCONSIN')]
AmSn18891002-V04-36-page7.txt: [('thedifferentStates', 'the different States'), ('THEMACHINEHASNEVERBEENUSED', 'THE MACHINE HAS NEVER BEEN USED')]
AmSn18891016-V04-38-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18891023-V04-39-page6.txt: [('counterattractions', 'counter attractions')]
AmSn18891030-V04-40-page7.txt: [('EMPLOYMENTandWEESE', 'EMPLOYMENT and WEE S E')]
AmSn18891106-V04-41-page7.txt: [('toTouristsandallclassesofInvalids', 'to Tourists and all classes of Invalids')]
AmSn18891113-V04-42-page7.txt: [('ThePicturesqueRouteforBusinessand', 'The Picturesque Route for Business and')]
AmSn18891113-V04-42-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18891120-V04-43-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18891127-V04-44-page2.txt: [('ecclesiasticocivil', 'ecclesiastic o civil')]
AmSn18891127-V04-44-page5.txt: [('thedowadelegationrand', 'the do wade legation rand')]
AmSn18891127-V04-44-page7.txt: [('BESTandBIGGESTNEWSPAPER', 'BEST and BIGGEST NEWSPAPER')]
AmSn18891204-V04-45-page5.txt: [('demonstradepriving', 'demons trade p riving')]
AmSn18891204-V04-45-page7.txt: [('oftextsforeverydayintheyear', 'of texts for everyday in the year'), ('Abookforboysandgirls', 'A book for boys and girls'), ('willbesenttoanyaddressonre', 'will be sent to any address on r e')]
AmSn18891211-V04-46-page7.txt: [('AcresLandintheaboveCounties', 'Acres Land in the above Counties'), ('Theauthorhassoughttomakethisbookone', 'The author has sought to make this book one'), ('HistoryofProtestantism', 'History of Protestantism')]
AmSn18891218-V04-47-page8.txt: [('intentionallyignore', 'intentionally ignore')]
AmSn18891225-V04-48-page7.txt: [('tainingtestimonials', 'tain ing testimonials')]
AmSn18891225-V04-48-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18900116-V05-03-page7.txt: [('inanypropersenseofthe', 'in any proper sense of the')]
AmSn18900130-V05-05-page3.txt: [('studiouslyssecular', 'studiously s secular')]
AmSn18900130-V05-05-page7.txt: [('therecommendations', 'the recommendations')]
AmSn18900206-V05-06-page7.txt: [('SendittoYourFriends', 'Send it to Your Friends'), ('Papacywasfullydeveloped', 'Papacy was fully developed')]
AmSn18900206-V05-06-page8.txt: [('singlosubsoription', 'sing lo sub so rip t i o n')]
AmSn18900213-V05-07-page4.txt: [('barefacedmisrepresentationandbytheidr', 'barefaced misrepresentation and by the i dr')]
AmSn18900213-V05-07-page7.txt: [('furtherinformation', 'further information')]
AmSn18900306-V05-10-page1.txt: [('THEAMERICANSENTINEL', 'THE AMERICAN SENTINEL')]
AmSn18900320-V05-12-page6.txt: [('inestimableblessings', 'inestimable blessings')]
AmSn18900320-V05-12-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18900327-V05-13-page7.txt: [('PresbyterianChurches', 'Presbyterian Churches')]
AmSn18900403-V05-14-page5.txt: [('politicalreligious', 'political religious')]
AmSn18900410-V05-15-page7.txt: [('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION'), ('pieceandSIXBEAUTIFULLYCOLOREDPLATES', 'piece and SIX BEAUTIFULLY COLORED PLATES'), ('containinghotiseholdand', 'containing hot is e h o l d a n d')]
AmSn18900410-V05-15-page8.txt: [('beuncompromisinglyopposed', 'be uncompromisingly opposed')]
AmSn18900417-V05-16-page7.txt: [('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION')]
AmSn18900424-V05-17-page8.txt: [('singlesubscription', 'single subscription')]
AmSn18900508-V05-19-page5.txt: [('nationalcharacteristic', 'national characteristic'), ('spiritualmindedness', 'spiritual mindedness')]
AmSn18900515-V05-20-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18900522-V05-21-page2.txt: [('TOhimthatworkethnotbutbelieveth', 'TO him that worketh not but believeth'), ('Forbearingoneanotherandforgivingoneanon', 'For bearing one another and forgiving one an on')]
AmSn18900522-V05-21-page6.txt: [('Congresstoopposetheadoptionofthejointresolution', 'Congress to oppose the adoption of the joint resolution')]
AmSn18900529-V05-22-page7.txt: [('idsimpleconstruction', 'ids imp le construction')]
AmSn18900605-V05-23-page7.txt: [('PACIFICPRESSMUSSINGCO', 'PACIFIC PRESS M U S S I N G C O'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY'), ('EstablishedonFourContinents', 'Established on Four Continents')]
AmSn18900612-V05-24-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18900626-V05-26-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18900717-V05-28-page7.txt: [('EstablishedonFourContinents', 'Established on Four Continents')]
AmSn18900731-V05-30-page7.txt: [('similarstylesformerlysoldandstiltofferedatmuch', 'similar styles formerly sold and stilt offered at much'), ('Verbatimreportofthespeechesof', 'Verbatim report of the speeches of')]
AmSn18900807-V05-31-page1.txt: [('authoritativelywhat', 'authoritatively what')]
AmSn18900807-V05-31-page7.txt: [('TheSalemWitchcraft', 'The Salem Witchcraft'), ('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION')]
AmSn18900821-V05-33-page7.txt: [('BattleCreekBakeryCo', 'Battle Creek Bakery C o'), ('CIVILGOVERNMENTANDRELIGIONDUPLICATING', 'CIVIL GOVERNMENT AND RELIGION DUPLICATING')]
AmSn18900828-V05-34-page7.txt: [('MERICANINSTITUTIONS', 'MER I CAN INSTITUTIONS'), ('CIVILGOVERNMENTANDRELIGIONDUPLICATING', 'CIVIL GOVERNMENT AND RELIGION DUPLICATING')]
AmSn18900918-V05-37-page1.txt: [('compelleartaLligten', 'compel le art a L l i g t e n')]
AmSn18900918-V05-37-page7.txt: [('Sentbymailonreceiptofprice', 'Sent by mail on receipt of price')]
AmSn18900925-V05-38-page7.txt: [('HEALTHFOODCRACKERS', 'HEALTH FOOD CRACKERS')]
AmSn18901002-V05-39-page1.txt: [('whateverappropriation', 'whatever appropriation')]
AmSn18901002-V05-39-page7.txt: [('Constitutionsoldiery', 'Constitution soldiery')]
AmSn18901009-V05-40-page8.txt: [('weasChristianworkersinthecauseofChristdeem', 'we as Christian workers in the cause of Christ deem'), ('UnitedStatesConstitutionasit', 'United States Constitution as it')]
AmSn18901023-V05-42-page1.txt: [('correspondentadmits', 'correspondent admits')]
AmSn18901023-V05-42-page4.txt: [('ionastotherightOfGovernmenttocorrect', 'ion as to the right Of Government to correct')]
AmSn18901030-V05-43-page7.txt: [('Ioutlinesvividlytherelationthatexistedbetween', 'I outlines vividly the relation that existed between'), ('TheAmericanSabbathUnion', 'The American Sabbath Union')]
AmSn18901113-V05-45-page6.txt: [('Thenriollownumerous', 'Then rio l low numerous')]
AmSn18901113-V05-45-page7.txt: [('THENATIONALSUNDAYLAWbanned', 'THE NATIONAL SUNDAY LAW ban ned')]
AmSn18901218-V05-50-page10.txt: [('revolutionaryresolution', 'revolutionary resolution')]
AmSn18910108-V06-02-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('OfassortedHealthFoodCrackerssentpost', 'Of assorted Health Food Crackers sent post'), ('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910115-V06-03-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910212-V06-07-page3.txt: [('certainunalienable', 'certain unalienable')]
AmSn18910212-V06-07-page7.txt: [('HEALTHFOODCRACKERS', 'HEALTH FOOD CRACKERS')]
AmSn18910226-V06-09-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910312-V06-11-page7.txt: [('furtherparticulars', 'further particulars')]
AmSn18910319-V06-12-page7.txt: [('PEOPLINGOFTHEEARTH', 'PEOPLING OF THE EARTH'), ('UNITEDSTATESCONSTITUTION', 'UNITED STATES CONSTITUTION')]
AmSn18910319-V06-12-page8.txt: [('singlesubscription', 'single subscription')]
AmSn18910402-V06-14-page7.txt: [('PEOPLINGOFTHEEARTH', 'PEOPLING OF THE EARTH')]
AmSn18910409-V06-15-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910409-V06-15-page8.txt: [('specialarrangement', 'special arrangement')]
AmSn18910416-V06-16-page6.txt: [('inscriptionianitig', 'inscription ian it i g'), ('differentreligious', 'different religious')]
AmSn18910416-V06-16-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910430-V06-18-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910507-V06-19-page7.txt: [('CIVILGOVERNMENTANDRELIGION', 'CIVIL GOVERNMENT AND RELIGION'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910521-V06-21-page7.txt: [('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18910723-V06-29-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910730-V06-30-page7.txt: [('THESENTINELLIBRARY', 'THE SENTINEL LIBRARY')]
AmSn18910827-V06-34-page6.txt: [('ReligiousLibertyAssociation', 'Religious Liberty Association')]
AmSn18910903-V06-35-page7.txt: [('STHEPAPACYINPROPHECY', 'S THE PAPACY IN PROPHECY')]
AmSn18910910-V06-36-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18910917-V06-37-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY')]
AmSn18910924-V06-38-page7.txt: [('TheHouseholdMonitorofHealth', 'The Household Monitor of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18911008-V06-39-page7.txt: [('ThunderingCataract', 'Thundering Cat ar act'), ('TheHouseholdWalterofHealth', 'The Household Walter of Health'), ('ISTHEPAPACYINPROPHECY', 'IS THE PAPACY IN PROPHECY')]
AmSn18911015-V06-40-page7.txt: [('BYWILLIAMJACKSONARMSTRONG', 'BY WILLIAM JACKSON ARMSTRONG')]
AmSn18911029-V06-42-page7.txt: [('ROMANISMANDCIVILLIBERTY', 'ROMANISM AND CIVIL LIBERTY'), ('LateCommissionerof', 'Late Commissioner of')]
AmSn18911224-V06-50-page7.txt: [('petitionsoftheLongIslandfishermen', 'petitions of the Long Island fishermen')]
AmSn18920107-V07-01-page6.txt: [('denominationalists', 'denomination a lists')]
AmSn18920204-V07-05-page7.txt: [('andYOUshouldreadit', 'and YOU should read it')]
AmSn18920211-V07-06-page1.txt: [('spiritualmindedness', 'spiritual mindedness')]
AmSn18920218-V07-07-page3.txt: [('politicalinstitutions', 'political institutions')]
AmSn18920310-V07-10-page8.txt: [('disestablishmentmay', 'disestablishment may')]
AmSn18920324-V07-12-page6.txt: [('Sundayconcertintheoperahouse', 'Sunday concert in the opera house'), ('theCatholicChurchcannotdoany', 'the Catholic Church cannot do any')]
AmSn18920324-V07-12-page7.txt: [('SAMPLECOPIESMAILED', 'SAMPLE COPIES MAI L ED')]
AmSn18920407-V07-14-page7.txt: [('everypersoninterestedinmissionaryworkathome', 'every person interested in missionary work at home')]
AmSn18920428-V07-17-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18920526-V07-21-page7.txt: [('SAMPLECOPIESMAILED', 'SAMPLE COPIES MAI L ED')]
AmSn18920623-V07-25-page6.txt: [('NATIONALRELIGIOUSLIBERTY', 'NATIONAL RELIGIOUS LIBERTY'), ('representativesfromdifferentpar', 'representatives from different par')]
AmSn18920630-V07-26-page3.txt: [('protectionaccorded', 'protection accorded')]
AmSn18920721-V07-28-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18920728-V07-29-page5.txt: [('represeneverlasting', 'rep res en everlasting')]
AmSn18920811-V07-31-page4.txt: [('therepresentatives', 'the representatives')]
AmSn18920811-V07-31-page7.txt: [('theultrawingofthechurchpeople', 'the ultra wing of the church people'), ('thegroundsandartgalleriesmightbeopen', 'the grounds and art galleries might be open')]
AmSn18921006-V07-39-page7.txt: [('ITISACOMPLETEHISTORICALANDDESCRIPTIVESUMMARY', 'IT IS A COMPLETE HISTORICAL AND DESCRIPTIVE SUMMARY')]
AmSn18921013-V07-40-page8.txt: [('receivingTHEAMERICANSENTINEL', 'receiving THE AMERICAN SENTINEL')]
AmSn18921020-V07-41-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18921027-V07-42-page6.txt: [('nineteentwentieths', 'nineteen twentieth s')]
AmSn18921027-V07-42-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18921117-V07-45-page7.txt: [('BOTHOLDANDNEWSUBSCRIBERS', 'BOTH OLD AND NEW SUBSCRIBERS')]
AmSn18921124-V07-46-page6.txt: [('Thereisnosafetyforourcountry', 'There is no safety for our country'), ('andsuperiorgoodness', 'and superior goodness'), ('SecretaryAmericanSecuraUnion', 'Secretary American Sec ur a Union')]
AmSn18921201-V07-47-page2.txt: [('disconnectedherself', 'disconnected herself')]
AmSn18930126-V08-04-page7.txt: [('persecutionbothinRussiaandGermany', 'persecution both in Russia and Germany'), ('notbeopenedonSunday', 'not be opened on Sunday'), ('isunconstitutionaland', 'is unconstitutional and')]
AmSn18930223-V08-08-page7.txt: [('thattheyareinerror', 'that they are in error'), ('andthatthelawsofthe', 'and that the laws of the')]
AmSn18930309-V08-10-page6.txt: [('Associationssentafrom', 'Associations sent a from')]
AmSn18930309-V08-10-page7.txt: [('pointofdisturbance', 'point of disturbance'), ('Itwasshowntohimandto', 'It was shown to him and to')]
AmSn18930323-V08-12-page1.txt: [('Governmentappointed', 'Government appointed')]
AmSn18930608-V08-23-page8.txt: [('JudgeSteinoverruledthepleathat', 'Judge Stein overruled the plea that'), ('Itmakesallthedifferencein', 'It makes all the difference in')]
AmSn18930615-V08-24-page5.txt: [('recentlyinterviewed', 'recently interviewed')]
AmSn18930622-V08-25-page7.txt: [('BibleObjectLessons', 'Bible Object Lessons'), ('SongsforLittleOnes', 'Songs for Little Ones')]
AmSn18930629-V08-26-page7.txt: [('BATTLECREEKBAKERYCO', 'BATTLE CREEK BAKERY C O')]
AmSn18930706-V08-27-page7.txt: [('AReportoftheHearingontheSundayClosingof', 'A Report of the Hearing on the Sunday Closing of'), ('ItEnablesEveryManandWoman', 'It Enables Every Man and Woman')]
AmSn18930713-V08-28-page7.txt: [('BATTLECREEKBAKERYCO', 'BATTLE CREEK BAKERY C O')]
AmSn18930713-V08-28-page8.txt: [('interestingthisweek', 'interesting this week')]
AmSn18930720-V08-29-page6.txt: [('religiouspersecution', 'religious persecution')]
AmSn18930727-V08-30-page6.txt: [('thejudgmentmayfallonanypartofasinfulnation', 'the judgment may fall on any part of a sinful nation')]
AmSn18930803-V08-31-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods'), ('OurbabyisatestimonialtoSanitariumfood', 'Our baby is a testimonial to Sanitarium food'), ('andisasruddyandhealthya', 'and is as ruddy and healthy a')]
AmSn18930810-V08-32-page7.txt: [('AppealandRemonstrance', 'Appeal and Remonstrance'), ('theFlorentineMartyr', 'the Florentine Martyr')]
AmSn18930817-V08-33-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods')]
AmSn18930907-V08-35-page7.txt: [('TheGemofHealthFoods', 'The Gem of Health Foods')]
AmSn18930921-V08-37-page8.txt: [('feWoriiiiiresseTatthe', 'feW or iii i ir esse Tat the')]
AmSn18931116-V08-45-page7.txt: [('adaptthelifeofChristtothe', 'adapt the life of Christ to the')]
AmSn18931123-V08-46-page7.txt: [('SPECIALHOLIDAYOFFER', 'SPECIAL HOLIDAY OFFER')]
AmSn18931221-V08-50-page7.txt: [('Bibleillustrationsandstoriesthatwehavebeen', 'Bible illustrations and stories that we have been')]
AmSn18940125-V09-04-page1.txt: [('thechurchforAmerica', 'the church for America'), ('thenaturalallianceexisting', 'the natural alliance existing')]
AmSn18940215-V09-07-page7.txt: [('andtobringdowndamnationandcursesuponevery', 'and to bring down damnation and curses upon every'), ('Couldfollybegreaterthanpraying', 'Could folly be greater than praying')]
AmSn18940329-V09-13-page7.txt: [('withgeographicalstatisticalnotes', 'with geographical statistical notes')]
AmSn18940412-V09-15-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18940419-V09-16-page5.txt: [('underacknowledgment', 'under acknowledgment')]
AmSn18940524-V09-21-page4.txt: [('addireadytobreakandoverwhelmitinsocial', 'addi ready to break and overwhelm it in social')]
AmSn18940621-V09-25-page7.txt: [('isthesameinallagesoftheworld', 'is the same in all ages of the world')]
AmSn18940802-V09-31-page2.txt: [('toexerciseanycoercionwhatever', 'to exercise any coercion whatever')]
AmSn18940823-V09-33-page8.txt: [('uncompromisinglyopposed', 'uncompromisingly opposed')]
AmSn18941004-V09-39-page5.txt: [('receivesappropriations', 'receives appropriations')]
AmSn18941018-V09-41-page3.txt: [('anxietyoftheSundaypreachersisto', 'anxiety of the Sunday preachers is to'), ('nomorethanfollowing', 'no more than following')]
AmSn18941018-V09-41-page4.txt: [('carriedoutinthenameofthewholeCatholiccommu', 'carried out in the name of the whole Catholic com m u')]
AmSn18941101-V09-43-page8.txt: [('convertedintoaholiday', 'converted into a holiday'), ('Thelawswhichprotectitare', 'The laws which protect it are')]
AmSn18941122-V09-46-page7.txt: [('ACRITICALHARMONYoftheGOSPELS', 'A CRITICAL HARMONY of the GOSPELS')]
AmSn18941206-V09-48-page8.txt: [('fromtheAdventistprintingofficeistheSchutzenplatz', 'from the Adventist printing office is the S c hut z e n p l a t z')]
AmSn18941213-V09-49-page7.txt: [('comingsolongastheservicesareheld', 'coming so long as the services are held'), ('wanttopraylongandprayearnestlyand', 'want to pray long and pray earnestly and'), ('comeoutopenlyinfavorofreligiousedu', 'come out openly in favor of religio use du'), ('hassoclearlyspoken', 'has so clearly spoken'), ('andhebaselyintimates', 'and he basely intimates'), ('WillpolishastovebetterClumany', 'Will polish a stove better C l u m a n y')]
AmSn18950117-V10-03-page1.txt: [('illustriousvisitor', 'illustrious visitor')]
AmSn18950124-V10-04-page7.txt: [('tenderrestsupontendrive', 'tender rests upon ten drive')]
AmSn18950131-V10-05-page3.txt: [('merieanStatePapers', 'me rie an State Papers')]
AmSn18950307-V10-10-page2.txt: [('notthelawsregardingSundayobservanceaconcession', 'not the laws regarding Sunday observance a concession'), ('wouldbecomeuniversal', 'would become universal')]
AmSn18950425-V10-17-page6.txt: [('GoodSubstantialHighGradeBICY', 'Good Substantial High Grade BIC Y'), ('includingtheologians', 'including theologians')]
AmSn18950509-V10-19-page4.txt: [('whatabouttheindividualwho', 'what about the individual who'), ('Transvaalgovernment', 'Transvaal government')]
AmSn18950620-V10-25-page1.txt: [('Wemustthereforeconcludethatthe', 'We must therefore conclude that the')]
AmSn18950725-V10-30-page7.txt: [('HISGLORIOUSAPPEARING', 'HIS GLORIOUS APPEARING')]
AmSn18950725-V10-30-page8.txt: [('anduncompromisingly', 'and uncompromisingly')]
AmSn18950822-V10-33-page5.txt: [('lieutenantgovernor', 'lieutenant governor')]
AmSn18950822-V10-33-page7.txt: [('dayAdventistsandthecourts', 'day Adventists and the courts')]
AmSn18950926-V10-38-page5.txt: [('domiriatelegislation', 'dom iri ate legislation')]
AmSn18951010-V10-40-page7.txt: [('muchtoitseducationalvalue', 'much to its educational value')]
AmSn18951017-V10-41-page7.txt: [('andtheschemewentthrough', 'and the scheme went through'), ('andthesehavebeenturnedover', 'and these have been turned over'), ('anddraggedhimofftojail', 'and dragged him off to jail'), ('andtoforbideverythingwhichisnot', 'and to forbid everything which is not'), ('theonlydangerwhichthreatenstheliberties', 'the only danger which threatens the liberties')]
AmSn18951031-V10-43-page7.txt: [('butwhetherprophecies', 'but whether prophecies'), ('theyshallbedoneaway', 'they shall be done away')]
AmSn18951121-V10-46-page7.txt: [('andveryseldomevenin', 'and very seldom even in')]
AmSn18951128-V10-47-page3.txt: [('fellowcommissioners', 'fellow commissioners')]
AmSn18951128-V10-47-page8.txt: [('THERIGHTSofthePEOPLE', 'THE RIGHTS of the PEOPLE')]
AmSn18951219-V10-50-page7.txt: [('waspresentatthetrialoutlinestheproceed', 'was present at the trial outlines the proceed')]
AmSn18960109-V11-02-page7.txt: [('andalsooftheDominionof', 'and also of the Dominion of'), ('Othermakesmaybegood', 'Other makes may be good'), ('buttheWATERMANISTHEBEST', 'but the WATERMAN IS THE BEST')]
AmSn18960319-V11-12-page7.txt: [('SweepingEverything', 'Sweeping Everything')]
AmSn18960326-V11-13-page7.txt: [('SweepingEverything', 'Sweeping Everything')]
AmSn18960402-V11-14-page3.txt: [('RELIGIOUSintolerance', 'RELIGIOUS intolerance')]
AmSn18960409-V11-15-page8.txt: [('Itfollowsthatthisisnot', 'It follows that this is not')]
AmSn18960416-V11-16-page8.txt: [('whichisperfectlyproperifthey', 'which is perfectly proper if they')]
AmSn18960430-V11-18-page6.txt: [('Sabbathdesecration', 'Sabbath desecration')]
AmSn18960430-V11-18-page7.txt: [('AttractiveBoardCover', 'Attractive Board Cover')]
AmSn18960514-V11-20-page7.txt: [('andpronouncesthemwith', 'and pronounces them with'), ('tobemightyupontheearth', 'to be mighty upon the earth')]
AmSn18960702-V11-26-page7.txt: [('enablesittopromotethehealth', 'enables it to promote the health')]
AmSn18960730-V11-30-page7.txt: [('ButthecommandmentsofGodapplytothe', 'But the commandments of God apply to the'), ('FormtheGREATTHROUGHLINEtoall', 'Form the GREAT THROUGH LINE to all'), ('TWOCANNIBALARCHIPELAGOES', 'TWO CANNIBAL ARCHIPELAGOES')]
AmSn18960813-V11-32-page4.txt: [('controversieswhich', 'controversies which')]
AmSn18960903-V11-35-page2.txt: [('entitledtoallthebenefitsofcivilsocieyt', 'entitled to all the benefits of civil so c i e y t')]
AmSn18961001-V11-39-page7.txt: [('GeorgeFredWilliams', 'George Fred Williams'), ('ALeaderoftheNewDemocracy', 'A Leader of the New Democracy'), ('Othermakesmaybegood', 'Other makes may be good'), ('buttheWATERMANISTHEBEST', 'but the WATERMAN IS THE BEST')]
AmSn18961015-V11-41-page7.txt: [('SendforCircularandTerms', 'Send for Circular and Terms'), ('Stampswillbeacceptableforsmallremit', 'Stamps will be acceptable for small remit')]
AmSn18961119-V11-46-page2.txt: [('closingdecadeofthenineteenthcentury', 'closing decade of the nineteenth century')]
AmSn18961126-V11-47-page4.txt: [('Religiousestablishments', 'Religious establishments')]
AmSn18961210-V11-49-page7.txt: [('withhandsomedesignincolors', 'with handsome design in colors')]
AmSn18970121-V12-03-page10.txt: [('wouldbeunchristian', 'would be unchristian')]
AmSn18970121-V12-03-page11.txt: [('inseparablerelation', 'inseparable relation')]
AmSn18970121-V12-03-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18970128-V12-04-page16.txt: [('NationalConstitution', 'National Constitution')]
AmSn18970204-V12-05-page13.txt: [('ecclesiasticalinstitution', 'ecclesiastical institution')]
AmSn18970211-V12-06-page3.txt: [('theseextraordinary', 'these extraordinary')]
AmSn18970311-V12-10-page15.txt: [('NationalConstitution', 'National Constitution')]
AmSn18970311-V12-10-page4.txt: [('Congregationalchurch', 'Congregational church')]
AmSn18970318-V12-11-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O'), ('ABookfortheChildren', 'A Book for the Children')]
AmSn18970325-V12-12-page8.txt: [('unalterableprinciple', 'unalterable principle')]
AmSn18970401-V12-13-page16.txt: [('thisbookreadingismadeeasy', 'this book reading is made easy'), ('MyBiblereceivedthis', 'My Bible received this')]
AmSn18970408-V12-14-page5.txt: [('TheItaliangovernment', 'The Italian government')]
AmSn18970415-V12-15-page15.txt: [('gasternPPaasssenger', 'gas tern P P a ass sen ger')]
AmSn18970422-V12-16-page16.txt: [('PracticalReference', 'Practical Reference')]
AmSn18970506-V12-18-page16.txt: [('thoroughlyintroduce', 'thoroughly introduce')]
AmSn18970513-V12-19-page1.txt: [('fundamentalprinciples', 'fundamental principles')]
AmSn18970520-V12-20-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18970520-V12-20-page16.txt: [('Ifyoupreferthehalf', 'If you prefer the half'), ('themonthlypaymentwillbe', 'the monthly payment will be')]
AmSn18970603-V12-22-page12.txt: [('religiousinstruction', 'religious instruction')]
AmSn18970610-V12-23-page5.txt: [('interdenominationalism', 'inter denominationalism')]
AmSn18970624-V12-25-page10.txt: [('approvingconscience', 'approving conscience')]
AmSn18970624-V12-25-page5.txt: [('strictlyeconomical', 'strictly economical')]
AmSn18970701-V12-26-page14.txt: [('PACIFICPRESSPUBLISHINGCP', 'PACIFIC PRESS PUBLISHING C P')]
AmSn18970707-V12-27-page15.txt: [('InvaluabletoBibleandHistoryStudents', 'Invaluable to Bible and History Students')]
AmSn18970722-V12-29-page2.txt: [('advancingevolution', 'advancing evolution')]
AmSn18970722-V12-29-page4.txt: [('endangeringAmerican', 'endangering American')]
AmSn18970805-V12-31-page9.txt: [('gamblingfraternity', 'gambling fraternity')]
AmSn18970819-V12-33-page3.txt: [('ilvyettoAnaximandercreditfortheinventionofathin', 'i lv yet to Anaximander credit for the invention of a thin')]
AmSn18970916-V12-36-page16.txt: [('TheBiblegivenasapresentforsixNEWyearlysubscriptionsat', 'The Bible given as a present for six NEW yearly subscriptions at')]
AmSn18971007-V12-39-page16.txt: [('AstothemeritsoftheBibleweoffer', 'As to the merits of the Bible we offer'), ('IhavereceivedmyBible', 'I have received my Bible'), ('andtosayIamwellpleasedwould', 'and to say I am well pleased would')]
AmSn18971014-V12-40-page14.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18971014-V12-40-page16.txt: [('ItisjustwhatIhavelongwanted', 'It is just what I have long wanted'), ('IreceivedtheBibleingoodcondition', 'I received the Bible in good condition')]
AmSn18971021-V12-41-page14.txt: [('expensesreasonable', 'expenses reasonable')]
AmSn18971028-V12-42-page16.txt: [('ofpageitwillbeseenthattheRevisedsieornreads', 'of page it will be seen that the Revised s i e o r n r e a d s')]
AmSn18971028-V12-42-page2.txt: [('dangerousnegligence', 'dangerous negligence')]
AmSn18971028-V12-42-page9.txt: [('politicalcorruption', 'political corruption')]
AmSn18971111-V12-44-page1.txt: [('democraticgovernment', 'democratic government')]
AmSn18971111-V12-44-page7.txt: [('thegreatestreformer', 'the greatest reformer')]
AmSn18971125-V12-46-page14.txt: [('especiallyforconvertingnutsintobutterfor', 'especially for converting nuts into butter for')]
AmSn18971209-V12-48-page4.txt: [('probablyinfluenced', 'probably influenced')]
AmSn18971230-V12-50-page13.txt: [('differencelbetween', 'difference l between')]
AmSn18980106-V13-01-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18980113-V13-02-page15.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')]
AmSn18980113-V13-02-page16.txt: [('togiveourpatronsabenefit', 'to give our patrons a benefit')]
AmSn18980120-V13-03-page14.txt: [('DameFashionandHerSlaves', 'Dame Fashion and Her Slaves'), ('HealthandTemperanceMiscellany', 'Health and Temperance Miscellany')]
AmSn18980120-V13-03-page3.txt: [('individualChristians', 'individual Christians')]
AmSn18980120-V13-03-page6.txt: [('heavenlycitizenship', 'heavenly citizenship')]
AmSn18980127-V13-04-page14.txt: [('oftheCYCLONECONTINUES', 'of the CYCLONE CONTINUES')]
AmSn18980203-V13-05-page14.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')]
AmSn18980210-V13-06-page8.txt: [('righteousnessright', 'righteousness right')]
AmSn18980210-V13-06-page9.txt: [('Onstratethepropriety', 'On st rate the propriety')]
AmSn18980217-V13-07-page15.txt: [('Anothervaluablefeature', 'Another valuable feature')]
AmSn18980224-V13-08-page6.txt: [('ourChristiancharacter', 'our Christian character')]
AmSn18980303-V13-09-page14.txt: [('bottomwithmetalandready', 'bottom with metal and ready')]
AmSn18980303-V13-09-page7.txt: [('EstablishingReligious', 'Establishing Religious')]
AmSn18980310-V13-10-page14.txt: [('Catalogueofallourstyles', 'Catalogue of all our styles'), ('CatarrhInhalerFree', 'Catarrh Inhaler Free')]
AmSn18980331-V13-13-page14.txt: [('IllustratingtheTravelsofPaul', 'Illustrating the Travels of Paul')]
AmSn18980331-V13-13-page15.txt: [('Anothervaluablefeature', 'Another valuable feature')]
AmSn18980407-V13-14-page12.txt: [('covetousdisposition', 'covetous disposition')]
AmSn18980414-V13-15-page14.txt: [('Communicationsstrictly', 'Communications strictly')]
AmSn18980512-V13-19-page6.txt: [('intermeddlehimself', 'intermeddle himself')]
AmSn18980602-V13-22-page7.txt: [('Christiansunconsciously', 'Christians unconsciously'), ('Catholicsunderstand', 'Catholics understand')]
AmSn18980609-V13-23-page14.txt: [('SolidVestibuledPullmanDiningandSleepingCarTrains', 'Solid Vestibuled Pullman Dining and Sleeping Car Trains')]
AmSn18980609-V13-23-page15.txt: [('hethatspeakswithatongue', 'he that speaks with a tongue')]
AmSn18980609-V13-23-page9.txt: [('civicrighteousness', 'civic righteousness')]
AmSn18980616-V13-24-page14.txt: [('ManualofParliamentaryRules', 'Manual of Parliamentary Rules')]
AmSn18980616-V13-24-page15.txt: [('weseethroughaglass', 'we see through a glass')]
AmSn18980623-V13-25-page3.txt: [('correctlyrepresent', 'correctly represent')]
AmSn18980623-V13-25-page5.txt: [('questiondestruction', 'question destruction')]
AmSn18980630-V13-26-page14.txt: [('handsomelyillustrated', 'handsomely illustrated')]
AmSn18980811-V13-31-page3.txt: [('PhilippineArchipelago', 'Philippine Archipelago')]
AmSn18980818-V13-32-page8.txt: [('establishingAmerican', 'establishing American')]
AmSn18980915-V13-36-page15.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18980922-V13-37-page11.txt: [('smallconsideration', 'small consideration')]
AmSn18980929-V13-38-page2.txt: [('voluntarilydescended', 'voluntarily descended')]
AmSn18981006-V13-39-page14.txt: [('PACIFICPRESSPUBLISHINGCO', 'PACIFIC PRESS PUBLISHING C O')]
AmSn18981110-V13-44-page14.txt: [('reeFeoiredyeiaerrsefe', 'ree Fe o ire dye i a errs e f e')]
AmSn18981110-V13-44-page3.txt: [('ecclesiasticalpowers', 'ecclesiastical powers')]
AmSn18981208-V13-48-page15.txt: [('ScientificAmerican', 'Scientific American')]
AmSn18981215-V13-49-page7.txt: [('otherinsurrectionists', 'other insurrection i sts')]
AmSn18990105-V14-01-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990112-V14-02-page14.txt: [('InterestingandInstructive', 'Interesting and Instructive')]
AmSn18990112-V14-02-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990126-V14-04-page12.txt: [('biblicalChristianity', 'biblical Christianity')]
AmSn18990126-V14-04-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990126-V14-04-page16.txt: [('Americanimperialism', 'American imperialism')]
AmSn18990126-V14-04-page3.txt: [('certainiinalienable', 'certain i inalienable')]
AmSn18990202-V14-05-page5.txt: [('Americandomination', 'American domination')]
AmSn18990209-V14-06-page5.txt: [('thoroughlyfurnished', 'thoroughly furnished')]
AmSn18990316-V14-11-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990316-V14-11-page6.txt: [('earthlygovernments', 'earthly governments')]
AmSn18990330-V14-13-page10.txt: [('injusticeinseparable', 'injustice inseparable')]
AmSn18990330-V14-13-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990511-V14-19-page15.txt: [('Postagestampsaccepted', 'Postage stamps accepted')]
AmSn18990518-V14-20-page12.txt: [('appallingproportions', 'appalling proportions')]
AmSn18990518-V14-20-page13.txt: [('Ordernowandgetthebenefitofasplendidhelpin', 'Order now and get the benefit of a splendid help in')]
AmSn18990518-V14-20-page14.txt: [('InterestingandInstructive', 'Interesting and Instructive')]
AmSn18990525-V14-21-page6.txt: [('indicateverymoderate', 'indicate very moderate')]
AmSn18990608-V14-23-page13.txt: [('grosslyjdiscriminating', 'grossly j discriminating')]
AmSn18990615-V14-24-page11.txt: [('employmentelsewhere', 'employment elsewhere')]
AmSn18990713-V14-27-page13.txt: [('Oneofthemostinterestingvolumeseverpublished', 'One of the most interesting volumes ever published')]
AmSn18990727-V14-29-page11.txt: [('temporalgovernments', 'temporal governments')]
AmSn18990803-V14-30-page15.txt: [('concerningthismuch', 'concerning this much')]
AmSn18990810-V14-31-page10.txt: [('followingresolution', 'following resolution')]
AmSn18990824-V14-33-page7.txt: [('exercisethemselves', 'exercise themselves')]
AmSn18990831-V14-34-page5.txt: [('foreignnationality', 'foreign nationality')]
AmSn18990914-V14-36-page15.txt: [('IINothingBetterPublished', 'II Nothing Better Published')]
AmSn18991005-V14-39-page10.txt: [('louderdemonstrations', 'louder demonstrations')]
AmSn18991005-V14-39-page15.txt: [('Containingadditional', 'Containing additional')]
AmSn18991019-V14-41-page3.txt: [('overworkingthemselves', 'over working themselves')]
AmSn18991102-V14-43-page2.txt: [('secureirecognition', 'secure i recognition')]
AmSn18991116-V14-45-page11.txt: [('alreadyestablished', 'already established')]
AmSn18991130-V14-47-page3.txt: [('objectionableithing', 'objectionable i thing')]
AmSn19000104-V15-01-page5.txt: [('notwithstandingconstitutional', 'notwithstanding constitutional'), ('GenuineChristianity', 'Genuine Christianity')]
AmSn19000111-V15-02-page7.txt: [('ordinaryinstruments', 'ordinary instruments')]
AmSn19000111-V15-02-page8.txt: [('advertisementcaptured', 'advertisement captured')]
AmSn19000118-V15-03-page7.txt: [('independtrespassers', 'in depend trespassers')]
AmSn19000118-V15-03-page8.txt: [('renderingallegiance', 'rendering allegiance')]
AmSn19000125-V15-04-page12.txt: [('advocateindependence', 'advocate independence')]
AmSn19000201-V15-05-page4.txt: [('civilandreligiousfreedom', 'civil and religious freedom')]
AmSn19000208-V15-06-page14.txt: [('bindingforcontinuous', 'binding for continuous')]
AmSn19000208-V15-06-page2.txt: [('developmenthasvaried', 'development has varied')]
AmSn19000215-V15-07-page1.txt: [('principleunderlying', 'principle underlying')]
AmSn19000215-V15-07-page14.txt: [('beautifullystamped', 'beautifully stamped')]
AmSn19000301-V15-09-page3.txt: [('establishingtheRoman', 'establishing the Roman')]
AmSn19000315-V15-11-page10.txt: [('accustomingthemselves', 'a c custom ing themselves')]
AmSn19000315-V15-11-page13.txt: [('literaryentertainments', 'literary entertainments')]
AmSn19000315-V15-11-page6.txt: [('isdiscriminationand', 'is discrimination and'), ('discriminationshall', 'discrimination shall')]
AmSn19000405-V15-14-page3.txt: [('Politicsirepresents', 'Politics i represents')]
AmSn19000405-V15-14-page6.txt: [('languageArchbishop', 'language Archbishop')]
AmSn19000426-V15-17-page14.txt: [('StateDistinguished', 'State Dist ing u i s h e d')]
AmSn19000426-V15-17-page3.txt: [('Sundaylawsclaimthat', 'Sunday laws claim that')]
AmSn19000719-V15-28-page1.txt: [('Sabbathinstitution', 'Sabbath institution')]
AmSn19000719-V15-28-page3.txt: [('eveirtowardiaristocracy', 'eve ir toward i aristocracy'), ('magistratebelieveth', 'magistrate believeth')]
AmSn19000719-V15-28-page8.txt: [('conditionsprevailing', 'conditions prevailing')]
AmSn19000809-V15-31-page12.txt: [('theireleeterafPrOeirtil', 'the ire lee ter a f P r O e i r t i l')]
AmSn19000816-V15-32-page3.txt: [('considerablepolitical', 'considerable political')]
AmSn19000830-V15-34-page10.txt: [('constitutionallimitations', 'constitutional limitations')]
AmSn19000830-V15-34-page11.txt: [('prosecutingattorney', 'prosecuting attorney')]
AmSn19000830-V15-34-page14.txt: [('interestinghistories', 'interesting histories')]
AmSn19000913-V15-36-page14.txt: [('caYearforMedicalFees', 'c a Year for Medical Fees')]
AmSn19000920-V15-37-page8.txt: [('incompetentreligious', 'incompetent religious')]
AmSn19000928-V15-38-page5.txt: [('generacovetousness', 'genera covetousness')]
AmSn19001018-V15-41-page11.txt: [('religiOuscharaeter', 'religiOus char aet er')]
AmSn19001025-V15-42-page2.txt: [('everlastinginheritance', 'everlasting inheritance')]
AmSn19001129-V15-47-page11.txt: [('Christiancitizenship', 'Christian citizenship')]
AmSn19001129-V15-47-page5.txt: [('accomplishstatement', 'accomplish statement')]
AmSn19001206-V15-48-page10.txt: [('betfererigagerfents', 'bet fere rig a ger fe n t s')]
AmSn19001206-V15-48-page6.txt: [('appropriationonSunday', 'appropriation on Sunday')]
# %load shared_elements/summary.py
# Re-run the overview report on the directory registered under the 'cycle'
# key of `directories`, using the same spelling dictionary and title as the
# baseline run at the top of the notebook. The report prints the directory,
# average verified rate, average error rate and total token count, and the
# returned value is kept in `summary` for the error breakdown that follows.
# NOTE(review): `directories` is defined in an earlier cell — confirm that
# 'cycle' points at the correction pass you intend to evaluate.
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/AmSn/correction9 Average verified rate: 0.9830732455807398 Average of error rates: 0.018591465234258434 Total token count: 8364805
# %load shared_elements/top_errors.py
# Aggregate the unverified tokens from `summary` and display the most
# frequent ones so the remaining error patterns can be inspected.
errors_summary = reports.get_errors_summary( summary )
# The slice keeps only the first 50 entries of the returned list.
# NOTE(review): the second argument (10) is not a result count — the output
# holds 50 entries — so it presumably acts as a minimum-frequency threshold;
# confirm against reports.top_errors.
# This bare expression must stay last in the cell so the notebook displays it.
reports.top_errors( errors_summary, 10 )[:50]
[("'", 8053),
('t', 4433),
('e', 3990),
('d', 3954),
('w', 3754),
('co', 3408),
('m', 3174),
('n', 3056),
('f', 2037),
('r', 2033),
('th', 1645),
('g', 1375),
('mo', 1160),
('u', 930),
('x', 864),
('ex', 521),
('pa', 410),
('q', 399),
('sunday-law', 334),
('k', 315),
("the'", 304),
('pp', 299),
('tion', 276),
("conscience'", 260),
('ch', 253),
('seventhday', 249),
('re', 224),
('ga', 220),
('oc', 218),
('z', 217),
('wm', 215),
('satolli', 210),
('employes', 209),
('munn', 207),
('ti', 200),
('id', 181),
('un', 173),
('ry', 170),
('al', 166),
('sunday-closing', 160),
('ca', 151),
('ment', 146),
('chain-gang', 136),
("to'", 134),
('nd', 130),
('ll', 128),
('lb', 125),
('il', 123),
('bateham', 122),
('cmsar', 121)]