SOL-OCR-Evaluation-and-Correction

In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt", 
             "2016-12-07-SDA-place-names.txt", 
             "2016-12-08-SDA-Vocabulary.txt", 
             "2017-01-03-place-names.txt", 
             "2017-02-14-Base-Word-List-SCOWL&KJV.txt",
             "2017-02-14-Roman-Numerals.txt",
             "2017-03-01-Additional-Approved-Words.txt"
            ]
In [6]:
spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)
In [7]:
title = "SOL"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)

Baseline

In [9]:
cycle = 'baseline'
In [10]:
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/baseline

Average verified rate: 0.9364626634705829

Average of error rates: 0.06729305283757338

Total token count: 1285323

In [11]:
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 100 )
Out[11]:
[('¥', 2308),
 ('-', 1926),
 ('tion', 1340),
 ('re-', 1275),
 ('con-', 999),
 ("'", 914),
 ('in-', 876),
 ('ment', 743),
 ('de-', 697),
 ('t', 677),
 ('¥¥', 661),
 ('sun-', 596),
 ('com-', 547),
 ('be-', 542),
 ('*', 540),
 (')', 535),
 ('n', 469),
 ('en-', 451),
 ('ñ', 438),
 ('pro-', 406),
 ('d', 405),
 ('ex-', 403),
 ('w', 398),
 ('e', 398),
 ('th', 395),
 ('tions', 346),
 ('m', 326),
 ('dis-', 310),
 ('ob-', 285),
 ('govern-', 277),
 ('gov-', 271),
 ('per-', 263),
 ('un-', 254),
 ('sab-', 252),
 ('g', 247),
 ('co', 240),
 ('ac-', 239),
 ('im-', 225),
 ('pre-', 213),
 ('ernment', 210),
 ('ligious', 209),
 ('ap-', 198),
 ('ance', 193),
 ('x', 186),
 ('f', 180),
 ('chris-', 178),
 ('sunday-closing', 175),
 ('r', 172),
 ('at-', 170),
 ('ad-', 168),
 ('al-', 164),
 ('an-', 161),
 ('(', 161),
 ('u', 145),
 ('na-', 142),
 ('as-', 139),
 ('ence', 137),
 ('ments', 136),
 ('servance', 131),
 ('ity', 127),
 ('ar-', 126),
 ('mat-', 121),
 ('relig-', 121),
 ('em-', 120),
 ('ques-', 118),
 ('legis-', 118),
 ('sunday-law', 117),
 ('tian', 117),
 ('pub-', 116),
 ('ligion', 115),
 ('ers', 115),
 ('or-', 114),
 ('forcement', 110),
 ('observ-', 110),
 ('cor-', 109),
 ('pur-', 109),
 ('sub-', 108),
 ('lic', 107),
 ('ber', 106),
 ('busi-', 106),
 ('to-', 105),
 ('coun-', 104),
 ('prin-', 104),
 ('tional', 104),
 ('legisla-', 103),
 ('ple', 103),
 ('op-', 103),
 ('them-', 102),
 ('sunday-enforcement', 102),
 ('lation', 101)]

Review Special Character Use

In [12]:
reports.tokens_with_special_characters(errors_summary)[:100]
Out[12]:
[('¥', 2308),
 ('¥¥', 661),
 ('*', 540),
 (')', 535),
 ('ñ', 438),
 ('(', 161),
 ('/', 92),
 ('•', 91),
 (']', 78),
 ('ñthe', 62),
 ('[the', 62),
 ('(sunday)', 61),
 ('%', 60),
 ('ó', 52),
 ('_', 47),
 ('(the', 42),
 ('+', 37),
 ('ã', 36),
 ('[of', 31),
 ('¥¥¥', 29),
 ('ñeditor', 29),
 ('=', 28),
 ('**', 27),
 ('[', 27),
 ('è', 25),
 ('combinationñthe', 24),
 ('republicsñrome', 24),
 ('(and', 24),
 ('(which', 23),
 ('%c', 23),
 ('••', 23),
 ('ña', 21),
 ('ñthat', 21),
 ('ö', 21),
 ('-¥', 20),
 ('(see', 20),
 ('(minn', 20),
 ('(n', 20),
 ('\\', 20),
 ('sundayñno', 19),
 ('`', 19),
 ('ô', 19),
 ('¥-', 19),
 ('¥+¥', 19),
 ('ñnew', 18),
 ('ñat', 18),
 ('*the', 17),
 ('the¥', 16),
 ('*¥', 16),
 ('(mass', 15),
 ('>', 15),
 ('ñdr', 15),
 ('ñid', 15),
 ('ñit', 15),
 ('(continuing', 14),
 ('(as', 14),
 ('#', 14),
 ('to¥', 14),
 ('ñfrom', 14),
 ('\ufeff', 13),
 ('¥the', 13),
 ('ñsee', 12),
 ('¡', 11),
 ('page)', 11),
 ('¥*', 11),
 ('ñto', 11),
 ('¥-¥', 11),
 ('(a', 11),
 ('(iowa)', 11),
 ('(although', 11),
 ('-)', 11),
 ('sunday]', 10),
 ('ñ_', 10),
 ('day)', 10),
 ('(full', 10),
 ('sentinel)', 10),
 ('sideñsome', 9),
 ('[in', 9),
 ('ñin', 9),
 ('(not', 9),
 ('sunday=law', 9),
 ('(i)', 9),
 ('(of', 9),
 ('theñ', 9),
 ('<', 8),
 ('[to', 8),
 ('¥¥¥¥¥', 8),
 ('i¥', 8),
 ('ñrev', 8),
 ('(unitarian)', 8),
 ('(ohio)', 8),
 ("'¥", 8),
 ('(b)', 8),
 ('(a)', 8),
 ('(ind', 8),
 ('***', 8),
 ('(wis', 8),
 ('(in', 8),
 ('¥t¥', 8),
 ('ñmilman', 8)]

Correction 1 -- Normalize Special Characters

In [13]:
# %load shared_elements/normalize_characters.py
prev = "baseline"
cycle = "correction1"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Substitute for all other dashes
    content = re.sub(r"—-—–‑", r"-", content)

    # Substitute formatted apostrophe
    content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
    
    # Replace all special characters with a space (as these tend to occur at the end of lines)
    content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [14]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/correction1

Average verified rate: 0.9441270584562553

Average of error rates: 0.058811154598825834

Total token count: 1281300

In [15]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[15]:
[('-', 2104),
 ('tion', 1347),
 ('re-', 1276),
 ('con-', 999),
 ("'", 967),
 ('in-', 877),
 ('ment', 746),
 ('t', 738),
 ('de-', 697),
 ('sun-', 601),
 ('com-', 548),
 ('be-', 542),
 ('n', 502),
 ('en-', 452),
 ('e', 437),
 ('d', 414),
 ('w', 410),
 ('pro-', 406),
 ('ex-', 403),
 ('th', 397),
 ('tions', 348),
 ('m', 330),
 ('dis-', 310),
 ('ob-', 285),
 ('govern-', 277),
 ('gov-', 272),
 ('per-', 264),
 ('g', 256),
 ('un-', 255),
 ('sab-', 254),
 ('co', 240),
 ('ac-', 239),
 ('im-', 226),
 ('x', 226),
 ('pre-', 213),
 ('ernment', 212),
 ('ligious', 210),
 ('ap-', 199),
 ('f', 197),
 ('r', 196),
 ('ance', 193),
 ('chris-', 178),
 ('sunday-closing', 176),
 ('at-', 171),
 ('ad-', 168),
 ('al-', 164),
 ('an-', 162),
 ('u', 145),
 ('na-', 144),
 ('as-', 140)]

Correction 2 -- Correct Line Endings

In [16]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [17]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/correction2

Average verified rate: 0.9814883735076206

Average of error rates: 0.02238747553816047

Total token count: 1249431

In [18]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[18]:
[('-', 2090),
 ("'", 967),
 ('t', 737),
 ('n', 502),
 ('e', 435),
 ('d', 414),
 ('w', 410),
 ('th', 397),
 ('m', 330),
 ('g', 255),
 ('co', 239),
 ('x', 226),
 ('sunday-closing', 201),
 ('f', 196),
 ('r', 196),
 ('u', 145),
 ('sunday-enforcement', 134),
 ('sunday-law', 123),
 ("clerks'", 96),
 ('pa', 96),
 ('mo', 77),
 ('k', 65),
 ('io', 56),
 ('z', 53),
 ("'the", 53),
 ('saloon-keepers', 51),
 ('oo', 50),
 ("barbers'", 49),
 ('--', 49),
 ("grocers'", 45),
 ('wm', 44),
 ('mutchler', 42),
 ('church-and-state', 41),
 ('tion', 40),
 ('loth', 38),
 ('law-abiding', 36),
 ('street-cars', 35),
 ('mass-meeting', 33),
 ('non-receipt', 31),
 ("jones'", 29),
 ('e-z', 28),
 ('q', 28),
 ('non-sectarian', 27),
 ('thread-thought', 27),
 ('farmakis', 27),
 ("o'gorman", 27),
 ('saloonmen', 27),
 ('fellow-citizens', 26),
 ('sundayclosing', 26),
 ('lc', 25)]

Correction 3 -- Remove extra dashes

In [19]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    for token in tokens:
        if token[0] is "-":
            replacements.append((token, token[1:]))
            
        elif token[-1] is "-":
            replacements.append((token, token[:-1]))
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
SOL19000510-V15-17-page10.txt: [('-', ''), ('-christian.', 'christian.')]
SOL19000510-V15-17-page11.txt: [('v-', 'v'), ('-', ''), ('polit-', 'polit'), ('-inch', 'inch')]
SOL19000510-V15-17-page13.txt: [('any-', 'any')]
SOL19000510-V15-17-page14.txt: [('-', ''), ('-', '')]
SOL19000510-V15-17-page15.txt: [('-', ''), ('-', '')]
SOL19000510-V15-17-page16.txt: [('-.Ie', '.Ie'), ('CO-', 'CO'), ('SEN-', 'SEN')]
SOL19000510-V15-17-page4.txt: [('-', ''), ('-', '')]
SOL19000510-V15-17-page7.txt: [('-', ''), ('gov-', 'gov')]
SOL19000510-V15-17-page9.txt: [('Sam-', 'Sam')]
SOL19000517-V15-18-page11.txt: [('pur-', 'pur')]
SOL19000517-V15-18-page14.txt: [('-to', 'to'), ('-', '')]
SOL19000517-V15-18-page15.txt: [('-', ''), ('-', '')]
SOL19000517-V15-18-page16.txt: [('-of', 'of')]
SOL19000517-V15-18-page3.txt: [('-', '')]
SOL19000517-V15-18-page4.txt: [('-', '')]
SOL19000517-V15-18-page5.txt: [('-', '')]
SOL19000517-V15-18-page9.txt: [('-', '')]
SOL19000531-V15-21-page12.txt: [('Fur-', 'Fur')]
SOL19000531-V15-21-page14.txt: [('edi-', 'edi'), ('-', ''), ('-', ''), ('-', '')]
SOL19000531-V15-21-page2.txt: [('-say', 'say'), ('ques-', 'ques')]
SOL19000614-V15-23-page1.txt: [('de-', 'de')]
SOL19000614-V15-23-page13.txt: [('add-', 'add')]
SOL19000614-V15-23-page14.txt: [('ad-', 'ad'), ('-', ''), ('-k', 'k'), ('-', '')]
SOL19000614-V15-23-page16.txt: [('-any', 'any'), ('-', ''), ('-', ''), ('by"-', 'by"')]
SOL19000614-V15-23-page2.txt: [('-', '')]
SOL19000614-V15-23-page3.txt: [('Chris-', 'Chris')]
SOL19000614-V15-23-page7.txt: [('Con-', 'Con')]
SOL19000614-V15-23-page9.txt: [('-', '')]
SOL19000628-V15-25-page10.txt: [('Mc-', 'Mc')]
SOL19000628-V15-25-page11.txt: [('-n-ipSENTINEL', 'n-ipSENTINEL')]
SOL19000628-V15-25-page14.txt: [('ad-', 'ad'), ('-rogue', 'rogue'), ('-', ''), ('-', '')]
SOL19000628-V15-25-page15.txt: [('A-', 'A'), ('H-', 'H'), ('si-', 'si'), ('A-', 'A'), ('-', ''), ('PUBLISHING-', 'PUBLISHING')]
SOL19000628-V15-25-page16.txt: [('TINlEL-', 'TINlEL'), ('-', '')]
SOL19000628-V15-25-page5.txt: [('PRO-', 'PRO'), ('-', '')]
SOL19000628-V15-25-page6.txt: [('prin-', 'prin')]
SOL19000628-V15-25-page8.txt: [('pre-', 'pre')]
SOL19000726-V15-29-page11.txt: [('par-', 'par')]
SOL19000726-V15-29-page13.txt: [('-', ''), ('I-', 'I')]
SOL19000726-V15-29-page16.txt: [('-Any', 'Any'), ('-', '')]
SOL19000726-V15-29-page2.txt: [('-', ''), ('-you', 'you')]
SOL19000726-V15-29-page4.txt: [('the-', 'the')]
SOL19000726-V15-29-page9.txt: [('simi-', 'simi'), ('-', ''), ('occa-', 'occa')]
SOL19000802-V15-30-page1.txt: [('-of', 'of')]
SOL19000802-V15-30-page11.txt: [('Mc-', 'Mc')]
SOL19000802-V15-30-page13.txt: [('-', '')]
SOL19000802-V15-30-page14.txt: [('ad-', 'ad'), ('-', ''), ('-', ''), ('cir-', 'cir'), ('-', '')]
SOL19000802-V15-30-page15.txt: [('-K', 'K')]
SOL19000802-V15-30-page16.txt: [('-ro', 'ro')]
SOL19000809-V15-31-page1.txt: [('distin-', 'distin')]
SOL19000809-V15-31-page10.txt: [('-NO', 'NO'), ('wis-', 'wis')]
SOL19000809-V15-31-page12.txt: [('-', '')]
SOL19000809-V15-31-page14.txt: [('-', ''), ('ad-', 'ad'), ('-', ''), ('-', '')]
SOL19000809-V15-31-page15.txt: [('-', '')]
SOL19000809-V15-31-page16.txt: [('non-', 'non')]
SOL19000809-V15-31-page5.txt: [('horri-', 'horri')]
SOL19000809-V15-31-page6.txt: [('Ro-', 'Ro')]
SOL19000809-V15-31-page8.txt: [('-', '')]
SOL19000816-V15-32-page1.txt: [('-inherent', 'inherent')]
SOL19000816-V15-32-page11.txt: [('-"', '"'), ('-words', 'words'), ('re-', 're'), ('broad-', 'broad'), ('-sword', 'sword'), ('-on', 'on'), ('enrolled-', 'enrolled'), ('-', '')]
SOL19000816-V15-32-page13.txt: [('-tossing', 'tossing'), ('-the', 'the'), ('-forth', 'forth')]
SOL19000816-V15-32-page14.txt: [('-', ''), ('-', '')]
SOL19000816-V15-32-page16.txt: [('LIB-', 'LIB'), ('Constitu-', 'Constitu')]
SOL19000816-V15-32-page3.txt: [('indi-', 'indi'), ('-Thursday', 'Thursday')]
SOL19000816-V15-32-page4.txt: [('-', ''), ('train-', 'train')]
SOL19000816-V15-32-page6.txt: [('-which', 'which'), ('utter-', 'utter'), ('pre-', 'pre'), ('na-', 'na'), ('-tions', 'tions')]
SOL19000816-V15-32-page7.txt: [('can-', 'can'), ('-of', 'of'), ('-of', 'of'), ('-outpouring', 'outpouring')]
SOL19000816-V15-32-page8.txt: [('-', '')]
SOL19000823-V15-33-page1.txt: [('.-', '.')]
SOL19000823-V15-33-page10.txt: [('non-', 'non'), ('un-', 'un')]
SOL19000823-V15-33-page12.txt: [('-by', 'by'), ('--Catholic', '-Catholic'), ('-', '')]
SOL19000823-V15-33-page13.txt: [('Tien-', 'Tien')]
SOL19000823-V15-33-page14.txt: [('-', ''), ('-', '')]
SOL19000823-V15-33-page15.txt: [('"-', '"')]
SOL19000823-V15-33-page16.txt: [('.Q-', '.Q')]
SOL19000823-V15-33-page5.txt: [('-', ''), ('-we', 'we')]
SOL19000823-V15-33-page6.txt: [('-The', 'The'), ('-of', 'of')]
SOL19000823-V15-33-page7.txt: [('-', ''), ('-', '')]
SOL19000823-V15-33-page8.txt: [('-impress', 'impress')]
SOL19000823-V15-33-page9.txt: [('-and', 'and')]
SOL19000913-V15-36-page12.txt: [('civil-', 'civil')]
SOL19000913-V15-36-page13.txt: [('composed-', 'composed')]
SOL19000913-V15-36-page15.txt: [('-', ''), ('-', '')]
SOL19000913-V15-36-page2.txt: [('govern-', 'govern')]
SOL19000913-V15-36-page5.txt: [('-', ''), ('en-', 'en')]
SOL19000913-V15-36-page6.txt: [('in-', 'in')]
SOL19000913-V15-36-page9.txt: [('-', '')]
SOL19000927-V15-38-page1.txt: [('-Jesus', 'Jesus')]
SOL19000927-V15-38-page10.txt: [('-', ''), ('-any', 'any'), ('en-', 'en')]
SOL19000927-V15-38-page12.txt: [('-', '')]
SOL19000927-V15-38-page13.txt: [('AUS-', 'AUS'), ('govern-', 'govern'), ('-', '')]
SOL19000927-V15-38-page14.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19000927-V15-38-page15.txt: [('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-LLLLLL.', 'LLLLLL.'), ('-', ''), ('-', ''), ('-', '')]
SOL19000927-V15-38-page16.txt: [('-', ''), ('-', '')]
SOL19000927-V15-38-page4.txt: [('"Many-', '"Many')]
SOL19000927-V15-38-page5.txt: [('-', ''), ('-', ''), ('-', ''), ('Chris-', 'Chris'), ('-department', 'department'), ('do-', 'do'), ('govern-', 'govern'), ('de-', 'de'), ('in-', 'in'), ('wide-', 'wide')]
SOL19000927-V15-38-page6.txt: [('-that', 'that'), ('-with', 'with'), ('more-', 'more'), ('-considered', 'considered'), ('t--', 't-')]
SOL19000927-V15-38-page8.txt: [('-no', 'no'), ('Mc-', 'Mc')]
SOL19000927-V15-38-page9.txt: [('de-', 'de')]
SOL19001005-V15-39-page1.txt: [('-', ''), ('-', '')]
SOL19001005-V15-39-page12.txt: [('Ameri-', 'Ameri')]
SOL19001005-V15-39-page13.txt: [('commenda-', 'commenda')]
SOL19001005-V15-39-page14.txt: [('-', '')]
SOL19001005-V15-39-page15.txt: [('-a-', 'a-'), ('-', ''), ('-', ''), ('\'\'\'\'"\'TTTTTTTTTTTTrrTTTTT.TTTTT-', '\'\'\'\'"\'TTTTTTTTTTTTrrTTTTT.TTTTT')]
SOL19001005-V15-39-page3.txt: [('-shall', 'shall')]
SOL19001005-V15-39-page7.txt: [('-and', 'and'), ('some-', 'some'), ('-', ''), ('work-', 'work')]
SOL19001005-V15-39-page8.txt: [('Anglo-', 'Anglo')]
SOL19001005-V15-39-page9.txt: [('mat-', 'mat'), ('ARBI-', 'ARBI'), ('Eng-', 'Eng'), ('notwith-', 'notwith')]
SOL19001101-V15-43-page10.txt: [('-', ''), ('agree-', 'agree'), ('-keeping', 'keeping'), ('Cath-', 'Cath')]
SOL19001101-V15-43-page12.txt: [('non-', 'non'), ('non-', 'non'), ('en-', 'en')]
SOL19001101-V15-43-page13.txt: [('de-', 'de'), ('bar-', 'bar')]
SOL19001101-V15-43-page14.txt: [('b-', 'b'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19001101-V15-43-page15.txt: [("-'", "'")]
SOL19001101-V15-43-page16.txt: [('NI-El-', 'NI-El'), ('-', ''), ('IT-', 'IT'), ('-rang', 'rang')]
SOL19001101-V15-43-page2.txt: [('self-', 'self')]
SOL19001101-V15-43-page3.txt: [('-r.', 'r.')]
SOL19001101-V15-43-page5.txt: [('char-', 'char'), ('dis-', 'dis'), ('Estab-', 'Estab'), ('-', ''), ('-', '')]
SOL19001101-V15-43-page7.txt: [('-', ''), ('-', ''), ('enforce-', 'enforce')]
SOL19001108-V15-44-page1.txt: [('self-con-', 'self-con')]
SOL19001108-V15-44-page10.txt: [('-', '')]
SOL19001108-V15-44-page13.txt: [('--Farm', '-Farm')]
SOL19001108-V15-44-page14.txt: [('-', ''), ('-', ''), ('-R', 'R'), ('-It', 'It'), ('-', '')]
SOL19001108-V15-44-page15.txt: [('-', ''), ('-', ''), ('SEN-', 'SEN')]
SOL19001108-V15-44-page16.txt: [('...--', '...-'), ('ENTIILE--', 'ENTIILE-'), ('-A', 'A'), ('cr.-', 'cr.'), ('-', '')]
SOL19001108-V15-44-page2.txt: [('spreading-', 'spreading')]
SOL19001108-V15-44-page6.txt: [('Cath-', 'Cath')]
SOL19001108-V15-44-page9.txt: [('coun-', 'coun'), ('dere-', 'dere')]
SOL19011201-V17-01-page10.txt: [('-upon', 'upon')]
SOL19011201-V17-01-page12.txt: [('-assert', 'assert'), ('individ-', 'individ')]
SOL19011201-V17-01-page2.txt: [('Anglo-', 'Anglo')]
SOL19011201-V17-01-page24.txt: [('un-', 'un')]
SOL19011201-V17-01-page27.txt: [('-', ''), ('free-', 'free')]
SOL19011201-V17-01-page29.txt: [('-advanced', 'advanced')]
SOL19011201-V17-01-page3.txt: [('-', '')]
SOL19011201-V17-01-page31.txt: [('-Therefore', 'Therefore')]
SOL19011201-V17-01-page34.txt: [('coun-', 'coun')]
SOL19011201-V17-01-page35.txt: [('ef-', 'ef')]
SOL19011201-V17-01-page37.txt: [('Inde-', 'Inde')]
SOL19011201-V17-01-page39.txt: [('ex-', 'ex')]
SOL19011201-V17-01-page44.txt: [('"Sab-', '"Sab'), ('re-', 're'), ('ser-', 'ser')]
SOL19011201-V17-01-page45.txt: [('ap-', 'ap')]
SOL19011201-V17-01-page49.txt: [('-', '')]
SOL19011201-V17-01-page50.txt: [('-', '')]
SOL19011201-V17-01-page51.txt: [('Mc-', 'Mc')]
SOL19011201-V17-01-page52.txt: [('them-', 'them')]
SOL19011201-V17-01-page54.txt: [('-', '')]
SOL19011201-V17-01-page55.txt: [('-', '')]
SOL19011201-V17-01-page56.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19011201-V17-01-page57.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19011201-V17-01-page58.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-..-', '..-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19011201-V17-01-page59.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ("-iriatlilill'Ariailtarika-", "iriatlilill'Ariailtarika-"), ('-', '')]
SOL19011201-V17-01-page60.txt: [('CON-', 'CON'), ('-', ''), ('-', ''), ('-', ''), ('-V', 'V')]
SOL19011201-V17-01-page9.txt: [('from-', 'from')]
SOL19020101-V17-02-page1.txt: [('-to', 'to')]
SOL19020101-V17-02-page10.txt: [('"compact-', '"compact')]
SOL19020101-V17-02-page12.txt: [('terri-', 'terri')]
SOL19020101-V17-02-page13.txt: [('-', ''), ('----', '---')]
SOL19020101-V17-02-page14.txt: [('-from', 'from')]
SOL19020101-V17-02-page15.txt: [('-', '')]
SOL19020101-V17-02-page18.txt: [('to-', 'to')]
SOL19020101-V17-02-page19.txt: [('al-', 'al')]
SOL19020101-V17-02-page20.txt: [('Chris-', 'Chris')]
SOL19020101-V17-02-page22.txt: [('refer-', 'refer')]
SOL19020101-V17-02-page23.txt: [('-observed', 'observed'), ('-of', 'of'), ('-country.', 'country.'), ('se-', 'se'), ('-cured', 'cured'), ('-the', 'the'), ('-criminal', 'criminal'), ('pronoun-', 'pronoun')]
SOL19020101-V17-02-page28.txt: [('-But', 'But'), ('main-', 'main')]
SOL19020101-V17-02-page29.txt: [('appear-', 'appear')]
SOL19020101-V17-02-page3.txt: [('-', '')]
SOL19020101-V17-02-page30.txt: [('FOR-', 'FOR')]
SOL19020101-V17-02-page33.txt: [('con-', 'con')]
SOL19020101-V17-02-page34.txt: [('pos-', 'pos')]
SOL19020101-V17-02-page35.txt: [('pro-', 'pro')]
SOL19020101-V17-02-page37.txt: [('en-', 'en')]
SOL19020101-V17-02-page38.txt: [('intoler-', 'intoler')]
SOL19020101-V17-02-page40.txt: [('ob-', 'ob')]
SOL19020101-V17-02-page42.txt: [('per-', 'per')]
SOL19020101-V17-02-page44.txt: [('Sab-', 'Sab')]
SOL19020101-V17-02-page48.txt: [('Consti-', 'Consti')]
SOL19020101-V17-02-page5.txt: [('-the', 'the'), ('-', ''), ('by-', 'by')]
SOL19020101-V17-02-page51.txt: [('in-', 'in')]
SOL19020101-V17-02-page53.txt: [('-', '')]
SOL19020101-V17-02-page55.txt: [('Spanish-', 'Spanish')]
SOL19020101-V17-02-page56.txt: [('-', '')]
SOL19020101-V17-02-page60.txt: [('law-', 'law')]
SOL19020101-V17-02-page63.txt: [('-instead', 'instead')]
SOL19020101-V17-02-page65.txt: [('resolu-', 'resolu')]
SOL19020101-V17-02-page68.txt: [('-', ''), ('-', ''), ('-', ''), ('P-', 'P'), ('.--', '.-'), ('-iti..', 'iti..'), ('-', ''), ('-', '')]
SOL19020101-V17-02-page69.txt: [('CON-', 'CON'), ('-', ''), ('-', ''), ('-', ''), ('test-', 'test'), ('-', ''), ('-', '')]
SOL19020101-V17-02-page7.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19020101-V17-02-page70.txt: [('--', '-'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.')]
SOL19020101-V17-02-page72.txt: [('-', ''), ('-', '')]
SOL19020101-V17-02-page8.txt: [('anti-', 'anti')]
SOL19020201-V17-03-page1.txt: [('-', ''), ('pur-', 'pur')]
SOL19020201-V17-03-page12.txt: [('entire-', 'entire')]
SOL19020201-V17-03-page17.txt: [('sen-', 'sen')]
SOL19020201-V17-03-page2.txt: [('do-', 'do')]
SOL19020201-V17-03-page22.txt: [('-the', 'the')]
SOL19020201-V17-03-page27.txt: [('-', '')]
SOL19020201-V17-03-page3.txt: [('en-', 'en')]
SOL19020201-V17-03-page30.txt: [('Chris-', 'Chris')]
SOL19020201-V17-03-page37.txt: [('-s', 's')]
SOL19020201-V17-03-page38.txt: [('un-', 'un')]
SOL19020201-V17-03-page41.txt: [('Bel-', 'Bel')]
SOL19020201-V17-03-page44.txt: [('saloon-keep-', 'saloon-keep')]
SOL19020201-V17-03-page48.txt: [('the-', 'the')]
SOL19020201-V17-03-page51.txt: [('-', ''), ('post-', 'post'), ('na-', 'na')]
SOL19020201-V17-03-page53.txt: [('Park-', 'Park')]
SOL19020201-V17-03-page54.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('en-', 'en')]
SOL19020201-V17-03-page55.txt: [('-', '')]
SOL19020201-V17-03-page56.txt: [('-', ''), ('SEN-', 'SEN'), ('SENTI-', 'SENTI')]
SOL19020201-V17-03-page57.txt: [('-', ''), ('-', '')]
SOL19020201-V17-03-page58.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19020201-V17-03-page59.txt: [('-', ''), ('-', ''), ('Missouri-', 'Missouri')]
SOL19020201-V17-03-page60.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('SOY-', 'SOY'), ('-mon', 'mon'), ("-O-mon's", "O-mon's"), ("Sol'-", "Sol'"), ('-mon', 'mon')]
SOL19020201-V17-03-page61.txt: [('CON-', 'CON'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-V', 'V'), ('-Kitchen', 'Kitchen'), ('-', '')]
SOL19020201-V17-03-page62.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-uyew', 'uyew'), ('u-a-', 'u-a')]
SOL19020201-V17-03-page63.txt: [('-', ''), ('-', ''), ('-', ''), ('iv-', 'iv')]
SOL19020201-V17-03-page64.txt: [('-', '')]
SOL19020201-V17-03-page7.txt: [('ca-', 'ca')]
SOL19020301-V17-04-page12.txt: [('in-', 'in')]
SOL19020301-V17-04-page19.txt: [('inter-', 'inter')]
SOL19020301-V17-04-page23.txt: [('re-', 're')]
SOL19020301-V17-04-page26.txt: [('-', '')]
SOL19020301-V17-04-page31.txt: [('con-', 'con')]
SOL19020301-V17-04-page32.txt: [('Anglo-', 'Anglo'), ('Anglo-', 'Anglo')]
SOL19020301-V17-04-page34.txt: [('some-', 'some'), ('-.fling', '.fling'), ('Apostolic-', 'Apostolic'), ('de-', 'de')]
SOL19020301-V17-04-page37.txt: [('con-', 'con')]
SOL19020301-V17-04-page39.txt: [('"Blue-', '"Blue')]
SOL19020301-V17-04-page4.txt: [('mat-', 'mat')]
SOL19020301-V17-04-page40.txt: [('-middleclass', 'middleclass')]
SOL19020301-V17-04-page46.txt: [('per-', 'per')]
SOL19020301-V17-04-page53.txt: [('amend-', 'amend')]
SOL19020301-V17-04-page54.txt: [('an-', 'an')]
SOL19020301-V17-04-page55.txt: [('Spanish-', 'Spanish'), ('no-', 'no'), ('-', ''), ('de-', 'de')]
SOL19020301-V17-04-page57.txt: [('British-', 'British')]
SOL19020301-V17-04-page59.txt: [('-', '')]
SOL19020301-V17-04-page61.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', '')]
SOL19020301-V17-04-page62.txt: [('-', ''), ('--qt', '-qt'), ('-', ''), ('-o', 'o'), ('-o-m.c', 'o-m.c'), ('-.', '.'), ('--c.', '-c.'), ('-', '')]
SOL19020301-V17-04-page63.txt: [('-.', '.'), ('--', '-'), ('-', ''), ('-', ''), ("-.'", ".'"), ('..--', '..-'), ('-.-', '.-'), ('-.', '.'), ('-', ''), ('-', ''), ('....-', '....'), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('.--', '.-'), ('-.', '.'), ("'-", "'"), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19020401-V17-05-page12.txt: [('SENTI-', 'SENTI')]
SOL19020401-V17-05-page17.txt: [('-', '')]
SOL19020401-V17-05-page19.txt: [('ter-', 'ter')]
SOL19020401-V17-05-page2.txt: [('-', '')]
SOL19020401-V17-05-page23.txt: [('---I', '--I')]
SOL19020401-V17-05-page26.txt: [('admit-', 'admit')]
SOL19020401-V17-05-page27.txt: [('legisla-', 'legisla')]
SOL19020401-V17-05-page28.txt: [('ex-', 'ex')]
SOL19020401-V17-05-page36.txt: [("riv'-", "riv'"), ('Wash-', 'Wash'), ('im-', 'im')]
SOL19020401-V17-05-page38.txt: [('re-', 're')]
SOL19020401-V17-05-page39.txt: [('non-', 'non')]
SOL19020401-V17-05-page41.txt: [('non-', 'non')]
SOL19020401-V17-05-page45.txt: [('refer-', 'refer')]
SOL19020401-V17-05-page52.txt: [('-hoped', 'hoped')]
SOL19020401-V17-05-page54.txt: [('Anglo-', 'Anglo')]
SOL19020401-V17-05-page56.txt: [('prose-', 'prose')]
SOL19020401-V17-05-page58.txt: [('Ex-', 'Ex')]
SOL19020401-V17-05-page60.txt: [('-', '')]
SOL19020401-V17-05-page61.txt: [('-', ''), ('SEN-', 'SEN')]
SOL19020401-V17-05-page62.txt: [('-qb.', 'qb.'), ('-', ''), ('re-', 're')]
SOL19020401-V17-05-page8.txt: [('Ethel-', 'Ethel')]
SOL19020401-V17-05-page9.txt: [('-', ''), ('suf-', 'suf')]
SOL19020501-V17-06-page11.txt: [('character-', 'character')]
SOL19020501-V17-06-page14.txt: [('-and', 'and')]
SOL19020501-V17-06-page17.txt: [('an-', 'an')]
SOL19020501-V17-06-page18.txt: [('Governor-', 'Governor')]
SOL19020501-V17-06-page2.txt: [('Medo-', 'Medo')]
SOL19020501-V17-06-page20.txt: [('-', ''), ('-c', 'c'), ('-', ''), ('-', ''), ("'-", "'"), ('-', ''), ('-', ''), ('"at--', '"at-'), ('ov-', 'ov'), ('-', ''), ("'N--", "'N-"), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.A', '.A'), ('-sk', 'sk'), ('-', ''), ('-z', 'z'), ('-', '')]
SOL19020501-V17-06-page21.txt: [('-', ''), ('posses-', 'posses')]
SOL19020501-V17-06-page22.txt: [('proper-', 'proper')]
SOL19020501-V17-06-page24.txt: [('au-', 'au')]
SOL19020501-V17-06-page26.txt: [('-', '')]
SOL19020501-V17-06-page29.txt: [('considera-', 'considera'), ('--', '-'), ('Cu-', 'Cu')]
SOL19020501-V17-06-page31.txt: [('-per-cent.', 'per-cent.'), ('-per-cent.', 'per-cent.'), ('-', '')]
SOL19020501-V17-06-page35.txt: [('in-', 'in'), ('Cath-', 'Cath')]
SOL19020501-V17-06-page4.txt: [('-', '')]
SOL19020501-V17-06-page41.txt: [('where-', 'where')]
SOL19020501-V17-06-page42.txt: [('simply-', 'simply')]
SOL19020501-V17-06-page45.txt: [('cor-', 'cor')]
SOL19020501-V17-06-page46.txt: [('Mc-', 'Mc')]
SOL19020501-V17-06-page47.txt: [('-religious', 'religious')]
SOL19020501-V17-06-page48.txt: [('main-', 'main')]
SOL19020501-V17-06-page50.txt: [('Mc-', 'Mc')]
SOL19020501-V17-06-page55.txt: [('-', '')]
SOL19020501-V17-06-page56.txt: [('in-', 'in')]
SOL19020501-V17-06-page57.txt: [('Lieutehant-', 'Lieutehant')]
SOL19020501-V17-06-page58.txt: [('Anglo-', 'Anglo')]
SOL19020501-V17-06-page61.txt: [('SEN-', 'SEN'), ('LIB-', 'LIB'), ('LIB-', 'LIB')]
SOL19020501-V17-06-page62.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-e', 'e'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-"', '"')]
SOL19020501-V17-06-page8.txt: [('-page', 'page')]
SOL19020601-V17-07-page12.txt: [('Anglo-', 'Anglo')]
SOL19020601-V17-07-page14.txt: [('particu-', 'particu')]
SOL19020601-V17-07-page18.txt: [('follow-', 'follow')]
SOL19020601-V17-07-page20.txt: [('-', '')]
SOL19020601-V17-07-page26.txt: [('particu-', 'particu')]
SOL19020601-V17-07-page29.txt: [('"supplemen-', '"supplemen')]
SOL19020601-V17-07-page32.txt: [('organ-', 'organ')]
SOL19020601-V17-07-page34.txt: [('Anglo-', 'Anglo')]
SOL19020601-V17-07-page35.txt: [('-', ''), ('Pro-', 'Pro')]
SOL19020601-V17-07-page36.txt: [('-', '')]
SOL19020601-V17-07-page4.txt: [('gov-', 'gov')]
SOL19020601-V17-07-page40.txt: [('Mc-', 'Mc')]
SOL19020601-V17-07-page41.txt: [('state-', 'state')]
SOL19020601-V17-07-page43.txt: [('Tem-', 'Tem'), ('confec-', 'confec')]
SOL19020601-V17-07-page46.txt: [('-', ''), ('-two', 'two')]
SOL19020601-V17-07-page48.txt: [('re-', 're')]
SOL19020601-V17-07-page49.txt: [('con-', 'con')]
SOL19020601-V17-07-page5.txt: [('ma-', 'ma')]
SOL19020601-V17-07-page50.txt: [('chi-', 'chi'), ('-bring', 'bring'), ('-exercise', 'exercise')]
SOL19020601-V17-07-page51.txt: [('ob-', 'ob')]
SOL19020601-V17-07-page55.txt: [('-case', 'case'), ('-prevalent', 'prevalent')]
SOL19020601-V17-07-page57.txt: [('an-', 'an')]
SOL19020601-V17-07-page58.txt: [('-', ''), ('-petitioned', 'petitioned'), ('Sun-', 'Sun')]
SOL19020601-V17-07-page60.txt: [('re-', 're')]
SOL19020601-V17-07-page61.txt: [('bath-', 'bath')]
SOL19020601-V17-07-page67.txt: [('Robin-', 'Robin')]
SOL19020601-V17-07-page70.txt: [('whole-', 'whole'), ('re-', 're')]
SOL19020601-V17-07-page72.txt: [('an-', 'an')]
SOL19020601-V17-07-page73.txt: [('Inter-', 'Inter'), ('prom-', 'prom')]
SOL19020601-V17-07-page75.txt: [('SEN-', 'SEN'), ('LIB-', 'LIB'), ('cor-', 'cor')]
SOL19020601-V17-07-page76.txt: [('Ex-', 'Ex')]
SOL19020601-V17-07-page77.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19020601-V17-07-page78.txt: [('-', ''), ('-', '')]
SOL19020601-V17-07-page9.txt: [('anti-', 'anti'), ('big-', 'big')]
SOL19020701-V17-08-page1.txt: [('per-', 'per')]
SOL19020701-V17-08-page15.txt: [('civil-', 'civil')]
SOL19020701-V17-08-page16.txt: [('-', '')]
SOL19020701-V17-08-page21.txt: [('commis-', 'commis')]
SOL19020701-V17-08-page22.txt: [('ob-', 'ob')]
SOL19020701-V17-08-page23.txt: [('com-', 'com')]
SOL19020701-V17-08-page26.txt: [('-but', 'but')]
SOL19020701-V17-08-page27.txt: [('GOVERN-', 'GOVERN'), ('CONSIDERA-', 'CONSIDERA'), ('UN-', 'UN'), ('ex-', 'ex')]
SOL19020701-V17-08-page28.txt: [('re-', 're')]
SOL19020701-V17-08-page32.txt: [('RIGHT-', 'RIGHT')]
SOL19020701-V17-08-page34.txt: [('Independence-', 'Independence')]
SOL19020701-V17-08-page35.txt: [('im-', 'im')]
SOL19020701-V17-08-page40.txt: [('Administra-', 'Administra')]
SOL19020701-V17-08-page41.txt: [('coun-', 'coun')]
SOL19020701-V17-08-page44.txt: [('-', '')]
SOL19020701-V17-08-page45.txt: [('car-', 'car')]
SOL19020701-V17-08-page48.txt: [('-', ''), ('horse-', 'horse')]
SOL19020701-V17-08-page5.txt: [('rea-', 'rea')]
SOL19020701-V17-08-page57.txt: [('-read', 'read'), ('re-', 're')]
SOL19020701-V17-08-page58.txt: [('hu-', 'hu')]
SOL19020701-V17-08-page59.txt: [('--', '-'), ('-where', 'where')]
SOL19020701-V17-08-page6.txt: [('re-', 're')]
SOL19020701-V17-08-page61.txt: [('-', ''), ('-', ''), ('-', ''), ('-..-', '..-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19020701-V17-08-page62.txt: [('Ring-', 'Ring'), ('Ro-', 'Ro'), ('-Christian', 'Christian'), ('ed-', 'ed'), ('Ques-', 'Ques'), ('-.', '.'), ('Re-', 'Re'), ('Ad-', 'Ad'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('X.-', 'X.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.X.', '.X.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', '')]
SOL19020701-V17-08-page64.txt: [('DEC-', 'DEC')]
SOL19020701-V17-08-page7.txt: [('CHRIS-', 'CHRIS'), ('establish-', 'establish')]
SOL19020801-V17-09-page10.txt: [('-of', 'of'), ('-endowed', 'endowed')]
SOL19020801-V17-09-page13.txt: [('con-', 'con'), ('compro-', 'compro')]
SOL19020801-V17-09-page14.txt: [('-an', 'an'), ('-.', '.')]
SOL19020801-V17-09-page16.txt: [('-', ''), ('Philip-', 'Philip')]
SOL19020801-V17-09-page19.txt: [('Friday.-', 'Friday.'), ('i-', 'i')]
SOL19020801-V17-09-page20.txt: [('de-', 'de')]
SOL19020801-V17-09-page22.txt: [('satis-', 'satis'), ('Mur-', 'Mur'), ('rep-', 'rep'), ('Sab-', 'Sab'), ('working-', 'working')]
SOL19020801-V17-09-page3.txt: [('-now', 'now')]
SOL19020801-V17-09-page30.txt: [('ar-', 'ar')]
SOL19020801-V17-09-page33.txt: [('ambi-', 'ambi')]
SOL19020801-V17-09-page35.txt: [('Ex-', 'Ex')]
SOL19020801-V17-09-page36.txt: [('-', '')]
SOL19020801-V17-09-page39.txt: [('Nich-', 'Nich')]
SOL19020801-V17-09-page4.txt: [('fu-', 'fu')]
SOL19020801-V17-09-page40.txt: [('-and', 'and')]
SOL19020801-V17-09-page41.txt: [('non-', 'non'), ('se-', 'se')]
SOL19020801-V17-09-page44.txt: [('observ-', 'observ')]
SOL19020801-V17-09-page49.txt: [('ar-', 'ar')]
SOL19020801-V17-09-page5.txt: [('Dem-', 'Dem')]
SOL19020801-V17-09-page51.txt: [('-', '')]
SOL19020801-V17-09-page52.txt: [('-suppose', 'suppose')]
SOL19020801-V17-09-page57.txt: [('corn-', 'corn'), ('-', '')]
SOL19020801-V17-09-page60.txt: [('-', ''), ('-', '')]
SOL19020801-V17-09-page61.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('por-', 'por'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('x-x-x-', 'x-x-x'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19020801-V17-09-page7.txt: [('in-', 'in')]
SOL19020801-V17-09-page8.txt: [('Chicago-Times-', 'Chicago-Times'), ('Times-', 'Times')]
SOL19020901-V17-10-page1.txt: [('pre-', 'pre')]
SOL19020901-V17-10-page14.txt: [('recogni-', 'recogni')]
SOL19020901-V17-10-page15.txt: [('-"the', '"the'), ('dif-', 'dif')]
SOL19020901-V17-10-page17.txt: [('-', ''), ('-', ''), ('-', ''), ('episco-', 'episco')]
SOL19020901-V17-10-page2.txt: [('-May', 'May'), ('-', '')]
SOL19020901-V17-10-page20.txt: [('-', '')]
SOL19020901-V17-10-page25.txt: [('lib-', 'lib')]
SOL19020901-V17-10-page26.txt: [('-', '')]
SOL19020901-V17-10-page28.txt: [('re-', 're')]
SOL19020901-V17-10-page30.txt: [('con-', 'con')]
SOL19020901-V17-10-page31.txt: [('-', '')]
SOL19020901-V17-10-page32.txt: [('state-usurp-', 'state-usurp')]
SOL19020901-V17-10-page33.txt: [('im-', 'im')]
SOL19020901-V17-10-page34.txt: [('ad-', 'ad'), ('-', '')]
SOL19020901-V17-10-page37.txt: [('pos-', 'pos'), ('con-', 'con')]
SOL19020901-V17-10-page38.txt: [('Mc-', 'Mc')]
SOL19020901-V17-10-page43.txt: [('-how', 'how')]
SOL19020901-V17-10-page44.txt: [('-', '')]
SOL19020901-V17-10-page47.txt: [('-in', 'in')]
SOL19020901-V17-10-page51.txt: [('wor-', 'wor'), ('doc-', 'doc')]
SOL19020901-V17-10-page56.txt: [('Sund-', 'Sund'), ('-', ''), ('-', ''), ('-', ''), ('prac-', 'prac')]
SOL19020901-V17-10-page58.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('--', '-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Christian', 'Christian'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-X..', 'X..'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19020901-V17-10-page59.txt: [('-', ''), ('chronolog-', 'chronolog'), ('-', ''), ('-x', 'x'), ('-.', '.'), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('x-x-', 'x-x'), ('-', ''), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19020901-V17-10-page6.txt: [('turn-', 'turn')]
SOL19020901-V17-10-page60.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-..-', '..-'), ('-.-', '.-'), ('-', ''), ('-', ''), ('-', ''), ('..-', '..'), ('-\'KO"-', '\'KO"-'), ('-', ''), ('.-', '.'), ('-S-', 'S-'), ('-.', '.')]
SOL19020901-V17-10-page66.txt: [('-', '')]
SOL19020901-V17-10-page9.txt: [('hum-', 'hum')]
SOL19021001-V17-11-page14.txt: [('de-', 'de')]
SOL19021001-V17-11-page15.txt: [('neces-', 'neces')]
SOL19021001-V17-11-page19.txt: [('-', ''), ('-', ''), ('-has', 'has')]
SOL19021001-V17-11-page21.txt: [('disturb-', 'disturb')]
SOL19021001-V17-11-page23.txt: [('-', '')]
SOL19021001-V17-11-page27.txt: [('Lu-', 'Lu')]
SOL19021001-V17-11-page28.txt: [('free-', 'free')]
SOL19021001-V17-11-page3.txt: [('pro-', 'pro')]
SOL19021001-V17-11-page31.txt: [('in-', 'in')]
SOL19021001-V17-11-page33.txt: [('-way', 'way'), ('be-', 'be')]
SOL19021001-V17-11-page39.txt: [('re-', 're')]
SOL19021001-V17-11-page41.txt: [('-', '')]
SOL19021001-V17-11-page45.txt: [('ambition.-', 'ambition.')]
SOL19021001-V17-11-page47.txt: [('inci-', 'inci')]
SOL19021001-V17-11-page48.txt: [('re-', 're')]
SOL19021001-V17-11-page5.txt: [('CORRUP-', 'CORRUP')]
SOL19021001-V17-11-page54.txt: [('or-', 'or')]
SOL19021001-V17-11-page57.txt: [('en-', 'en')]
SOL19021001-V17-11-page58.txt: [('SEN-', 'SEN')]
SOL19021001-V17-11-page59.txt: [('.-', '.'), ('-', ''), ('-.', '.'), ('-', ''), ('--', '-'), ('-.', '.'), ('.-', '.'), ('-', ''), ('-.', '.'), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Ring-', 'Ring'), ('-', ''), ('-', ''), ('-', ''), ('Ro-', 'Ro'), ('Ques-', 'Ques'), ('Re-', 'Re'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Ad-', 'Ad'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('.-', '.'), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19021001-V17-11-page6.txt: [('per-', 'per')]
SOL19021001-V17-11-page60.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19021001-V17-11-page61.txt: [('.....-', '.....'), ('--', '-'), ('..---', '..--'), ('-', ''), ('-', ''), ('-.-.', '.-.'), ('-----z', '----z'), ('--', '-'), ('--', '-'), ('---', '--'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('..-.-', '..-.'), ('-', ''), ('.---', '.--'), ('----', '---'), ('---K', '--K'), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.----', '.---'), ('o-', 'o'), ('--...-', '-...-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('t---.-', 't---.'), ('--', '-'), ('---', '--'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-----', '----'), ('-', ''), ('----', '---'), ('-', ''), ('-...', '...'), ('-.-..', '.-..'), ('-g', 'g'), ('-.', '.'), ('-.', '.'), ('--', '-'), ('A-', 'A'), ('--gi-...-', '-gi-...-'), ('-', ''), ('.---', '.--'), ('-..-..c.', '..-..c.'), ('-', ''), ('-.', '.'), ('-ds..-', 'ds..-'), ('--.', '-.'), ('-', ''), ('--', '-'), ('-o-r-', 'o-r-'), ('-.', '.'), ('.--', '.-'), ('..-', '..'), ('-', ''), ('-', ''), ('-', '')]
SOL19021001-V17-11-page64.txt: [('THEM-', 'THEM'), ('-', '')]
SOL19021001-V17-11-page7.txt: [('sen-', 'sen')]
SOL19021001-V17-11-page8.txt: [('SENTI-', 'SENTI')]
SOL19021001-V17-11-page9.txt: [('-', '')]
SOL19021101-V17-12-page11.txt: [('-United', 'United')]
SOL19021101-V17-12-page14.txt: [('al-', 'al')]
SOL19021101-V17-12-page17.txt: [('LIBER-', 'LIBER'), ('-have', 'have'), ('-bad', 'bad')]
SOL19021101-V17-12-page18.txt: [('-', '')]
SOL19021101-V17-12-page2.txt: [('COR-', 'COR')]
SOL19021101-V17-12-page22.txt: [('cer-', 'cer')]
SOL19021101-V17-12-page23.txt: [('in-', 'in'), ('"en-', '"en')]
SOL19021101-V17-12-page24.txt: [('pop-', 'pop')]
SOL19021101-V17-12-page25.txt: [('trans-', 'trans'), ('-', '')]
SOL19021101-V17-12-page26.txt: [('-', '')]
SOL19021101-V17-12-page29.txt: [('Governor-', 'Governor'), ('-', '')]
SOL19021101-V17-12-page3.txt: [('"IN-', '"IN'), ('Lat-', 'Lat')]
SOL19021101-V17-12-page32.txt: [('representa-', 'representa'), ('gov-', 'gov')]
SOL19021101-V17-12-page34.txt: [('AD-', 'AD'), ('NUM-', 'NUM'), ('ARIS-', 'ARIS'), ('FOR-', 'FOR'), ('fre-', 'fre')]
SOL19021101-V17-12-page35.txt: [('MONOP-', 'MONOP'), ('INTER-', 'INTER'), ('PROP-', 'PROP'), ('CON-', 'CON'), ('RECOM-', 'RECOM'), ('DICTATOR-', 'DICTATOR'), ('parti-', 'parti')]
SOL19021101-V17-12-page36.txt: [('ESTAB-', 'ESTAB')]
SOL19021101-V17-12-page40.txt: [('Spanish-', 'Spanish'), ('-authorized', 'authorized')]
SOL19021101-V17-12-page41.txt: [('them-', 'them'), ('-this', 'this')]
SOL19021101-V17-12-page46.txt: [('ad-', 'ad')]
SOL19021101-V17-12-page47.txt: [('Hicks-', 'Hicks')]
SOL19021101-V17-12-page5.txt: [('"PON-', '"PON')]
SOL19021101-V17-12-page51.txt: [('-very', 'very')]
SOL19021101-V17-12-page54.txt: [('dis-', 'dis'), ('short-', 'short'), ('-and', 'and')]
SOL19021101-V17-12-page57.txt: [('oblitera-', 'oblitera')]
SOL19021101-V17-12-page60.txt: [('-', ''), ('SEN-', 'SEN'), ('-', '')]
SOL19021101-V17-12-page61.txt: [('-', ''), ('-', ''), ('testi-', 'testi')]
SOL19021101-V17-12-page62.txt: [('-', ''), ('-.', '.'), ('-.', '.'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('.-X-', '.-X'), ('-', ''), ('-.".', '.".'), ('-', ''), ('-', '')]
SOL19021101-V17-12-page63.txt: [('x-', 'x'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('espe-', 'espe'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('expect-', 'expect'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19021201-V17-13-page11.txt: [('An-', 'An'), ('corn-', 'corn')]
SOL19021201-V17-13-page13.txt: [('em-', 'em'), ('at-', 'at')]
SOL19021201-V17-13-page14.txt: [('-', '')]
SOL19021201-V17-13-page16.txt: [('anti-', 'anti')]
SOL19021201-V17-13-page25.txt: [('Record-', 'Record')]
SOL19021201-V17-13-page32.txt: [('pro-', 'pro')]
SOL19021201-V17-13-page33.txt: [('-', ''), ('SEN-', 'SEN')]
SOL19021201-V17-13-page35.txt: [('Ap-', 'Ap')]
SOL19021201-V17-13-page36.txt: [('SEN-', 'SEN')]
SOL19021201-V17-13-page38.txt: [('affilia-', 'affilia')]
SOL19021201-V17-13-page41.txt: [('compul-', 'compul')]
SOL19021201-V17-13-page46.txt: [('Mem-', 'Mem')]
SOL19021201-V17-13-page49.txt: [('anti-', 'anti'), ('denomina-', 'denomina'), ('re-', 're')]
SOL19021201-V17-13-page5.txt: [('DES-', 'DES')]
SOL19021201-V17-13-page57.txt: [('Afir-', 'Afir'), ('.N.-', '.N.'), ('No...."......-', 'No...."......'), ('N.-', 'N.'), ('S.-', 'S.'), ('.-', '.'), ('Ns.-', 'Ns.'), ('N.-', 'N.'), ('Nia-', 'Nia'), ('No.-', 'No.'), ('.-', '.'), ('...-', '...'), ('-...-', '...-'), ('-.--', '.--')]
SOL19021201-V17-13-page58.txt: [('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-x..', 'x..'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.')]
SOL19021201-V17-13-page59.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.X.', '.X.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19021201-V17-13-page60.txt: [('-', ''), ('-', ''), ('.-', '.')]
SOL19021201-V17-13-page61.txt: [('Inde-', 'Inde'), ('-', '')]
SOL19021201-V17-13-page62.txt: [('Ques-', 'Ques'), ('Comment-', 'Comment'), ('-', ''), ('Else-', 'Else'), ('Enforcement-', 'Enforcement'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('PAGE-', 'PAGE'), ('-', ''), ('-', ''), ('ADVERTISEMENTS-', 'ADVERTISEMENTS'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Mas-', 'Mas'), ('Un-', 'Un'), ('Paragraphs-', 'Paragraphs'), ('-', ''), ('-', '')]
SOL19030101-V18-01-page11.txt: [('-OF', 'OF'), ('pay-', 'pay')]
SOL19030101-V18-01-page14.txt: [('Anti-', 'Anti'), ('Anti-', 'Anti')]
SOL19030101-V18-01-page17.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030101-V18-01-page18.txt: [('-', ''), ('-', '')]
SOL19030101-V18-01-page2.txt: [("'--", "'-"), ('r---', 'r--'), ('.-', '.'), ('--', '-'), ('Pr-', 'Pr'), ('-', '')]
SOL19030101-V18-01-page20.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030101-V18-01-page6.txt: [('reply--', 'reply-')]
SOL19030101-V18-01-page7.txt: [('-with', 'with')]
SOL19030101-V18-01-page8.txt: [('SEN-', 'SEN'), ('SEN-', 'SEN')]
SOL19030108-V18-02-page11.txt: [('-pope', 'pope'), ('-', '')]
SOL19030108-V18-02-page12.txt: [('am-', 'am')]
SOL19030108-V18-02-page18.txt: [('prac-', 'prac'), ('Apth-', 'Apth'), ('-', ''), ('deport-', 'deport'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030108-V18-02-page2.txt: [('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-.', '.'), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030108-V18-02-page20.txt: [('-', ''), ('-', ''), ('-', ''), ('-x', 'x'), ('-', ''), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('x-', 'x'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030108-V18-02-page9.txt: [('be-', 'be')]
SOL19030115-V18-03-page10.txt: [('-', '')]
SOL19030115-V18-03-page13.txt: [('dur-', 'dur')]
SOL19030115-V18-03-page15.txt: [('-cent', 'cent'), ('-cent', 'cent'), ('ex-', 'ex')]
SOL19030115-V18-03-page16.txt: [('ref-', 'ref'), ('mo-', 'mo'), ('rather-', 'rather'), ('to-', 'to')]
SOL19030115-V18-03-page18.txt: [('irk-', 'irk'), ('-', ''), ('....-', '....'), ('...-', '...'), ('........-', '........'), ('....-', '....'), ('-....', '....'), ('-', ''), ('-', ''), ('........-', '........'), ('-', ''), ('-.do', '.do'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('--', '-'), ('aor....or-', 'aor....or'), ('..-', '..'), ('.-', '.'), ('..-', '..'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-NI', 'NI'), ('-.', '.'), ('-.', '.')]
SOL19030115-V18-03-page2.txt: [('Dis-', 'Dis')]
SOL19030115-V18-03-page20.txt: [('-Thoughts', 'Thoughts'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('contro-', 'contro'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Apos-', 'Apos'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', '')]
SOL19030115-V18-03-page5.txt: [('or-', 'or')]
SOL19030122-V18-04-page10.txt: [('un-', 'un')]
SOL19030122-V18-04-page12.txt: [('de-', 'de')]
SOL19030122-V18-04-page13.txt: [('-new', 'new'), ('-', '')]
SOL19030122-V18-04-page14.txt: [('demands.-', 'demands.')]
SOL19030122-V18-04-page16.txt: [('Sab-', 'Sab')]
SOL19030122-V18-04-page17.txt: [('TERI-', 'TERI'), ('City.-', 'City.'), ('SEN-', 'SEN')]
SOL19030122-V18-04-page20.txt: [('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-.', '.'), ('-', ''), ('.-', '.'), ('-', ''), ('-.', '.'), ('-', '')]
SOL19030122-V18-04-page3.txt: [('L-', 'L')]
SOL19030122-V18-04-page6.txt: [('-and', 'and'), ('-vote', 'vote')]
SOL19030122-V18-04-page8.txt: [('-with', 'with')]
SOL19030129-V18-05-page12.txt: [('-as', 'as'), ('-comment', 'comment'), ('de-', 'de'), ('Philip-', 'Philip')]
SOL19030129-V18-05-page14.txt: [('-to', 'to'), ('"-', '"')]
SOL19030129-V18-05-page17.txt: [('ap-', 'ap')]
SOL19030129-V18-05-page18.txt: [('"-', '"')]
SOL19030129-V18-05-page19.txt: [('-r', 'r')]
SOL19030129-V18-05-page2.txt: [('C.C.C--', 'C.C.C-'), ('-C-C-', 'C-C-'), ('C.C-', 'C.C'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030129-V18-05-page20.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', '')]
SOL19030129-V18-05-page3.txt: [('relig-', 'relig')]
SOL19030129-V18-05-page6.txt: [('-terrible', 'terrible')]
SOL19030129-V18-05-page7.txt: [('self-gov-', 'self-gov')]
SOL19030129-V18-05-page9.txt: [('notwith-', 'notwith')]
SOL19030205-V18-06-page10.txt: [('ses-', 'ses')]
SOL19030205-V18-06-page15.txt: [('by-', 'by'), ('SEN-', 'SEN')]
SOL19030205-V18-06-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('contro-', 'contro'), ('-', ''), ('Apos-', 'Apos')]
SOL19030205-V18-06-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-X-X-', 'X-X-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('N-', 'N')]
SOL19030205-V18-06-page3.txt: [('-', '')]
SOL19030205-V18-06-page6.txt: [('estab-', 'estab')]
SOL19030205-V18-06-page9.txt: [('-', '')]
SOL19030212-V18-07-page10.txt: [('-and', 'and'), ('-even', 'even'), ('Out-', 'Out'), ('-has', 'has'), ('con-', 'con'), ('-ducted', 'ducted'), ('SENTI-', 'SENTI')]
SOL19030212-V18-07-page12.txt: [('commit-', 'commit')]
SOL19030212-V18-07-page13.txt: [('constitu-', 'constitu')]
SOL19030212-V18-07-page14.txt: [('indus-', 'indus'), ('-', '')]
SOL19030212-V18-07-page15.txt: [('out--', 'out-'), ('Mo-', 'Mo')]
SOL19030212-V18-07-page16.txt: [('-', ''), ('-', '')]
SOL19030212-V18-07-page2.txt: [('EVERY-', 'EVERY')]
SOL19030212-V18-07-page5.txt: [('Euro-', 'Euro')]
SOL19030219-V18-08-page10.txt: [('--Its', '-Its')]
SOL19030219-V18-08-page11.txt: [('District-', 'District')]
SOL19030219-V18-08-page14.txt: [('-', '')]
SOL19030219-V18-08-page15.txt: [('City-', 'City'), ('OF-', 'OF'), ('renew--', 'renew-')]
SOL19030219-V18-08-page16.txt: [('-', ''), ("-'", "'"), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030219-V18-08-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030219-V18-08-page4.txt: [('news-', 'news')]
SOL19030219-V18-08-page6.txt: [('DIFFER-', 'DIFFER')]
SOL19030219-V18-08-page7.txt: [('-that', 'that'), ('-great', 'great'), ('-of', 'of')]
SOL19030219-V18-08-page8.txt: [('un-', 'un'), ('-', '')]
SOL19030226-V18-09-page10.txt: [('-of', 'of')]
SOL19030226-V18-09-page12.txt: [('German-', 'German')]
SOL19030226-V18-09-page13.txt: [('SENTI-', 'SENTI')]
SOL19030226-V18-09-page16.txt: [('V-', 'V'), ('-TN', 'TN'), ('-v', 'v'), ('v-', 'v'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030226-V18-09-page2.txt: [('SEN-', 'SEN'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030226-V18-09-page3.txt: [('de-', 'de')]
SOL19030226-V18-09-page5.txt: [('forbid-', 'forbid')]
SOL19030226-V18-09-page7.txt: [('prin-', 'prin'), ('-the', 'the')]
SOL19030226-V18-09-page8.txt: [('-', '')]
SOL19030305-V18-10-page11.txt: [('extrav-', 'extrav')]
SOL19030305-V18-10-page12.txt: [('Sun--', 'Sun-')]
SOL19030305-V18-10-page13.txt: [('SEN-', 'SEN'), ('neg-', 'neg')]
SOL19030305-V18-10-page14.txt: [('the-', 'the')]
SOL19030305-V18-10-page15.txt: [('SEN-', 'SEN')]
SOL19030305-V18-10-page16.txt: [('...-', '...'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030305-V18-10-page2.txt: [('-', ''), ('for-', 'for'), ('destroy-', 'destroy'), ('-', ''), ('-', ''), ('-', '')]
SOL19030305-V18-10-page3.txt: [('-', ''), ('ex-', 'ex')]
SOL19030305-V18-10-page4.txt: [('-', '')]
SOL19030305-V18-10-page7.txt: [('Con-', 'Con')]
SOL19030305-V18-10-page8.txt: [('when-', 'when'), ('pur-', 'pur')]
SOL19030305-V18-10-page9.txt: [('mer-', 'mer')]
SOL19030312-V18-11-page1.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19030312-V18-11-page10.txt: [('con-', 'con')]
SOL19030312-V18-11-page11.txt: [('in-', 'in')]
SOL19030312-V18-11-page12.txt: [('IN-', 'IN'), ('DES-', 'DES')]
SOL19030312-V18-11-page14.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19030312-V18-11-page15.txt: [('corn-', 'corn'), ('tiara.-', 'tiara.'), ('-', ''), ('-', ''), ('address---', 'address--'), ('-', ''), ('-', ''), ('-', '')]
SOL19030312-V18-11-page16.txt: [('-', ''), ('...-', '...'), ('-', ''), ('--', '-'), ('----', '---'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('A-', 'A'), ('-', ''), ('.z.kcYes-', '.z.kcYes'), ('-', ''), ('--', '-'), ('Yk-', 'Yk'), ('-Y---.--.', 'Y---.--.'), ('-At-', 'At-'), ('-', ''), ('---', '--'), ("-'", "'"), ('-', ''), ('--', '-'), ('-', ''), ('---"', '--"'), ('--Listed', '-Listed'), ('--Address--', '-Address--')]
SOL19030312-V18-11-page3.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-I.', 'I.'), ('-', ''), ('-', ''), ('-TT', 'TT'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('convic-', 'convic'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', '')]
SOL19030312-V18-11-page4.txt: [('-', '')]
SOL19030312-V18-11-page5.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030312-V18-11-page6.txt: [('Whoso-', 'Whoso'), ('fam-', 'fam')]
SOL19030312-V18-11-page7.txt: [('er-', 'er')]
SOL19030312-V18-11-page8.txt: [('re-', 're')]
SOL19030319-V18-12-page1.txt: [('-YORK', 'YORK'), ('con-', 'con')]
SOL19030319-V18-12-page10.txt: [('-origin', 'origin'), ('-of', 'of'), ('-steadily', 'steadily'), ('-through', 'through'), ('in-', 'in'), ('-numerable', 'numerable'), ('-prerogatives', 'prerogatives'), ('-and', 'and'), ('-held', 'held'), ("-Wylie's", "Wylie's"), ('demon-', 'demon')]
SOL19030319-V18-12-page11.txt: [('-quite', 'quite'), ('fun-', 'fun')]
SOL19030319-V18-12-page12.txt: [('-', '')]
SOL19030319-V18-12-page14.txt: [('-money', 'money'), ('deceive-', 'deceive')]
SOL19030319-V18-12-page15.txt: [('PUBLISHING-', 'PUBLISHING')]
SOL19030319-V18-12-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030319-V18-12-page4.txt: [('charac-', 'charac')]
SOL19030319-V18-12-page7.txt: [('Pos-', 'Pos')]
SOL19030319-V18-12-page8.txt: [('Infal-', 'Infal')]
SOL19030319-V18-12-page9.txt: [('-for', 'for')]
SOL19030326-V18-13-page10.txt: [('-the', 'the'), ('en-', 'en'), ('-', ''), ('Sabbath.-', 'Sabbath.')]
SOL19030326-V18-13-page15.txt: [('-', ''), ('-', ''), ('AZ-', 'AZ'), ('--', '-'), ('e-', 'e'), ('-', ''), ('e...-', 'e...'), ('e.ae-t-', 'e.ae-t'), ('"Law-', '"Law')]
SOL19030326-V18-13-page2.txt: [('by-', 'by')]
SOL19030326-V18-13-page3.txt: [('op-', 'op'), ('pro-', 'pro')]
SOL19030326-V18-13-page4.txt: [('passage-', 'passage')]
SOL19030326-V18-13-page6.txt: [('con-', 'con')]
SOL19030402-V18-14-page15.txt: [('-', ''), ('-', ''), ('A-', 'A'), ('.-', '.'), ('thaw-', 'thaw'), ('It-', 'It'), ('-', '')]
SOL19030402-V18-14-page16.txt: [('G-', 'G')]
SOL19030402-V18-14-page5.txt: [('-', '')]
SOL19030402-V18-14-page9.txt: [('con-', 'con')]
SOL19030409-V18-15-page10.txt: [('Phil-', 'Phil')]
SOL19030409-V18-15-page13.txt: [('-the', 'the')]
SOL19030409-V18-15-page15.txt: [('-', ''), ('-', ''), ('-', ''), ('et-', 'et'), ('-u-', 'u-'), ('-', ''), ('N-', 'N'), ('-ere.c', 'ere.c')]
SOL19030409-V18-15-page16.txt: [('ACCEPT-', 'ACCEPT'), ('UT-', 'UT'), ('CAUSE-', 'CAUSE')]
SOL19030409-V18-15-page4.txt: [('recog-', 'recog')]
SOL19030409-V18-15-page6.txt: [('per-', 'per')]
SOL19030409-V18-15-page7.txt: [('-at', 'at')]
SOL19030416-V18-16-page12.txt: [('Harris-', 'Harris')]
SOL19030416-V18-16-page14.txt: [('type.-', 'type.')]
SOL19030416-V18-16-page15.txt: [('-', ''), ('appli-', 'appli'), ('-', '')]
SOL19030416-V18-16-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030416-V18-16-page3.txt: [('se-', 'se')]
SOL19030416-V18-16-page5.txt: [('under-', 'under')]
SOL19030416-V18-16-page8.txt: [('neces-', 'neces')]
SOL19030423-V18-17-page1.txt: [('PROHI-', 'PROHI'), ('RE-', 'RE')]
SOL19030423-V18-17-page12.txt: [('"-', '"')]
SOL19030423-V18-17-page15.txt: [('-', ''), ('-', '')]
SOL19030423-V18-17-page4.txt: [('where-', 'where')]
SOL19030423-V18-17-page6.txt: [('Mc-', 'Mc')]
SOL19030423-V18-17-page9.txt: [('Sab-', 'Sab'), ('-', '')]
SOL19030430-V18-18-page10.txt: [('-themselves', 'themselves'), ('un-', 'un'), ('-certainty', 'certainty')]
SOL19030430-V18-18-page12.txt: [('-', ''), ('on-', 'on')]
SOL19030430-V18-18-page13.txt: [('express-', 'express')]
SOL19030430-V18-18-page14.txt: [('pur-', 'pur')]
SOL19030430-V18-18-page15.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030430-V18-18-page16.txt: [('SOME-', 'SOME'), ('IN-', 'IN')]
SOL19030430-V18-18-page4.txt: [('-dispatch', 'dispatch'), ('un-', 'un')]
SOL19030507-V18-19-page1.txt: [('GENER-', 'GENER'), ('SUB-', 'SUB'), ('OBEDI-', 'OBEDI')]
SOL19030507-V18-19-page10.txt: [('representa-', 'representa')]
SOL19030507-V18-19-page11.txt: [('non-', 'non'), ('-enforcement', 'enforcement'), ('-conditions.', 'conditions.'), ('-exist.', 'exist.'), ('-one', 'one'), ('-or', 'or')]
SOL19030507-V18-19-page12.txt: [('Sun-', 'Sun')]
SOL19030507-V18-19-page13.txt: [('viola-', 'viola')]
SOL19030507-V18-19-page15.txt: [('corn-', 'corn')]
SOL19030507-V18-19-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('Apos-', 'Apos'), ('-The', 'The'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030507-V18-19-page2.txt: [('-.', '.'), ('A-', 'A'), ('-.', '.'), ('-', ''), ('WW-', 'WW'), ("-'", "'"), ('Address-', 'Address'), ('-', ''), ('-', '')]
SOL19030507-V18-19-page3.txt: [('partici-', 'partici')]
SOL19030507-V18-19-page6.txt: [('Mc-', 'Mc'), ('non-', 'non')]
SOL19030507-V18-19-page7.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc'), ('IG-', 'IG')]
SOL19030507-V18-19-page8.txt: [('-', '')]
SOL19030507-V18-19-page9.txt: [('-', ''), ('-', ''), ('-', '')]
SOL19030514-V18-20-page1.txt: [('IN-', 'IN')]
SOL19030514-V18-20-page16.txt: [('-.', '.'), ('-The', 'The'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030514-V18-20-page2.txt: [('-', ''), ('-', '')]
SOL19030514-V18-20-page6.txt: [('-our', 'our')]
SOL19030514-V18-20-page9.txt: [('-', ''), ('in-', 'in')]
SOL19030521-V18-21-page1.txt: [('HOW-', 'HOW')]
SOL19030521-V18-21-page10.txt: [('Times-', 'Times'), ('de-', 'de')]
SOL19030521-V18-21-page11.txt: [('forbid-', 'forbid')]
SOL19030521-V18-21-page15.txt: [('some-', 'some'), ('-', ''), ('-', ''), ("-'V", "'V"), ('-Z', 'Z'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-..', '..'), ('-', ''), ('-', ''), ('-', '')]
SOL19030521-V18-21-page2.txt: [('the--', 'the-'), ('-', ''), ('-', '')]
SOL19030521-V18-21-page8.txt: [('Mc-', 'Mc')]
SOL19030528-V18-22-page1.txt: [('-UTION', 'UTION')]
SOL19030528-V18-22-page14.txt: [('TRAFFIC-', 'TRAFFIC')]
SOL19030528-V18-22-page15.txt: [('con-', 'con'), ('-', ''), ('def-', 'def')]
SOL19030528-V18-22-page16.txt: [('--', '-')]
SOL19030528-V18-22-page2.txt: [('go-', 'go'), ('--ft', '-ft'), ('-', ''), ('-', ''), ('r-', 'r'), ('P-Tfansel-', 'P-Tfansel'), ('"--', '"-'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('particu-', 'particu'), ('the---', 'the--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.....-', '.....'), ('-', ''), ('-', ''), ('-', ''), ('--.-', '-.-'), ('---', '--')]
SOL19030528-V18-22-page5.txt: [('SEN-', 'SEN')]
SOL19030528-V18-22-page6.txt: [('-contention', 'contention')]
SOL19030528-V18-22-page8.txt: [('Com-', 'Com')]
SOL19030604-V18-23-page10.txt: [('-preamble', 'preamble'), ('-annual', 'annual')]
SOL19030604-V18-23-page11.txt: [('ut-', 'ut')]
SOL19030604-V18-23-page12.txt: [('dis-', 'dis'), ('-', '')]
SOL19030604-V18-23-page13.txt: [('-week', 'week'), ('-usual', 'usual')]
SOL19030604-V18-23-page15.txt: [('Cor-', 'Cor')]
SOL19030604-V18-23-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030604-V18-23-page2.txt: [('-', ''), ('-.-', '.-'), ('-', ''), ("'-", "'"), ('-', ''), ('A.-', 'A.'), ('-', ''), ('-i', 'i'), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030604-V18-23-page4.txt: [('Freethink-', 'Freethink')]
SOL19030604-V18-23-page9.txt: [('"--', '"-')]
SOL19030611-V18-24-page11.txt: [('SEN-', 'SEN')]
SOL19030611-V18-24-page16.txt: [('-alion', 'alion')]
SOL19030611-V18-24-page2.txt: [('-', ''), ('-', '')]
SOL19030611-V18-24-page3.txt: [('enforce-', 'enforce')]
SOL19030611-V18-24-page4.txt: [('asso-', 'asso')]
SOL19030611-V18-24-page6.txt: [('it-', 'it')]
SOL19030611-V18-24-page9.txt: [('SEN-', 'SEN')]
SOL19030618-V18-25-page11.txt: [('un-', 'un'), ('-derlying', 'derlying')]
SOL19030618-V18-25-page13.txt: [('-other', 'other'), ('-a', 'a'), ('-was', 'was'), ('-conserved', 'conserved'), ('-do', 'do'), ('-churches', 'churches'), ('signa-', 'signa')]
SOL19030618-V18-25-page14.txt: [('-', '')]
SOL19030618-V18-25-page16.txt: [('-A.', 'A.'), ('ASIIING-', 'ASIIING')]
SOL19030618-V18-25-page3.txt: [('-', ''), ('per-', 'per'), ('-', ''), ('-', ''), ('over-', 'over')]
SOL19030618-V18-25-page6.txt: [('prom-', 'prom'), ('denomi-', 'denomi')]
SOL19030618-V18-25-page7.txt: [('non-', 'non'), ('legal-', 'legal')]
SOL19030618-V18-25-page9.txt: [('pre-', 'pre')]
SOL19030625-V18-26-page10.txt: [('Chris-', 'Chris')]
SOL19030625-V18-26-page11.txt: [('-we', 'we'), ('so-', 'so'), ('-called', 'called')]
SOL19030625-V18-26-page12.txt: [('-', '')]
SOL19030625-V18-26-page13.txt: [('al-', 'al')]
SOL19030625-V18-26-page15.txt: [('eccen-', 'eccen')]
SOL19030625-V18-26-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('"-', '"'), ('-', ''), ('-.', '.'), ('PUBLISHING-', 'PUBLISHING')]
SOL19030625-V18-26-page2.txt: [('-', ''), ('SENTI-', 'SENTI'), ('SENTI-', 'SENTI')]
SOL19030625-V18-26-page3.txt: [('Vol-', 'Vol')]
SOL19030625-V18-26-page6.txt: [('suf-', 'suf')]
SOL19030625-V18-26-page7.txt: [('As-', 'As')]
SOL19030625-V18-26-page8.txt: [('-', ''), ('-', ''), ('lay-', 'lay')]
SOL19030702-V18-27-page10.txt: [('SEN-', 'SEN')]
SOL19030702-V18-27-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030702-V18-27-page2.txt: [('-', '')]
SOL19030702-V18-27-page7.txt: [('com-', 'com'), ('dis-', 'dis')]
SOL19030709-V18-28-page10.txt: [('-', ''), ('-', '')]
SOL19030709-V18-28-page12.txt: [('coun-', 'coun')]
SOL19030709-V18-28-page14.txt: [('prin-', 'prin')]
SOL19030709-V18-28-page15.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030709-V18-28-page16.txt: [('-', ''), ('-Thoughts', 'Thoughts'), ('-', ''), ('-i', 'i'), ('-.', '.'), ('-', ''), ('-Heralds', 'Heralds'), ('-', ''), ('-', ''), ('-', ''), ('Apos-', 'Apos'), ('-', ''), ('-x-', 'x-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030709-V18-28-page4.txt: [('coun-', 'coun')]
SOL19030709-V18-28-page7.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030709-V18-28-page8.txt: [('-', ''), ('-', '')]
SOL19030716-V18-29-page12.txt: [('Maa-', 'Maa'), ('"-', '"')]
SOL19030716-V18-29-page13.txt: [('-the', 'the'), ('-those', 'those'), ('-physical', 'physical'), ('per-', 'per'), ('-sons', 'sons'), ('-in', 'in'), ('-that', 'that')]
SOL19030716-V18-29-page15.txt: [('-', '')]
SOL19030716-V18-29-page16.txt: [('-York', 'York')]
SOL19030716-V18-29-page2.txt: [('-', '')]
SOL19030716-V18-29-page6.txt: [('AMER-', 'AMER'), ('AMERICAN-', 'AMERICAN')]
SOL19030716-V18-29-page7.txt: [('Decalogue.-', 'Decalogue.'), ('with-', 'with')]
SOL19030723-V18-30-page1.txt: [('SIXTY-', 'SIXTY')]
SOL19030723-V18-30-page10.txt: [('-', '')]
SOL19030723-V18-30-page2.txt: [('the--', 'the-'), ('-', ''), ('-', '')]
SOL19030723-V18-30-page3.txt: [('-', '')]
SOL19030723-V18-30-page4.txt: [('pro-', 'pro'), ('-motion', 'motion')]
SOL19030723-V18-30-page5.txt: [('twenty-', 'twenty')]
SOL19030730-V18-31-page13.txt: [('-on', 'on')]
SOL19030730-V18-31-page16.txt: [('-', ''), ('PUBLISHING-', 'PUBLISHING')]
SOL19030730-V18-31-page2.txt: [('SENTI-', 'SENTI'), ('the--', 'the-'), ('-', ''), ('-', '')]
SOL19030730-V18-31-page5.txt: [('con-', 'con')]
SOL19030730-V18-31-page6.txt: [('Mc-', 'Mc')]
SOL19030730-V18-31-page8.txt: [('influ-', 'influ')]
SOL19030806-V18-32-page10.txt: [('state-', 'state')]
SOL19030806-V18-32-page11.txt: [('non-', 'non')]
SOL19030806-V18-32-page16.txt: [('A-', 'A'), ('-', ''), ('cog-', 'cog'), ('princi-', 'princi'), ('appli-', 'appli'), ('-', ''), ('-.', '.'), ('-.muy', '.muy'), ('-', ''), ('-', ''), ('-t', 't'), ('-d', 'd'), ('-', ''), ('PUBLISHING-', 'PUBLISHING')]
SOL19030806-V18-32-page2.txt: [('----', '---'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030806-V18-32-page3.txt: [('-N', 'N')]
SOL19030806-V18-32-page8.txt: [('legis-', 'legis')]
SOL19030813-V18-33-page1.txt: [('-', ''), ('-', '')]
SOL19030813-V18-33-page10.txt: [('is-', 'is'), ('-chap.', 'chap.'), ('-ever', 'ever')]
SOL19030813-V18-33-page13.txt: [('excite-', 'excite')]
SOL19030813-V18-33-page14.txt: [('en-', 'en')]
SOL19030813-V18-33-page16.txt: [('PUBLISHING-', 'PUBLISHING'), ('-York', 'York')]
SOL19030813-V18-33-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030813-V18-33-page3.txt: [('SEN-', 'SEN')]
SOL19030820-V18-34-page10.txt: [('emana-', 'emana')]
SOL19030820-V18-34-page16.txt: [('--', '-'), ('insepa-', 'insepa'), ('cog-', 'cog'), ('princi-', 'princi'), ('-', ''), ('-', ''), ("''-", "''"), ('-', ''), ('..-', '..')]
SOL19030820-V18-34-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19030820-V18-34-page6.txt: [('twenty-', 'twenty')]
SOL19030820-V18-34-page7.txt: [('re-', 're')]
SOL19030820-V18-34-page9.txt: [('ex-', 'ex')]
SOL19030827-V18-35-page11.txt: [('con-', 'con'), ('Mc-', 'Mc'), ('govern-', 'govern')]
SOL19030827-V18-35-page12.txt: [('branches-', 'branches')]
SOL19030827-V18-35-page15.txt: [('Au-', 'Au')]
SOL19030827-V18-35-page16.txt: [('.-', '.'), ("'--", "'-"), ('-', ''), ('fige-', 'fige'), ('-', ''), ('CO-V-', 'CO-V'), ('ISI-IING-', 'ISI-IING'), ('-York', 'York')]
SOL19030827-V18-35-page3.txt: [('in-', 'in'), ('Sunday-', 'Sunday'), ('cam-', 'cam')]
SOL19030827-V18-35-page4.txt: [('senti-', 'senti')]
SOL19030827-V18-35-page5.txt: [('Sunday-closing-en-', 'Sunday-closing-en')]
SOL19030827-V18-35-page6.txt: [('rev-', 'rev')]
SOL19030827-V18-35-page7.txt: [('-', '')]
SOL19030903-V18-36-page11.txt: [('prom-', 'prom')]
SOL19030903-V18-36-page12.txt: [('twenty-', 'twenty')]
SOL19030903-V18-36-page16.txt: [('-', ''), ('-abaladad-wWolaitaii', 'abaladad-wWolaitaii'), ('-', '')]
SOL19030903-V18-36-page2.txt: [('-', ''), ('-', '')]
SOL19030903-V18-36-page4.txt: [('gen-', 'gen')]
SOL19030903-V18-36-page6.txt: [('in-', 'in')]
SOL19030903-V18-36-page9.txt: [('Pro-', 'Pro')]
SOL19030910-V18-37-page13.txt: [('-', '')]
SOL19030910-V18-37-page15.txt: [('-drama', 'drama')]
SOL19030910-V18-37-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.')]
SOL19030910-V18-37-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('val-', 'val')]
SOL19030910-V18-37-page6.txt: [('Mc-', 'Mc')]
SOL19030910-V18-37-page8.txt: [('-of', 'of')]
SOL19030917-V18-38-page12.txt: [('None--', 'None-')]
SOL19030917-V18-38-page13.txt: [('-entered', 'entered'), ('-contains', 'contains'), ('-of', 'of')]
SOL19030917-V18-38-page14.txt: [('com-', 'com')]
SOL19030917-V18-38-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('val-', 'val'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-X-', 'X-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-X-', 'X-'), ('-', ''), ('-', ''), ('-', '')]
SOL19030917-V18-38-page8.txt: [('Be-', 'Be')]
SOL19030924-V18-39-page12.txt: [('new-', 'new'), ('declen-', 'declen')]
SOL19030924-V18-39-page14.txt: [('De-', 'De')]
SOL19030924-V18-39-page2.txt: [('val-', 'val')]
SOL19030924-V18-39-page3.txt: [('-', '')]
SOL19030924-V18-39-page8.txt: [('instruc-', 'instruc')]
SOL19031001-V18-40-page1.txt: [('-', '')]
SOL19031001-V18-40-page2.txt: [('hereto-', 'hereto')]
SOL19031001-V18-40-page3.txt: [('non-', 'non'), ('Ca-', 'Ca')]
SOL19031001-V18-40-page4.txt: [('Sum-', 'Sum')]
SOL19031001-V18-40-page7.txt: [('Kip-', 'Kip')]
SOL19031015-V18-41-page10.txt: [('con-', 'con'), ('there.-', 'there.')]
SOL19031015-V18-41-page12.txt: [('first-', 'first')]
SOL19031015-V18-41-page14.txt: [('peti-', 'peti')]
SOL19031015-V18-41-page16.txt: [('pse-', 'pse'), ("'-", "'"), ('-', ''), ('-V.', 'V.'), ('-', ''), ('-', ''), ('fillIK-', 'fillIK'), ('CLOTI-', 'CLOTI'), ('-', '')]
SOL19031015-V18-41-page2.txt: [('-Err', 'Err')]
SOL19031015-V18-41-page3.txt: [('Director-', 'Director')]
SOL19031015-V18-41-page5.txt: [('-religion', 'religion'), ('liberty-', 'liberty'), ('wood-gather-', 'wood-gather')]
SOL19031015-V18-41-page6.txt: [('-', ''), ('legalism."-', 'legalism."')]
SOL19031015-V18-41-page7.txt: [('en-', 'en')]
SOL19031015-V18-41-page8.txt: [('answer-', 'answer')]
SOL19031015-V18-41-page9.txt: [('re-', 're')]
SOL19031022-V18-42-page10.txt: [('-is', 'is')]
SOL19031022-V18-42-page15.txt: [('-', '')]
SOL19031022-V18-42-page16.txt: [('-', ''), ('-', ''), ('-the', 'the'), ('prindi-', 'prindi'), ('-', ''), ('appli-', 'appli'), ('-', ''), ('-Z', 'Z'), ('-RzI', 'RzI'), ('HERALD-', 'HERALD'), ("CILAJ'r-lI-", "CILAJ'r-lI"), ("men'-", "men'"), ("-'", "'"), ('A.N-', 'A.N'), ('I-', 'I'), ('-', '')]
SOL19031022-V18-42-page3.txt: [('-', '')]
SOL19031022-V18-42-page6.txt: [('Swit-', 'Swit')]
SOL19031022-V18-42-page9.txt: [('-peace', 'peace')]
SOL19031029-V18-43-page15.txt: [('ap-', 'ap')]
SOL19031029-V18-43-page16.txt: [('-', ''), ('StSttoryth--', 'StSttoryth-'), ('-', '')]
SOL19031029-V18-43-page3.txt: [('legis-', 'legis'), ('evi-', 'evi'), ('-', '')]
SOL19031029-V18-43-page7.txt: [('Sun-', 'Sun'), ('-worship."', 'worship."'), ('-Jews', 'Jews')]
SOL19031105-V18-44-page10.txt: [('-Christian', 'Christian')]
SOL19031105-V18-44-page15.txt: [('"frus-', '"frus'), ('-', '')]
SOL19031105-V18-44-page16.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19031105-V18-44-page3.txt: [('car-', 'car')]
SOL19031105-V18-44-page9.txt: [('gov-', 'gov')]
SOL19031112-V18-45-page1.txt: [('-', '')]
SOL19031112-V18-45-page13.txt: [('-ass', 'ass'), ('-postoffices."', 'postoffices."')]
SOL19031112-V18-45-page16.txt: [('-', ''), ('St-', 'St'), ('At-Alt-At-', 'At-Alt-At'), ('F-', 'F'), ('-', ''), ('-', '')]
SOL19031112-V18-45-page2.txt: [('-', ''), ('acknowl-', 'acknowl'), ('-', '')]
SOL19031112-V18-45-page3.txt: [('popula-', 'popula')]
SOL19031112-V18-45-page4.txt: [('condi-', 'condi')]
SOL19031112-V18-45-page5.txt: [('-', '')]
SOL19031112-V18-45-page8.txt: [('-must', 'must')]
SOL19031112-V18-45-page9.txt: [('con-', 'con')]
SOL19031119-V18-46-page12.txt: [('-excellent', 'excellent')]
SOL19031119-V18-46-page13.txt: [('-were', 'were')]
SOL19031119-V18-46-page15.txt: [('propo-', 'propo')]
SOL19031119-V18-46-page16.txt: [('-Ak', 'Ak'), ('.-', '.'), ('Avilosiotioti-', 'Avilosiotioti'), ('...--', '...-'), ('-', '')]
SOL19031119-V18-46-page2.txt: [('-We', 'We'), ('-', '')]
SOL19031119-V18-46-page3.txt: [('ELI-', 'ELI')]
SOL19031126-V18-47-page15.txt: [('-THE', 'THE')]
SOL19031126-V18-47-page2.txt: [('G-', 'G')]
SOL19031126-V18-47-page4.txt: [('re-', 're')]
SOL19031126-V18-47-page7.txt: [('-press', 'press')]
SOL19031126-V18-47-page8.txt: [('state-', 'state')]
SOL19031126-V18-47-page9.txt: [('prin-', 'prin')]
SOL19031203-V18-48-page11.txt: [('pub-', 'pub')]
SOL19031203-V18-48-page13.txt: [('-whole', 'whole'), ('recog-', 'recog')]
SOL19031203-V18-48-page14.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
SOL19031203-V18-48-page15.txt: [('-', ''), ('-', ''), ('dark--', 'dark-')]
SOL19031203-V18-48-page5.txt: [('-', '')]
SOL19031203-V18-48-page7.txt: [('re-', 're'), ('irrelig-', 'irrelig'), ('pri-', 'pri')]
SOL19031210-V18-49-page10.txt: [('Inter-', 'Inter')]
SOL19031210-V18-49-page11.txt: [('un-', 'un')]
SOL19031210-V18-49-page13.txt: [('Mc-', 'Mc')]
SOL19031210-V18-49-page14.txt: [('ob-', 'ob')]
SOL19031210-V18-49-page4.txt: [('-but', 'but'), ('re-', 're')]
SOL19031210-V18-49-page7.txt: [('except-', 'except')]
SOL19031210-V18-49-page8.txt: [('dis-', 'dis')]
SOL19031210-V18-49-page9.txt: [('claim-', 'claim')]
SOL19031217-V18-50-page11.txt: [('observ-', 'observ')]
SOL19031217-V18-50-page12.txt: [('-old', 'old')]
SOL19031217-V18-50-page16.txt: [('am-', 'am')]
SOL19031217-V18-50-page9.txt: [('-', ''), ('-on', 'on')]
SOL19031224-V18-51-page11.txt: [('jus-', 'jus'), ('out-', 'out')]
SOL19031224-V18-51-page13.txt: [('re-', 're')]
SOL19031224-V18-51-page2.txt: [('-', '')]
SOL19031224-V18-51-page5.txt: [('Sunday-', 'Sunday')]
SOL19031224-V18-51-page8.txt: [('--EDITOR.', '-EDITOR.'), ('for-', 'for')]
SOL19031231-V18-52-page10.txt: [('un-', 'un'), ('-on', 'on'), ('man-', 'man')]
SOL19031231-V18-52-page12.txt: [('news--', 'news-')]
SOL19031231-V18-52-page13.txt: [('be-', 'be')]
SOL19031231-V18-52-page3.txt: [('I-', 'I')]
SOL19031231-V18-52-page8.txt: [('-or', 'or')]
SOL19040107-V19-01-page10.txt: [('-work', 'work')]
SOL19040107-V19-01-page14.txt: [('-', '')]
SOL19040107-V19-01-page15.txt: [('pre-', 'pre'), ('Hon-', 'Hon'), ('strin-', 'strin'), ('-representative', 'representative'), ('Pen.-', 'Pen.')]
SOL19040107-V19-01-page3.txt: [('him-', 'him'), ('-Sentinel', 'Sentinel')]
SOL19040107-V19-01-page5.txt: [('supersti-', 'supersti')]
SOL19040107-V19-01-page7.txt: [('Congress-', 'Congress')]
SOL19040107-V19-01-page9.txt: [('acknowl-', 'acknowl')]
SOL19040114-V19-02-page11.txt: [('dur-', 'dur')]
SOL19040114-V19-02-page12.txt: [('open-', 'open'), ('direc-', 'direc')]
SOL19040114-V19-02-page14.txt: [('Rem-', 'Rem')]
SOL19040114-V19-02-page2.txt: [('separa-', 'separa')]
SOL19040114-V19-02-page3.txt: [('-of', 'of'), ('Sunday-', 'Sunday')]
SOL19040114-V19-02-page5.txt: [('Chris-', 'Chris'), ('tak-', 'tak')]
SOL19040114-V19-02-page6.txt: [('in-', 'in')]
SOL19040114-V19-02-page8.txt: [('institu-', 'institu')]
SOL19040121-V19-03-page1.txt: [('Pennsyl-', 'Pennsyl')]
SOL19040121-V19-03-page13.txt: [('-"Sabbath"', '"Sabbath"'), ('-', '')]
SOL19040121-V19-03-page3.txt: [('Op-', 'Op'), ('shout-', 'shout')]
SOL19040121-V19-03-page4.txt: [('-that', 'that'), ('-that', 'that'), ('as-', 'as'), ('-serted', 'serted'), ('as-', 'as'), ('-serted', 'serted'), ('-State', 'State'), ('-of', 'of'), ('-deference', 'deference')]
SOL19040121-V19-03-page5.txt: [('Green-', 'Green')]
SOL19040121-V19-03-page6.txt: [('what-', 'what')]
SOL19040121-V19-03-page7.txt: [('--', '-'), ('--"', '-"')]
SOL19040121-V19-03-page8.txt: [('-in', 'in')]
SOL19040121-V19-03-page9.txt: [('cru-', 'cru')]
SOL19040128-V19-04-page10.txt: [('-Ruler', 'Ruler'), ('to-', 'to')]
SOL19040128-V19-04-page11.txt: [('-who', 'who'), ('pub-', 'pub')]
SOL19040128-V19-04-page12.txt: [('objet-', 'objet')]
SOL19040128-V19-04-page13.txt: [('-In', 'In')]
SOL19040128-V19-04-page14.txt: [('instru-', 'instru')]
SOL19040128-V19-04-page2.txt: [('author-', 'author')]
SOL19040128-V19-04-page5.txt: [('al-', 'al')]
SOL19040128-V19-04-page6.txt: [('-', ''), ('-the', 'the')]
SOL19040128-V19-04-page8.txt: [('di-', 'di')]
SOL19040204-V19-05-page11.txt: [('-on', 'on'), ('eor-', 'eor')]
SOL19040204-V19-05-page13.txt: [('-necessity', 'necessity'), ('-were', 'were')]
SOL19040204-V19-05-page14.txt: [('the-', 'the'), ('un-', 'un')]
SOL19040204-V19-05-page16.txt: [('-', '')]
SOL19040204-V19-05-page2.txt: [('-', '')]
SOL19040204-V19-05-page4.txt: [('-orders', 'orders'), ('-was', 'was'), ('-of', 'of'), ('ob-', 'ob'), ('-servance."', 'servance."')]
SOL19040204-V19-05-page5.txt: [('graft-', 'graft'), ('g-', 'g')]
SOL19040204-V19-05-page6.txt: [('Asso-', 'Asso'), ('dis-', 'dis')]
SOL19040204-V19-05-page7.txt: [('-matter', 'matter'), ('-which', 'which')]
SOL19040211-V19-06-page10.txt: [('un-', 'un')]
SOL19040211-V19-06-page11.txt: [('-that', 'that'), ('Latin-', 'Latin'), ('co-', 'co')]
SOL19040211-V19-06-page12.txt: [('Com-', 'Com')]
SOL19040211-V19-06-page14.txt: [('to-', 'to'), ('Roman-', 'Roman'), ('The-', 'The')]
SOL19040211-V19-06-page2.txt: [('the-', 'the'), ('PUBLISH-', 'PUBLISH')]
SOL19040211-V19-06-page3.txt: [('-religious', 'religious'), ('Catholic-', 'Catholic')]
SOL19040211-V19-06-page6.txt: [('assign-', 'assign'), ('Pub-', 'Pub'), ('ig-', 'ig')]
SOL19040211-V19-06-page7.txt: [('-the', 'the')]
SOL19040211-V19-06-page8.txt: [('non-', 'non')]
In [20]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/correction3

Average verified rate: 0.9841079849444068

Average of error rates: 0.018933463796477496

Total token count: 1248174

In [21]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[21]:
[("'", 981),
 ('t', 750),
 ('n', 506),
 ('e', 479),
 ('d', 416),
 ('w', 410),
 ('th', 397),
 ('m', 334),
 ('g', 260),
 ('x', 250),
 ('co', 245),
 ('r', 203),
 ('f', 199),
 ('sunday-closing', 180),
 ('u', 147),
 ('sunday-enforcement', 125),
 ('sunday-law', 116),
 ("clerks'", 96),
 ('pa', 96),
 ('z', 84),
 ('mo', 79),
 ('k', 70),
 ('io', 56),
 ("'the", 53),
 ('oo', 50),
 ("barbers'", 49),
 ('saloon-keepers', 46),
 ("grocers'", 45),
 ('wm', 44),
 ('re', 42),
 ('mutchler', 42),
 ('church-and-state', 41),
 ('tion', 40),
 ('loth', 38),
 ('law-abiding', 35),
 ('ex', 33),
 ('mass-meeting', 31),
 ('q', 30),
 ("jones'", 29),
 ('street-cars', 29),
 ("o'gorman", 27),
 ('farmakis', 27),
 ('saloonmen', 27),
 ('sundayclosing', 26),
 ('rest-day', 25),
 ('ourduty', 25),
 ('ment', 25),
 ('lc', 25),
 ('al', 24),
 ('tinel', 24)]

Correction 4 -- Remove extra quotation marks

In [22]:
# %load shared_elements/replace_extra_quotation_marks.py
prev = "correction3"
cycle = "correction4"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    corrections = []
    for token in tokens:
        token_list = list(token)
        last_char = token_list[-1]

        if last_char is "'":
            if len(token) > 1:
                if token_list[-2] is 's' or 'S':
                    pass
                else:
                    corrections.append((token, re.sub(r"'", r"", token)))
            else:
                pass
        elif token[0] is "'":
            corrections.append((token, re.sub(r"'", r"", token)))   
        else:
            pass
    
    if len(corrections) > 0:
        print('{}: {}'.format(filename, corrections))

        for correction in corrections:
            content = clean.replace_pair(correction, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
SOL19000510-V15-17-page13.txt: [("'In", 'In')]
SOL19000510-V15-17-page14.txt: [("'same", 'same')]
SOL19000510-V15-17-page15.txt: [("'half", 'half')]
SOL19000510-V15-17-page16.txt: [("'Tony", 'Tony'), ("'orders", 'orders'), ("'The", 'The')]
SOL19000510-V15-17-page6.txt: [("'Him", 'Him')]
SOL19000510-V15-17-page7.txt: [("'Tarry", 'Tarry')]
SOL19000510-V15-17-page8.txt: [("'Thou", 'Thou'), ("'and", 'and')]
SOL19000517-V15-18-page12.txt: [("'goo", 'goo')]
SOL19000517-V15-18-page16.txt: [("'zany", 'zany')]
SOL19000517-V15-18-page2.txt: [("'the", 'the')]
SOL19000517-V15-18-page4.txt: [("'As", 'As'), ("'Hail", 'Hail')]
SOL19000517-V15-18-page7.txt: [("'Blessing", 'Blessing')]
SOL19000517-V15-18-page9.txt: [("'The", 'The')]
SOL19000531-V15-21-page12.txt: [("'pleading", 'pleading')]
SOL19000531-V15-21-page14.txt: [("'to", 'to')]
SOL19000531-V15-21-page16.txt: [("'judge", 'judge')]
SOL19000531-V15-21-page4.txt: [("'We", 'We')]
SOL19000531-V15-21-page8.txt: [("'world", 'world')]
SOL19000614-V15-23-page12.txt: [("'It", 'It'), ("'I", 'I'), ("'Committee", 'Committee'), ("'We", 'We')]
SOL19000614-V15-23-page13.txt: [("'We", 'We'), ("'When", 'When')]
SOL19000614-V15-23-page6.txt: [("'I", 'I'), ("'Millo", 'Millo'), ("'The", 'The'), ("'apparently", 'apparently'), ("'the", 'the'), ("'two", 'two')]
SOL19000614-V15-23-page7.txt: [("'I", 'I'), ("'cedar", 'cedar'), ("'I", 'I')]
SOL19000614-V15-23-page8.txt: [("'All", 'All'), ("'duties", 'duties')]
SOL19000628-V15-25-page2.txt: [("'reject", 'reject')]
SOL19000628-V15-25-page3.txt: [("'The", 'The'), ("'other", 'other'), ("'person", 'person'), ("'such", 'such'), ("'person", 'person'), ("'territory", 'territory')]
SOL19000628-V15-25-page4.txt: [("'Constitutional", 'Constitutional'), ("'But", 'But'), ("'United", 'United'), ("'throughout", 'throughout'), ("'person", 'person')]
SOL19000628-V15-25-page5.txt: [("'spheres", 'spheres')]
SOL19000628-V15-25-page6.txt: [("'Eastern", 'Eastern')]
SOL19000628-V15-25-page7.txt: [("'conclusion", 'conclusion')]
SOL19000628-V15-25-page8.txt: [("'How", 'How')]
SOL19000628-V15-25-page9.txt: [("'Celestial", 'Celestial'), ("'and", 'and')]
SOL19000726-V15-29-page1.txt: [("'judge", 'judge')]
SOL19000726-V15-29-page13.txt: [("'Long", 'Long')]
SOL19000726-V15-29-page16.txt: [("'when", 'when'), ("'flocking", 'flocking')]
SOL19000726-V15-29-page2.txt: [("'Masters", 'Masters')]
SOL19000726-V15-29-page4.txt: [("'pernicious", 'pernicious'), ("'Hail", 'Hail')]
SOL19000726-V15-29-page8.txt: [("'a", 'a')]
SOL19000726-V15-29-page9.txt: [("'by", 'by'), ("'Sunday", 'Sunday')]
SOL19000802-V15-30-page10.txt: [("'Not", 'Not'), ("'how", 'how')]
SOL19000802-V15-30-page11.txt: [("'demand", 'demand'), ("'Under", 'Under'), ("'public", 'public')]
SOL19000802-V15-30-page12.txt: [("'line", 'line')]
SOL19000802-V15-30-page13.txt: [("'kept", 'kept')]
SOL19000802-V15-30-page2.txt: [("'Thus", 'Thus')]
SOL19000802-V15-30-page4.txt: [("'I", 'I')]
SOL19000802-V15-30-page8.txt: [("'Tis", 'Tis'), ("'Let", 'Let')]
SOL19000809-V15-31-page1.txt: [("'fief", 'fief')]
SOL19000809-V15-31-page10.txt: [("'Prussia", 'Prussia')]
SOL19000809-V15-31-page16.txt: [("'I", 'I')]
SOL19000809-V15-31-page3.txt: [("'America", 'America')]
SOL19000809-V15-31-page4.txt: [("'had", 'had')]
SOL19000809-V15-31-page5.txt: [("'s", 's')]
SOL19000809-V15-31-page9.txt: [("'should", 'should')]
SOL19000816-V15-32-page1.txt: [("'ang", 'ang')]
SOL19000816-V15-32-page11.txt: [("'is", 'is'), ('\'Christendom."', 'Christendom."'), ("'in", 'in')]
SOL19000816-V15-32-page12.txt: [("'Again", 'Again'), ("'Greatly", 'Greatly'), ("'spoke", 'spoke')]
SOL19000816-V15-32-page3.txt: [("'Jews", 'Jews')]
SOL19000816-V15-32-page4.txt: [("'cliaractef", 'cliaractef')]
SOL19000816-V15-32-page6.txt: [("'S", 'S'), ('\'state."', 'state."')]
SOL19000816-V15-32-page7.txt: [("'s", 's'), ("'Christ", 'Christ'), ("'the", 'the'), ("'that", 'that')]
SOL19000816-V15-32-page8.txt: [("'answer", 'answer')]
SOL19000823-V15-33-page11.txt: [("'this", 'this')]
SOL19000823-V15-33-page12.txt: [("'The", 'The'), ("'The", 'The'), ("'But", 'But')]
SOL19000823-V15-33-page13.txt: [("'contingent", 'contingent')]
SOL19000823-V15-33-page16.txt: [("'many", 'many')]
SOL19000823-V15-33-page2.txt: [("'thereof", 'thereof')]
SOL19000823-V15-33-page4.txt: [("'is", 'is')]
SOL19000823-V15-33-page8.txt: [("'violence.", 'violence.')]
SOL19000913-V15-36-page1.txt: [("'principles", 'principles')]
SOL19000913-V15-36-page10.txt: [("'in", 'in')]
SOL19000913-V15-36-page5.txt: [("'not", 'not')]
SOL19000913-V15-36-page9.txt: [("'vs.", 'vs.')]
SOL19000927-V15-38-page13.txt: [("'be", 'be')]
SOL19000927-V15-38-page16.txt: [("'A", 'A'), ("'arise", 'arise')]
SOL19000927-V15-38-page2.txt: [("'chemical", 'chemical')]
SOL19000927-V15-38-page3.txt: [("'needs", 'needs'), ("'from", 'from')]
SOL19000927-V15-38-page5.txt: [("'liable", 'liable'), ("'a", 'a'), ("'a", 'a')]
SOL19000927-V15-38-page6.txt: [("'will", 'will')]
SOL19000927-V15-38-page7.txt: [("'was", 'was'), ("'prison.", 'prison.')]
SOL19001005-V15-39-page10.txt: [("'the", 'the')]
SOL19001005-V15-39-page11.txt: [("'D.", 'D.')]
SOL19001005-V15-39-page13.txt: [("'missionary", 'missionary')]
SOL19001005-V15-39-page15.txt: [('\'"', '"'), ('\'\'\'\'"\'TTTTTTTTTTTTrrTTTTT.TTTTT', '"TTTTTTTTTTTTrrTTTTT.TTTTT'), ("'Ipe", 'Ipe')]
SOL19001005-V15-39-page4.txt: [("'utmost", 'utmost')]
SOL19001005-V15-39-page5.txt: [("'music", 'music')]
SOL19001005-V15-39-page6.txt: [("'show", 'show')]
SOL19001005-V15-39-page9.txt: [("'sovereign", 'sovereign')]
SOL19001101-V15-43-page1.txt: [("'goo.", 'goo.')]
SOL19001101-V15-43-page10.txt: [("'wicked", 'wicked')]
SOL19001101-V15-43-page13.txt: [("'We", 'We'), ("'We", 'We'), ("'How.", 'How.')]
SOL19001101-V15-43-page16.txt: [("'enforcement", 'enforcement')]
SOL19001101-V15-43-page2.txt: [("'rights", 'rights')]
SOL19001101-V15-43-page8.txt: [("'me", 'me')]
SOL19001108-V15-44-page16.txt: [("'..aw.", '..aw.')]
SOL19001108-V15-44-page3.txt: [("'is", 'is')]
SOL19001108-V15-44-page6.txt: [("'of", 'of'), ("'exceeding", 'exceeding')]
SOL19001108-V15-44-page9.txt: [("'half", 'half')]
SOL19011201-V17-01-page16.txt: [("'Violence", 'Violence')]
SOL19011201-V17-01-page19.txt: [("'of", 'of')]
SOL19011201-V17-01-page2.txt: [("'Is", 'Is')]
SOL19011201-V17-01-page24.txt: [("'The", 'The')]
SOL19011201-V17-01-page25.txt: [("'spirit", 'spirit')]
SOL19011201-V17-01-page30.txt: [("'power", 'power')]
SOL19011201-V17-01-page38.txt: [("'Congress", 'Congress'), ("'the", 'the')]
SOL19011201-V17-01-page41.txt: [("'to", 'to'), ("'church", 'church'), ("'hospital", 'hospital'), ("'orphan", 'orphan')]
SOL19011201-V17-01-page42.txt: [("'wide", 'wide'), ("'makes", 'makes')]
SOL19011201-V17-01-page43.txt: [("'put", 'put'), ("'great", 'great')]
SOL19011201-V17-01-page46.txt: [("'hold", 'hold')]
SOL19011201-V17-01-page48.txt: [("'this", 'this')]
SOL19011201-V17-01-page52.txt: [("'invasion", 'invasion')]
SOL19011201-V17-01-page53.txt: [("'draught", 'draught')]
SOL19011201-V17-01-page57.txt: [("'ie'w", 'iew')]
SOL19011201-V17-01-page58.txt: [("'Easy", 'Easy')]
SOL19020101-V17-02-page1.txt: [("'have", 'have')]
SOL19020101-V17-02-page14.txt: [("'The", 'The')]
SOL19020101-V17-02-page15.txt: [("'whatever", 'whatever'), ("'Sabbath", 'Sabbath')]
SOL19020101-V17-02-page17.txt: [("'a", 'a'), ("'sacred", 'sacred')]
SOL19020101-V17-02-page18.txt: [("'licensed", 'licensed')]
SOL19020101-V17-02-page19.txt: [("'resorts", 'resorts')]
SOL19020101-V17-02-page25.txt: [("'nature", 'nature')]
SOL19020101-V17-02-page28.txt: [("'Lord", 'Lord')]
SOL19020101-V17-02-page29.txt: [("'SUNDAY", 'SUNDAY'), ("'boycotted", 'boycotted'), ("'slavery", 'slavery')]
SOL19020101-V17-02-page39.txt: [("'will", 'will')]
SOL19020101-V17-02-page40.txt: [("'on", 'on')]
SOL19020101-V17-02-page42.txt: [("'put", 'put')]
SOL19020101-V17-02-page43.txt: [("'not", 'not')]
SOL19020101-V17-02-page44.txt: [("'damnation", 'damnation'), ("'to", 'to')]
SOL19020101-V17-02-page45.txt: [("'If", 'If')]
SOL19020101-V17-02-page51.txt: [("'Continental", 'Continental'), ("'should", 'should')]
SOL19020101-V17-02-page53.txt: [("'It", 'It')]
SOL19020101-V17-02-page56.txt: [("'The", 'The')]
SOL19020101-V17-02-page67.txt: [("'year's", 'years')]
SOL19020101-V17-02-page72.txt: [("'r", 'r')]
SOL19020201-V17-03-page18.txt: [("'We", 'We')]
SOL19020201-V17-03-page21.txt: [("'Who", 'Who')]
SOL19020201-V17-03-page34.txt: [("'service", 'service')]
SOL19020201-V17-03-page46.txt: [("'over", 'over')]
SOL19020201-V17-03-page53.txt: [("'rub", 'rub')]
SOL19020201-V17-03-page60.txt: [("'race.", 'race.'), ("'measures", 'measures')]
SOL19020201-V17-03-page64.txt: [("'ha", 'ha'), ("''whom", 'whom')]
SOL19020301-V17-04-page15.txt: [("'Sabbath", 'Sabbath')]
SOL19020301-V17-04-page18.txt: [("'it", 'it')]
SOL19020301-V17-04-page2.txt: [("'the", 'the')]
SOL19020301-V17-04-page23.txt: [("'asked", 'asked')]
SOL19020301-V17-04-page31.txt: [("'and", 'and')]
SOL19020301-V17-04-page34.txt: [("'hastens", 'hastens')]
SOL19020301-V17-04-page4.txt: [("'local", 'local')]
SOL19020301-V17-04-page40.txt: [("'that", 'that')]
SOL19020301-V17-04-page46.txt: [("'Yes", 'Yes')]
SOL19020301-V17-04-page53.txt: [("'talk", 'talk')]
SOL19020301-V17-04-page61.txt: [("'iA", 'iA'), ("''Z", 'Z')]
SOL19020301-V17-04-page62.txt: [("'V", 'V')]
SOL19020301-V17-04-page63.txt: [("'W", 'W'), ("''..", '..'), ("'P.", 'P.')]
SOL19020301-V17-04-page9.txt: [("'work", 'work')]
SOL19020401-V17-05-page10.txt: [("'is", 'is')]
SOL19020401-V17-05-page13.txt: [("'the", 'the')]
SOL19020401-V17-05-page16.txt: [("'was", 'was')]
SOL19020401-V17-05-page19.txt: [("'weekly", 'weekly')]
SOL19020401-V17-05-page34.txt: [("''because", 'because'), ("'Thou", 'Thou')]
SOL19020401-V17-05-page35.txt: [("'as", 'as')]
SOL19020401-V17-05-page36.txt: [("'nton", 'nton')]
SOL19020401-V17-05-page41.txt: [("'war", 'war')]
SOL19020401-V17-05-page43.txt: [("'Obey", 'Obey')]
SOL19020401-V17-05-page45.txt: [("'You", 'You')]
SOL19020401-V17-05-page46.txt: [("'Mid", 'Mid')]
SOL19020401-V17-05-page48.txt: [("'agreed", 'agreed')]
SOL19020401-V17-05-page55.txt: [("'Christian", 'Christian')]
SOL19020401-V17-05-page57.txt: [("'tall", 'tall')]
SOL19020401-V17-05-page58.txt: [("'from", 'from')]
SOL19020401-V17-05-page59.txt: [('\'\'benevolence"', 'benevolence"')]
SOL19020401-V17-05-page9.txt: [("'Who", 'Who')]
SOL19020501-V17-06-page1.txt: [("'OERTY", 'OERTY'), ("'nature", 'nature')]
SOL19020501-V17-06-page13.txt: [("'the", 'the')]
SOL19020501-V17-06-page14.txt: [("'manufacturers.", 'manufacturers.')]
SOL19020501-V17-06-page20.txt: [("'t", 't'), ("'N", 'N'), ("'t", 't'), ("'C", 'C'), ("'Oe", 'Oe'), ("'.", '.'), ("'Svp", 'Svp'), ('\'tee"', 'tee"')]
SOL19020501-V17-06-page24.txt: [("'a", 'a')]
SOL19020501-V17-06-page28.txt: [("'to", 'to')]
SOL19020501-V17-06-page31.txt: [("'bread", 'bread')]
SOL19020501-V17-06-page37.txt: [("'matter", 'matter'), ("'civil", 'civil')]
SOL19020501-V17-06-page38.txt: [("'saloons", 'saloons')]
SOL19020501-V17-06-page44.txt: [("'demanded", 'demanded')]
SOL19020501-V17-06-page54.txt: [("'government", 'government'), ("'to", 'to')]
SOL19020501-V17-06-page55.txt: [("'COMMENT", 'COMMENT')]
SOL19020501-V17-06-page58.txt: [("'be", 'be'), ("'States", 'States'), ("'Hawaii", 'Hawaii')]
SOL19020501-V17-06-page59.txt: [("'them", 'them')]
SOL19020501-V17-06-page60.txt: [("'banished", 'banished')]
SOL19020501-V17-06-page62.txt: [('\'"', '"')]
SOL19020601-V17-07-page13.txt: [("'little", 'little')]
SOL19020601-V17-07-page14.txt: [("'torture", 'torture')]
SOL19020601-V17-07-page16.txt: [("'alarm", 'alarm'), ("'these", 'these')]
SOL19020601-V17-07-page22.txt: [("'of", 'of')]
SOL19020601-V17-07-page23.txt: [("'the", 'the')]
SOL19020601-V17-07-page24.txt: [("'SENTINEL", 'SENTINEL')]
SOL19020601-V17-07-page25.txt: [("'that", 'that')]
SOL19020601-V17-07-page27.txt: [("'war", 'war')]
SOL19020601-V17-07-page37.txt: [("'ordinance", 'ordinance')]
SOL19020601-V17-07-page41.txt: [("'known", 'known')]
SOL19020601-V17-07-page42.txt: [("'Remember", 'Remember')]
SOL19020601-V17-07-page46.txt: [("'disturbances", 'disturbances'), ("'of", 'of')]
SOL19020601-V17-07-page47.txt: [("'it", 'it'), ("'is", 'is')]
SOL19020601-V17-07-page48.txt: [("'for", 'for')]
SOL19020601-V17-07-page49.txt: [("'Early", 'Early')]
SOL19020601-V17-07-page51.txt: [("'Mistaken.", 'Mistaken.')]
SOL19020601-V17-07-page52.txt: [("'comparing", 'comparing'), ("'the", 'the')]
SOL19020601-V17-07-page55.txt: [("'Certainly", 'Certainly')]
SOL19020601-V17-07-page56.txt: [("'their", 'their'), ("'American", 'American')]
SOL19020601-V17-07-page57.txt: [("'clerks", 'clerks')]
SOL19020601-V17-07-page68.txt: [("'No", 'No')]
SOL19020601-V17-07-page7.txt: [("'our", 'our')]
SOL19020601-V17-07-page9.txt: [("'Scarlet", 'Scarlet')]
SOL19020701-V17-08-page1.txt: [("'By", 'By')]
SOL19020701-V17-08-page11.txt: [("'which", 'which')]
SOL19020701-V17-08-page13.txt: [("'WAS", 'WAS'), ("'consent", 'consent')]
SOL19020701-V17-08-page16.txt: [("'impossible", 'impossible')]
SOL19020701-V17-08-page24.txt: [("'its", 'its')]
SOL19020701-V17-08-page25.txt: [("'unconsciously", 'unconsciously')]
SOL19020701-V17-08-page28.txt: [("'unwilling", 'unwilling')]
SOL19020701-V17-08-page30.txt: [("'of", 'of')]
SOL19020701-V17-08-page32.txt: [("'THE", 'THE')]
SOL19020701-V17-08-page34.txt: [("'SENTINEL", 'SENTINEL')]
SOL19020701-V17-08-page35.txt: [("'raft's", 'rafts')]
SOL19020701-V17-08-page39.txt: [("'our", 'our')]
SOL19020701-V17-08-page42.txt: [("'at", 'at')]
SOL19020701-V17-08-page47.txt: [("'to", 'to')]
SOL19020701-V17-08-page48.txt: [("'of", 'of')]
SOL19020701-V17-08-page53.txt: [("'Angelo", 'Angelo')]
SOL19020701-V17-08-page54.txt: [("'stop", 'stop')]
SOL19020701-V17-08-page55.txt: [("'upper", 'upper')]
SOL19020701-V17-08-page56.txt: [("'arrest", 'arrest'), ("'is", 'is')]
SOL19020701-V17-08-page58.txt: [("'man", 'man')]
SOL19020701-V17-08-page59.txt: [("'case", 'case')]
SOL19020701-V17-08-page6.txt: [("'always", 'always')]
SOL19020701-V17-08-page61.txt: [("'Ac", 'Ac')]
SOL19020801-V17-09-page10.txt: [("'goo", 'goo')]
SOL19020801-V17-09-page11.txt: [("'inalienable", 'inalienable')]
SOL19020801-V17-09-page12.txt: [("'the", 'the'), ("'at", 'at')]
SOL19020801-V17-09-page2.txt: [("'contest", 'contest')]
SOL19020801-V17-09-page20.txt: [("'They", 'They')]
SOL19020801-V17-09-page22.txt: [("'with", 'with')]
SOL19020801-V17-09-page24.txt: [("'instance", 'instance')]
SOL19020801-V17-09-page31.txt: [("'these", 'these')]
SOL19020801-V17-09-page33.txt: [("'rather", 'rather'), ("'the", 'the')]
SOL19020801-V17-09-page34.txt: [("'and", 'and'), ("'with", 'with')]
SOL19020801-V17-09-page35.txt: [("'that", 'that')]
SOL19020801-V17-09-page36.txt: [("'tyranny", 'tyranny')]
SOL19020801-V17-09-page38.txt: [("'extends", 'extends')]
SOL19020801-V17-09-page39.txt: [("'It", 'It')]
SOL19020801-V17-09-page41.txt: [("'and", 'and')]
SOL19020801-V17-09-page48.txt: [("'the", 'the')]
SOL19020801-V17-09-page64.txt: [("'The", 'The')]
SOL19020801-V17-09-page8.txt: [("'significance", 'significance')]
SOL19020901-V17-10-page1.txt: [("'and", 'and')]
SOL19020901-V17-10-page12.txt: [("'Another", 'Another')]
SOL19020901-V17-10-page13.txt: [("'was", 'was')]
SOL19020901-V17-10-page16.txt: [("'Church", 'Church')]
SOL19020901-V17-10-page4.txt: [("'since", 'since')]
SOL19020901-V17-10-page40.txt: [("'would", 'would')]
SOL19020901-V17-10-page42.txt: [("'integrity", 'integrity')]
SOL19020901-V17-10-page5.txt: [("'He", 'He')]
SOL19020901-V17-10-page50.txt: [("'there", 'there'), ("'of", 'of')]
SOL19020901-V17-10-page51.txt: [("'movements", 'movements')]
SOL19020901-V17-10-page53.txt: [("'miles", 'miles'), ("'for", 'for'), ("'desecration", 'desecration')]
SOL19020901-V17-10-page54.txt: [("'were", 'were')]
SOL19020901-V17-10-page55.txt: [("'had", 'had'), ("'Vice", 'Vice')]
SOL19020901-V17-10-page56.txt: [("'fe", 'fe')]
SOL19020901-V17-10-page59.txt: [("'..", '..'), ("'A", 'A'), ("'Food", 'Food'), ("'The", 'The'), ("'Historical", 'Historical'), ("'Personal", 'Personal')]
SOL19020901-V17-10-page60.txt: [("'O", 'O'), ('\'KO"', 'KO"')]
SOL19020901-V17-10-page64.txt: [("'age", 'age')]
SOL19021001-V17-11-page1.txt: [("'ERTaf", 'ERTaf')]
SOL19021001-V17-11-page19.txt: [("'prove", 'prove'), ("'on", 'on')]
SOL19021001-V17-11-page21.txt: [("'the", 'the')]
SOL19021001-V17-11-page27.txt: [("'the", 'the')]
SOL19021001-V17-11-page40.txt: [("'priority", 'priority')]
SOL19021001-V17-11-page46.txt: [("'significance", 'significance')]
SOL19021001-V17-11-page51.txt: [("'whom", 'whom')]
SOL19021001-V17-11-page53.txt: [("'sacred", 'sacred'), ("'sacred", 'sacred')]
SOL19021001-V17-11-page55.txt: [("'county", 'county')]
SOL19021001-V17-11-page57.txt: [("'orcement", 'orcement')]
SOL19021001-V17-11-page59.txt: [("'.", '.'), ("'...", '...')]
SOL19021001-V17-11-page6.txt: [("'I", 'I'), ("'Lay", 'Lay')]
SOL19021001-V17-11-page61.txt: [("'with", 'with')]
SOL19021101-V17-12-page17.txt: [("'They", 'They')]
SOL19021101-V17-12-page22.txt: [("'particular", 'particular')]
SOL19021101-V17-12-page34.txt: [("'a", 'a')]
SOL19021101-V17-12-page4.txt: [("'a", 'a'), ("'to", 'to')]
SOL19021101-V17-12-page40.txt: [("'worthy", 'worthy')]
SOL19021101-V17-12-page42.txt: [("'Remember", 'Remember')]
SOL19021101-V17-12-page44.txt: [("'United", 'United')]
SOL19021101-V17-12-page53.txt: [("'totally", 'totally')]
SOL19021101-V17-12-page61.txt: [("'We", 'We')]
SOL19021101-V17-12-page62.txt: [("'..", '..')]
SOL19021201-V17-13-page15.txt: [("'The", 'The')]
SOL19021201-V17-13-page16.txt: [("'Los", 'Los')]
SOL19021201-V17-13-page19.txt: [("'No", 'No')]
SOL19021201-V17-13-page2.txt: [("'to", 'to')]
SOL19021201-V17-13-page20.txt: [("'but", 'but')]
SOL19021201-V17-13-page24.txt: [("'blue", 'blue')]
SOL19021201-V17-13-page25.txt: [("'They", 'They')]
SOL19021201-V17-13-page3.txt: [("'We", 'We'), ("'If", 'If')]
SOL19021201-V17-13-page30.txt: [("'non", 'non')]
SOL19021201-V17-13-page5.txt: [("'of", 'of')]
SOL19021201-V17-13-page50.txt: [("'American", 'American')]
SOL19021201-V17-13-page57.txt: [("'.", '.'), ("'v", 'v'), ("'...........", '...........')]
SOL19021201-V17-13-page7.txt: [("'So", 'So'), ("'the", 'the')]
SOL19030101-V18-01-page18.txt: [("'Ile", 'Ile'), ("'ft", 'ft'), ("'.", '.'), ("'NW.", 'NW.'), ('\'".', '".'), ('\'isno"', 'isno"')]
SOL19030101-V18-01-page7.txt: [("'being", 'being')]
SOL19030108-V18-02-page1.txt: [("'teas", 'teas')]
SOL19030108-V18-02-page10.txt: [("'By", 'By')]
SOL19030108-V18-02-page18.txt: [("'.", '.')]
SOL19030108-V18-02-page20.txt: [("''e.", 'e.')]
SOL19030115-V18-03-page1.txt: [("'G", 'G')]
SOL19030115-V18-03-page10.txt: [("'twelve", 'twelve'), ("'Moral", 'Moral'), ("'Municipal", 'Municipal'), ("'The", 'The'), ("'The", 'The'), ("'Labor", 'Labor'), ("'The", 'The'), ("'National", 'National'), ("'Amusements", 'Amusements'), ("'Prison", 'Prison'), ("'Prevention", 'Prevention'), ("'World's", 'Worlds'), ("'The", 'The'), ("'The", 'The'), ("'Topic", 'Topic'), ("'or", 'or'), ("'My", 'My'), ("'March", 'March'), ("'Sabbath", 'Sabbath'), ("'Civil", 'Civil')]
SOL19030115-V18-03-page11.txt: [("'.", '.')]
SOL19030115-V18-03-page13.txt: [("'proprietors", 'proprietors'), ('\'union."', 'union."')]
SOL19030115-V18-03-page14.txt: [('\'"', '"')]
SOL19030115-V18-03-page17.txt: [("'Sayable", 'Sayable')]
SOL19030115-V18-03-page18.txt: [("'Ms", 'Ms'), ("'which", 'which')]
SOL19030115-V18-03-page4.txt: [("'Sabbath", 'Sabbath'), ("'Civil", 'Civil')]
SOL19030115-V18-03-page5.txt: [("'conscientious", 'conscientious')]
SOL19030115-V18-03-page6.txt: [("'great", 'great'), ("'great", 'great'), ("'held", 'held'), ("'of", 'of')]
SOL19030115-V18-03-page9.txt: [("'ered", 'ered')]
SOL19030122-V18-04-page12.txt: [("'dared", 'dared')]
SOL19030122-V18-04-page17.txt: [("'wish", 'wish')]
SOL19030129-V18-05-page1.txt: [("'A", 'A')]
SOL19030129-V18-05-page12.txt: [("'with", 'with'), ("'Superintendent", 'Superintendent')]
SOL19030129-V18-05-page17.txt: [("'Post-office.", 'Post-office.')]
SOL19030129-V18-05-page18.txt: [("'....", '....'), ("'AI", 'AI'), ("'AI", 'AI')]
SOL19030129-V18-05-page2.txt: [("''Sabbath", 'Sabbath')]
SOL19030129-V18-05-page6.txt: [("'Philippines", 'Philippines')]
SOL19030205-V18-06-page2.txt: [("'hC", 'hC')]
SOL19030205-V18-06-page4.txt: [("'refuses", 'refuses')]
SOL19030205-V18-06-page8.txt: [("'not", 'not')]
SOL19030212-V18-07-page12.txt: [("'of", 'of')]
SOL19030212-V18-07-page5.txt: [("'course", 'course'), ("'that", 'that')]
SOL19030212-V18-07-page9.txt: [("'that", 'that'), ("'and", 'and')]
SOL19030219-V18-08-page10.txt: [('\'Sabbath"', 'Sabbath"')]
SOL19030219-V18-08-page12.txt: [("'when", 'when')]
SOL19030219-V18-08-page13.txt: [("'statement", 'statement')]
SOL19030219-V18-08-page15.txt: [("'Post-office.", 'Post-office.')]
SOL19030219-V18-08-page16.txt: [("''P", 'P')]
SOL19030219-V18-08-page2.txt: [("'lasing", 'lasing')]
SOL19030219-V18-08-page5.txt: [("'of", 'of')]
SOL19030219-V18-08-page7.txt: [('\'"', '"'), ("'persons", 'persons')]
SOL19030219-V18-08-page9.txt: [("'What", 'What'), ("'is", 'is'), ("'from", 'from'), ("'a", 'a')]
SOL19030226-V18-09-page2.txt: [("'Set", 'Set')]
SOL19030226-V18-09-page7.txt: [("'compulsory", 'compulsory')]
SOL19030305-V18-10-page10.txt: [("'better", 'better')]
SOL19030305-V18-10-page15.txt: [("'a", 'a')]
SOL19030305-V18-10-page2.txt: [("'boill", 'boill')]
SOL19030305-V18-10-page7.txt: [("'to", 'to')]
SOL19030312-V18-11-page1.txt: [("'Verily", 'Verily')]
SOL19030312-V18-11-page16.txt: [("'li", 'li'), ("'.e't.te.", '.et.te.'), ("'e", 'e')]
SOL19030312-V18-11-page2.txt: [("'Long", 'Long'), ("'Long", 'Long')]
SOL19030319-V18-12-page11.txt: [("'of", 'of'), ("'damental", 'damental'), ("'Church", 'Church'), ("'delegated", 'delegated'), ("'been", 'been')]
SOL19030319-V18-12-page15.txt: [("'West", 'West'), ("'York", 'York')]
SOL19030326-V18-13-page1.txt: [("'why", 'why')]
SOL19030326-V18-13-page10.txt: [("'champions", 'champions'), ("'carried", 'carried')]
SOL19030402-V18-14-page12.txt: [("'It", 'It')]
SOL19030402-V18-14-page13.txt: [("'THE", 'THE')]
SOL19030402-V18-14-page14.txt: [("'upholds", 'upholds')]
SOL19030402-V18-14-page4.txt: [("'of", 'of'), ("'notes", 'notes')]
SOL19030409-V18-15-page15.txt: [("'P.", 'P.'), ("'DON'T", 'DONT')]
SOL19030409-V18-15-page16.txt: [("'countries", 'countries')]
SOL19030409-V18-15-page6.txt: [("'week.", 'week.')]
SOL19030416-V18-16-page15.txt: [("'.''i'ikAtIzAr.", '.iikAtIzAr.'), ("'.", '.')]
SOL19030423-V18-17-page15.txt: [("'P.", 'P.')]
SOL19030423-V18-17-page5.txt: [("'American", 'American'), ("'American", 'American'), ("'was", 'was')]
SOL19030430-V18-18-page15.txt: [("'P.", 'P.')]
SOL19030430-V18-18-page16.txt: [("'Vatican.", 'Vatican.')]
SOL19030430-V18-18-page3.txt: [("'character", 'character')]
SOL19030430-V18-18-page7.txt: [("'ave", 'ave')]
SOL19030507-V18-19-page16.txt: [("'land", 'land')]
SOL19030507-V18-19-page2.txt: [("'..", '..')]
SOL19030507-V18-19-page7.txt: [("'Catholic.", 'Catholic.'), ("'Tithes", 'Tithes')]
SOL19030514-V18-20-page1.txt: [("'which", 'which'), ("'when", 'when')]
SOL19030514-V18-20-page10.txt: [("'the", 'the')]
SOL19030514-V18-20-page16.txt: [("'..", '..')]
SOL19030514-V18-20-page2.txt: [("'our", 'our')]
SOL19030521-V18-21-page13.txt: [("'being", 'being'), ("'and", 'and')]
SOL19030521-V18-21-page15.txt: [("'r", 'r'), ("'As", 'As'), ("'V", 'V'), ("'.", '.'), ("'.'t", '.t')]
SOL19030521-V18-21-page4.txt: [("'prevent", 'prevent')]
SOL19030521-V18-21-page9.txt: [("'to", 'to')]
SOL19030528-V18-22-page12.txt: [("'observance", 'observance')]
SOL19030528-V18-22-page15.txt: [("'be", 'be')]
SOL19030528-V18-22-page2.txt: [('\'"The', '"The'), ("'religion", 'religion'), ("'the", 'the')]
SOL19030528-V18-22-page9.txt: [("'terrible", 'terrible')]
SOL19030604-V18-23-page12.txt: [("'Gladstone", 'Gladstone')]
SOL19030604-V18-23-page15.txt: [("'open", 'open')]
SOL19030604-V18-23-page2.txt: [("'rx", 'rx'), ("'AA", 'AA'), ("''..Q.", '..Q.'), ("'..", '..')]
SOL19030611-V18-24-page1.txt: [("'well", 'well'), ("'with", 'with')]
SOL19030611-V18-24-page12.txt: [("'employees", 'employees')]
SOL19030611-V18-24-page4.txt: [("'upholding", 'upholding')]
SOL19030611-V18-24-page5.txt: [("'a", 'a')]
SOL19030611-V18-24-page6.txt: [("'these", 'these')]
SOL19030618-V18-25-page12.txt: [("'to-day.", 'to-day.'), ("'successful", 'successful')]
SOL19030618-V18-25-page2.txt: [("'for", 'for')]
SOL19030618-V18-25-page4.txt: [("'One", 'One')]
SOL19030618-V18-25-page8.txt: [("'not", 'not')]
SOL19030618-V18-25-page9.txt: [("'It", 'It')]
SOL19030625-V18-26-page10.txt: [("'in", 'in')]
SOL19030625-V18-26-page11.txt: [("'and", 'and')]
SOL19030625-V18-26-page15.txt: [("'signed", 'signed')]
SOL19030702-V18-27-page1.txt: [("'km", 'km')]
SOL19030702-V18-27-page15.txt: [("'the", 'the')]
SOL19030702-V18-27-page16.txt: [("'t", 't')]
SOL19030702-V18-27-page6.txt: [("'is", 'is')]
SOL19030702-V18-27-page8.txt: [("'the", 'the')]
SOL19030709-V18-28-page11.txt: [("'he", 'he')]
SOL19030709-V18-28-page6.txt: [("'SENTINEL", 'SENTINEL')]
SOL19030716-V18-29-page1.txt: [("'world", 'world')]
SOL19030716-V18-29-page16.txt: [('\'Wohle"mtitet', 'Wohle"mtitet')]
SOL19030716-V18-29-page2.txt: [("'aughitai", 'aughitai')]
SOL19030723-V18-30-page1.txt: [("'vendor", 'vendor')]
SOL19030723-V18-30-page4.txt: [("'election", 'election')]
SOL19030723-V18-30-page5.txt: [("'the", 'the')]
SOL19030723-V18-30-page9.txt: [("'the", 'the')]
SOL19030730-V18-31-page5.txt: [("'The", 'The')]
SOL19030806-V18-32-page12.txt: [("'principles", 'principles')]
SOL19030806-V18-32-page16.txt: [("'V", 'V'), ("'.", '.')]
SOL19030806-V18-32-page2.txt: [("'.", '.'), ('\'k"', 'k"')]
SOL19030806-V18-32-page3.txt: [("'lines", 'lines')]
SOL19030820-V18-34-page1.txt: [("'within", 'within'), ("'when", 'when'), ("'y", 'y')]
SOL19030820-V18-34-page16.txt: [("'rat", 'rat'), ('\'."', '."'), ("'Of", 'Of'), ("'IC", 'IC')]
SOL19030820-V18-34-page6.txt: [("'quoted", 'quoted')]
SOL19030827-V18-35-page3.txt: [("'Workingmen", 'Workingmen')]
SOL19030827-V18-35-page4.txt: [("'with", 'with')]
SOL19030827-V18-35-page5.txt: [("'Six", 'Six')]
SOL19030827-V18-35-page9.txt: [("'This", 'This')]
SOL19030903-V18-36-page16.txt: [("'York", 'York')]
SOL19030903-V18-36-page9.txt: [("'that", 'that')]
SOL19030910-V18-37-page13.txt: [("'their", 'their')]
SOL19030910-V18-37-page4.txt: [("'of", 'of')]
SOL19030910-V18-37-page9.txt: [("'we", 'we')]
SOL19030917-V18-38-page5.txt: [("'On", 'On')]
SOL19030924-V18-39-page1.txt: [("'work", 'work')]
SOL19030924-V18-39-page11.txt: [("'East", 'East')]
SOL19030924-V18-39-page2.txt: [("'WORDS", 'WORDS')]
SOL19031001-V18-40-page1.txt: [("'y", 'y')]
SOL19031015-V18-41-page1.txt: [("'we", 'we')]
SOL19031015-V18-41-page10.txt: [("'An", 'An')]
SOL19031015-V18-41-page11.txt: [("'the", 'the')]
SOL19031015-V18-41-page12.txt: [("'partially", 'partially')]
SOL19031015-V18-41-page16.txt: [("'A", 'A'), ("'.", '.'), ("'cents.", 'cents.')]
SOL19031015-V18-41-page5.txt: [("'having", 'having')]
SOL19031015-V18-41-page6.txt: [("'and", 'and')]
SOL19031022-V18-42-page13.txt: [("'religion", 'religion')]
SOL19031022-V18-42-page15.txt: [("'harmony", 'harmony')]
SOL19031022-V18-42-page16.txt: [("'the", 'the'), ("'courts.", 'courts.'), ("'bar", 'bar')]
SOL19031029-V18-43-page10.txt: [("'ILL.", 'ILL.')]
SOL19031029-V18-43-page16.txt: [('\'w"', 'w"')]
SOL19031029-V18-43-page5.txt: [("'question.", 'question.')]
SOL19031029-V18-43-page6.txt: [("'the", 'the')]
SOL19031029-V18-43-page9.txt: [("'Sunday", 'Sunday')]
SOL19031105-V18-44-page1.txt: [("'with", 'with'), ("'Notation", 'Notation')]
SOL19031105-V18-44-page12.txt: [("'not", 'not')]
SOL19031105-V18-44-page14.txt: [("'I", 'I'), ("'We", 'We')]
SOL19031105-V18-44-page15.txt: [("'City", 'City'), ("'s", 's')]
SOL19031112-V18-45-page1.txt: [("'with", 'with')]
SOL19031112-V18-45-page10.txt: [("'Under", 'Under')]
SOL19031112-V18-45-page11.txt: [("'the", 'the')]
SOL19031112-V18-45-page15.txt: [("'the", 'the'), ("'palpably", 'palpably')]
SOL19031112-V18-45-page6.txt: [("'American", 'American')]
SOL19031112-V18-45-page7.txt: [("'of", 'of')]
SOL19031119-V18-46-page1.txt: [("'who", 'who'), ("'working", 'working'), ("'warn", 'warn'), ("'while.", 'while.'), ("'e", 'e'), ("'way", 'way'), ("'will", 'will'), ("'we", 'we'), ("'while.", 'while.')]
SOL19031119-V18-46-page11.txt: [("'ar", 'ar')]
SOL19031119-V18-46-page12.txt: [("'in", 'in'), ("'That", 'That'), ("'be", 'be')]
SOL19031119-V18-46-page14.txt: [("'lusty", 'lusty')]
SOL19031119-V18-46-page3.txt: [("'military", 'military')]
SOL19031126-V18-47-page12.txt: [("'cloven", 'cloven')]
SOL19031126-V18-47-page15.txt: [("'the", 'the')]
SOL19031126-V18-47-page5.txt: [("'be", 'be')]
SOL19031126-V18-47-page6.txt: [("'by", 'by')]
SOL19031126-V18-47-page7.txt: [("'by", 'by')]
SOL19031126-V18-47-page9.txt: [("'to", 'to')]
SOL19031203-V18-48-page1.txt: [("'want", 'want'), ("'When", 'When')]
SOL19031203-V18-48-page12.txt: [("'why", 'why'), ("'pay", 'pay'), ("'are", 'are'), ("'on", 'on')]
SOL19031203-V18-48-page6.txt: [("'with", 'with')]
SOL19031210-V18-49-page1.txt: [("'with", 'with')]
SOL19031210-V18-49-page14.txt: [("'s", 's')]
SOL19031210-V18-49-page15.txt: [("'s", 's')]
SOL19031210-V18-49-page6.txt: [("'believed", 'believed')]
SOL19031217-V18-50-page10.txt: [("'the", 'the')]
SOL19031217-V18-50-page11.txt: [("'the", 'the')]
SOL19031217-V18-50-page2.txt: [("'Non-sectarian", 'Non-sectarian'), ("'principles.", 'principles.')]
SOL19031217-V18-50-page7.txt: [("'six", 'six')]
SOL19031217-V18-50-page8.txt: [("'shop", 'shop')]
SOL19031217-V18-50-page9.txt: [("'under", 'under')]
SOL19031224-V18-51-page1.txt: [("'ery", 'ery')]
SOL19031224-V18-51-page11.txt: [("'tice", 'tice')]
SOL19031224-V18-51-page2.txt: [("'round", 'round')]
SOL19031231-V18-52-page1.txt: [("'ania", 'ania'), ("'larch", 'larch')]
SOL19031231-V18-52-page15.txt: [("'Remember", 'Remember'), ("'Remember", 'Remember'), ("'course", 'course')]
SOL19031231-V18-52-page2.txt: [("'Sunday", 'Sunday'), ("'works", 'works')]
SOL19031231-V18-52-page3.txt: [("'provides", 'provides'), ("'man", 'man')]
SOL19031231-V18-52-page6.txt: [("'s", 's')]
SOL19040107-V19-01-page1.txt: [("'page", 'page')]
SOL19040107-V19-01-page10.txt: [("'to", 'to'), ("'New", 'New')]
SOL19040107-V19-01-page11.txt: [("'close", 'close'), ("'Labor's", 'Labors')]
SOL19040107-V19-01-page14.txt: [("'We", 'We'), ("'The", 'The')]
SOL19040107-V19-01-page16.txt: [("'.", '.')]
SOL19040107-V19-01-page4.txt: [("'Hidalgo", 'Hidalgo')]
SOL19040107-V19-01-page5.txt: [("'separated", 'separated')]
SOL19040107-V19-01-page9.txt: [("'Sentinel", 'Sentinel')]
SOL19040114-V19-02-page10.txt: [("'requires", 'requires')]
SOL19040114-V19-02-page11.txt: [("'ago", 'ago')]
SOL19040114-V19-02-page2.txt: [("'attending", 'attending')]
SOL19040114-V19-02-page4.txt: [("'taw", 'taw')]
SOL19040114-V19-02-page8.txt: [("'s", 's'), ("'scot", 'scot'), ("'his", 'his')]
SOL19040114-V19-02-page9.txt: [("'enforced", 'enforced'), ("'action", 'action')]
SOL19040121-V19-03-page1.txt: [("'Notation", 'Notation'), ("'bania", 'bania')]
SOL19040121-V19-03-page2.txt: [("'NTllAR", 'NTllAR')]
SOL19040121-V19-03-page4.txt: [("'are", 'are')]
SOL19040121-V19-03-page8.txt: [("'still", 'still')]
SOL19040121-V19-03-page9.txt: [("'in", 'in')]
SOL19040128-V19-04-page1.txt: [("'Notation", 'Notation'), ("'Ivas", 'Ivas')]
SOL19040128-V19-04-page10.txt: [("'see", 'see')]
SOL19040128-V19-04-page13.txt: [("'Jewish", 'Jewish')]
SOL19040128-V19-04-page2.txt: [("'round", 'round')]
SOL19040128-V19-04-page4.txt: [("'The", 'The')]
SOL19040128-V19-04-page5.txt: [("'vs.", 'vs.')]
SOL19040128-V19-04-page9.txt: [("'agents.", 'agents.')]
SOL19040204-V19-05-page1.txt: [("'birtually", 'birtually'), ("'lvhich", 'lvhich')]
SOL19040204-V19-05-page12.txt: [("'any", 'any')]
SOL19040204-V19-05-page14.txt: [("'works", 'works'), ("'James", 'James'), ("'show", 'show')]
SOL19040204-V19-05-page16.txt: [("'h", 'h'), ("'With", 'With')]
SOL19040204-V19-05-page4.txt: [("'hearing", 'hearing'), ('\'"', '"')]
SOL19040204-V19-05-page5.txt: [("'November", 'November')]
SOL19040204-V19-05-page6.txt: [("'Sabbath", 'Sabbath'), ("'cusses", 'cusses')]
SOL19040211-V19-06-page13.txt: [("'of", 'of')]
SOL19040211-V19-06-page15.txt: [("'Baptists", 'Baptists'), ("'a", 'a')]
SOL19040211-V19-06-page2.txt: [("'instruction", 'instruction')]
SOL19040211-V19-06-page6.txt: [("'present", 'present')]
In [23]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/correction4

Average verified rate: 0.984747821460735

Average of error rates: 0.018210371819960866

Total token count: 1248084

In [24]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[("'", 890),
 ('t', 760),
 ('n', 508),
 ('e', 487),
 ('d', 417),
 ('w', 413),
 ('th', 397),
 ('m', 334),
 ('g', 261),
 ('x', 250),
 ('co', 245),
 ('r', 207),
 ('f', 199),
 ('sunday-closing', 180),
 ('u', 147),
 ('sunday-enforcement', 125),
 ('sunday-law', 116),
 ("clerks'", 96),
 ('pa', 96),
 ('z', 85),
 ('mo', 79),
 ('k', 70),
 ('io', 56),
 ('oo', 50),
 ("barbers'", 49),
 ('saloon-keepers', 46),
 ("grocers'", 45),
 ('wm', 44),
 ('mutchler', 43),
 ('re', 42),
 ('church-and-state', 41),
 ('tion', 40),
 ('loth', 38),
 ('law-abiding', 35),
 ('ex', 33),
 ('mass-meeting', 31),
 ('q', 30),
 ('street-cars', 29),
 ("o'gorman", 27),
 ('farmakis', 27),
 ('saloonmen', 27),
 ('sundayclosing', 26),
 ('rest-day', 25),
 ('ourduty', 25),
 ('ment', 25),
 ('lc', 25),
 ('al', 24),
 ('tinel', 24),
 ('se', 24),
 ("butchers'", 24)]

Correction 5 -- Remove Long Errors

In [25]:
reports.long_errors(errors_summary, min_length=15)
Out[25]:
(['gold-embroidered',
  'oxecexexecececeozacexeceo',
  "ittttttt-'''''''",
  'self-renunciation',
  'vrammtwolimtrytwzroamptruuoywy',
  'selfcontradictory',
  'sabbath-breaking',
  'christmas-observing',
  'release-from-toil',
  'dog-in-the-manger',
  'boarding-schools',
  'self-constituted',
  'unchristianizing',
  'ifligsoitritiased',
  'german-protestant',
  'neo-jeffersonian',
  'national-reformers',
  'politico-ecclesiastical',
  'long-established',
  'oneseventh-dayobservertriedandfined',
  'the-constitution',
  "representatives'",
  'pro-administration',
  'sunday-observing',
  'titittimitiviiiiityytyytitimiitwittitymy',
  'respectablechristian',
  'ante-confederation',
  'attorneys-general',
  'ytttrywyttimitfivitittm',
  'inter-denominational',
  'lieutenant-general',
  'completelyreversedbytjuicsehimself',
  'classpfcitizenswhodseisretohavetheir',
  'principle-governed',
  'disfranthisement',
  'multi-millionaires',
  'self-preservation',
  'bothasindividualsandasa',
  'ordinary-looking',
  "'respectability'",
  "will-o'-the-wisps",
  'newly-inaugurated',
  'persenevertheless',
  'politicoreligious',
  'well-authenticated',
  'freedom-of-worship',
  'dogin-the-manger',
  'trans-continental',
  'attorney-general',
  'ttttttttttttttttttttttttttk',
  'corrupgraphically',
  "their'discourses",
  'union-of-church-and-state',
  "self-government'",
  "sendforoneandifyoudon'tthinkitiswell",
  'counter-organization',
  'no-entanglingalliance',
  'iittimityttityffittit',
  'sabbath-observance',
  'self-destruction',
  'conscience-crushing',
  'eternity-reaching',
  'commander-in-chief',
  'lieutenant-governor',
  'narrow-mindedness',
  'ofconsiderations',
  'spanish-american',
  'fellowrevolutionists',
  're-christianized',
  'homestead-braddock',
  'dechristianizing',
  'immtittimivittmitimvivvvvvt',
  'rrrrrrrrrrrrrrrrrrrrn',
  'penny-in-the-slot',
  'righteousness-by-works',
  'god-in-the-constitution',
  'preponderatingly',
  'extra-constitutional',
  'adjutant-general',
  'self-forgetfulness',
  'indiscriminating',
  'sitaround-and-do-nothing',
  'ever-tobe-envied',
  'sundayobservance',
  'dimly-remembered',
  'competition-strangling',
  'sentinfi--editor',
  'ioropytimapswwwfir',
  'divinely-appointed',
  'sundayenforcement',
  'counter-proposing',
  'laughter-compelling',
  'ivitimittittiymitytivitimmtwiymywitiv',
  'sunday-closing-by-law',
  'compulsory-idleness',
  'twentieth-century',
  'britannia-on-the-bay',
  'generacovetousness',
  'fellow-countrymen',
  'mentalandmoralvalue',
  'self-contradictory',
  'twenty-fourhours',
  'distillgilishille',
  'myimivitymmyymititimitimmywitimmi',
  'sunday-enforcement',
  'self-gratification',
  'crossexamination',
  'iivymyytwityyttivity',
  'pagancounterfeits',
  'curiosity-seekers',
  'self-igoveminent',
  "iriatlilill'ariailtarika",
  'firagragragigiledigili',
  'socio-industrial',
  'non-religionists',
  'law-and-order-league',
  'foreign-speaking',
  'ultra-protestants',
  'consent-of-the-governed',
  'unostentatiousness',
  'andmenofinfluence',
  'newly-consecrated',
  'hideously-shaped',
  'ttttttttttttttttttttttttic',
  'democratic-republican',
  'non-intoxicating',
  'sunday-enforcepaent',
  'republican-wrecking',
  'non-sectarianism',
  'ibwhiaslicanuoly',
  're-establishment',
  'indianapolis-kansas',
  'lieutenant-colonel',
  'anti-imperialist',
  'fellow-reformers',
  'sunday-closing-en',
  'easter-observing',
  "'ttttttttttttrrttttt",
  'artedsandllissenotljelphiramr',
  'church-and-state',
  'self-respectingly',
  'largely-attended',
  'ecclesiastical-civil',
  'politico-religio',
  'administrational',
  'pleasure-seeking',
  'fellow-christians',
  'fellow-believers',
  'missionary-educational',
  'understtanddiningg',
  'diseaseproducing',
  'considerablepolitical',
  "'self-government",
  'semi-instinctive',
  'divinely-implanted',
  "foreign'countries",
  'all-comprehending',
  'self-condemnation',
  'fellow-murderers',
  'hamburg-american',
  'whole-heartedness',
  'vice-presidential',
  'fellowreligionists',
  'american-americans',
  'postmaster-general',
  'vokatlauubokatfls',
  'sunday-observance-by-law',
  'desecration-of-the',
  'cross-examination',
  'christian-nation',
  'anti-ritualistic',
  'non-interference',
  'suggesprominence',
  'superenlightened',
  'such-legislation',
  'religion-and-state',
  'sunday-closing-enforcement',
  'politicoecclesiastical',
  'seventeenth-century',
  'excursion-carrying',
  'family-supporting',
  'broad-mindedness',
  'brigadier-general',
  'attorney-generil',
  'bybankdarftonnewyork',
  'recently-published',
  'superficialities',
  'quasi-guardianship',
  'conciliationibus',
  'seven-days-in-the-week',
  'fellow-prisoners',
  '-by-sunday-law-enforcement',
  'most-favored-nation',
  'chrisrepresentatives',
  'religio-political',
  'fellow-unionists',
  'tivititititittitirmy',
  'governor-general',
  'bloody-mindedness',
  'uncontrollableness',
  'anti-evangelical',
  'well-intentioned',
  'church-appointed',
  'district-attorney',
  'sabbath-honoring',
  'poverty-stricken',
  "church-and-'state-established",
  'spiritualization',
  'sunday-observance',
  'church-and-stateism',
  'denominationalizing',
  'sparsely-settled',
  'mission-to-the-vatican',
  'sabbath-keepaccomplishing',
  'self-aggrandizement',
  'delicatessen-shops',
  'un-christianlike',
  'commissioner-general',
  'religiopolitical',
  'book-advertising',
  'immimmoinnieenin',
  'heibmiiiiiiiimiva',
  'self-deliverance',
  'anti-imperialists',
  "'constitutional'",
  'sabbathrtrithout',
  'pleasure-seekers',
  'restaurant-keepers',
  'anti-sabbath-breaking',
  'rrrrrrrrrrrrrrrr',
  'quarter-sessions',
  "negotiations'that",
  'politico-religious',
  'sabbath-observing',
  'self-determining',
  'self-satisfactory',
  'exceptrestaurants',
  'tillman-mclaurin',
  'procurator-general',
  'howling-wilderness',
  "'theministersofreligionareexcepted",
  'estoreggraveinnry',
  'thestatehasnorighttoprohibitit',
  "fellow-citizen's",
  'labor-competition',
  'constantly-exerted',
  'sabbath-breakers',
  'self-destructive',
  'fellow-religionists',
  "international'court",
  'dog-in-themanger',
  'berlin-bridgeport'],
 15)
In [26]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = "correction4"
cycle = "correction5"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    replacements.append(clean.check_for_repeating_characters(tokens, "m|M"))
    replacements.append(clean.check_for_repeating_characters(tokens, "i|I"))
    replacements.append(clean.check_for_repeating_characters(tokens, "v|V"))
    replacements.append(clean.check_for_repeating_characters(tokens, "t|T"))
    replacements.append(clean.check_for_repeating_characters(tokens, "r|R"))
    
    replacements = [item for sublist in replacements for item in sublist]
            
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
SOL19000510-V15-17-page11.txt: [('IITTIMITYTTITYffITTIT', ' ')]
SOL19000614-V15-23-page10.txt: [('MYIMIVITYMMYYMITITIMITIMMYWITIMMI', ' ')]
SOL19000816-V15-32-page10.txt: [('YTTTRYWYTTIMITFIVITITTM', ' ')]
SOL19000823-V15-33-page15.txt: [('TTTTTTTTTTTTTTTTTTTTTTTTTTK', ' ')]
SOL19000913-V15-36-page15.txt: [("TTI'TTTTTT.TTTTTTTTTTTTTTTTTTTTTTTTIC", ' ')]
SOL19000927-V15-38-page15.txt: [('rrrrrrrrrrrrrrrr', ' '), ('rrrrrrrrrrrrrrrrrrrrn', ' ')]
SOL19001005-V15-39-page15.txt: [("'TTTTTTTTTTTTrrTTTTT.TTTTT", ' ')]
In [27]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/correction5

Average verified rate: 0.9847549225807744

Average of error rates: 0.01820352250489237

Total token count: 1248075

In [28]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[28]:
[("'", 890),
 ('t', 760),
 ('n', 508),
 ('e', 487),
 ('d', 417),
 ('w', 413),
 ('th', 397),
 ('m', 334),
 ('g', 261),
 ('x', 250),
 ('co', 245),
 ('r', 207),
 ('f', 199),
 ('sunday-closing', 180),
 ('u', 147),
 ('sunday-enforcement', 125),
 ('sunday-law', 116),
 ("clerks'", 96),
 ('pa', 96),
 ('z', 85),
 ('mo', 79),
 ('k', 70),
 ('io', 56),
 ('oo', 50),
 ("barbers'", 49),
 ('saloon-keepers', 46),
 ("grocers'", 45),
 ('wm', 44),
 ('mutchler', 43),
 ('re', 42),
 ('church-and-state', 41),
 ('tion', 40),
 ('loth', 38),
 ('law-abiding', 35),
 ('ex', 33),
 ('mass-meeting', 31),
 ('q', 30),
 ('street-cars', 29),
 ("o'gorman", 27),
 ('farmakis', 27),
 ('saloonmen', 27),
 ('sundayclosing', 26),
 ('rest-day', 25),
 ('ourduty', 25),
 ('ment', 25),
 ('lc', 25),
 ('al', 24),
 ('tinel', 24),
 ('se', 24),
 ('non-receipt', 24)]

Correction 6 -- Separate Squashed Words

In [29]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log

prev = cycle
cycle = "correction6"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

verified_tokens = []

for filename in corpus:  
    content = utilities.readfile(directories['prev'], filename)
    clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)

tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]

sorted_list_of_words = list(words_sorted_short['token'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = utilities.strip_punct(content)
    tokens = utilities.tokenize_text(text)
    
    wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
    maxword = max(len(x) for x in sorted_list_of_words)
    
    replacements = []
    
    for token in tokens:
        if not token.lower() in spelling_dictionary:
            if len(token) > 17:
                if re.search(r"[\-\-\'\"]", token):
                    pass
                else:
                    split_string = clean.infer_spaces(token, wordcost, maxword)
                    list_split_string = split_string.split()
                    
                    if clean.verify_split_string(list_split_string, spelling_dictionary):
                        replacements.append((token, split_string))
                    else:
                        pass
            else:
                pass
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
SOL19000510-V15-17-page11.txt: [('IIVYMYYTWITYYTTIVITY', 'I IV Y M Y Y T W I T Y Y T T I V I T Y')]
SOL19000726-V15-29-page1.txt: [('thestatehasnorighttoprohibitit', 'the state has no right to prohibit it')]
SOL19000726-V15-29-page16.txt: [('artedsandllissenotljelphiramr', 'art ed sand l l is sen ot l j el p h ir am r')]
SOL19000816-V15-32-page3.txt: [('considerablepolitical', 'considerable political')]
SOL19020401-V17-05-page50.txt: [('politicoecclesiastical', 'politic o ecclesiastical')]
SOL19020901-V17-10-page47.txt: [('completelyreversedbytjuicsehimself', 'completely reversed by t j u i c s e h i m s e l f')]
SOL19021001-V17-11-page60.txt: [('MentalandMoralValue', 'Mental and Moral Value')]
SOL19030409-V18-15-page14.txt: [('fellowreligionists', 'fellow religionists')]
SOL19030514-V18-20-page12.txt: [('respectableChristian', 'respectable Christian')]
SOL19030604-V18-23-page7.txt: [('fellowrevolutionists', 'fellow revolutionists')]
SOL19030820-V18-34-page3.txt: [('Chrisrepresentatives', 'Chris representatives')]
SOL19031001-V18-40-page5.txt: [('bothasindividualsandasa', 'both as individuals and as a')]
In [30]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/SOL/correction6

Average verified rate: 0.9847541874107677

Average of error rates: 0.01820352250489237

Total token count: 1248146

In [31]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[31]:
[("'", 890),
 ('t', 766),
 ('n', 508),
 ('e', 489),
 ('d', 417),
 ('w', 414),
 ('th', 397),
 ('m', 336),
 ('g', 261),
 ('x', 250),
 ('co', 245),
 ('r', 208),
 ('f', 200),
 ('sunday-closing', 180),
 ('u', 148),
 ('sunday-enforcement', 125),
 ('sunday-law', 116),
 ("clerks'", 96),
 ('pa', 96),
 ('z', 85),
 ('mo', 79),
 ('k', 70),
 ('io', 56),
 ('oo', 50),
 ("barbers'", 49),
 ('saloon-keepers', 46),
 ("grocers'", 45),
 ('wm', 44),
 ('mutchler', 43),
 ('re', 42),
 ('church-and-state', 41),
 ('tion', 40),
 ('loth', 38),
 ('law-abiding', 35),
 ('ex', 33),
 ('mass-meeting', 31),
 ('q', 30),
 ('street-cars', 29),
 ("o'gorman", 27),
 ('farmakis', 27),
 ('saloonmen', 27),
 ('sundayclosing', 26),
 ('rest-day', 25),
 ('ourduty', 25),
 ('ment', 25),
 ('lc', 25),
 ('al', 24),
 ('tinel', 24),
 ('se', 24),
 ('non-receipt', 24)]

Review Remaining Errors

In [32]:
reports.docs_with_high_error_rate(summary)
Out[32]:
[('SOL19011201-V17-01-page61.txt', 0.857),
 ('SOL19020301-V17-04-page63.txt', 0.789),
 ('SOL19020201-V17-03-page63.txt', 0.5),
 ('SOL19020501-V17-06-page20.txt', 0.425)]
In [33]:
# %load shared_elements/high_error_rates.py
doc_keys = [x[0] for x in reports.docs_with_high_error_rate(summary) if x[1] > 0.2]

utilities.open_original_docs(doc_keys, directories['cycle'])
Opened files: 

SOL19011201-V17-01-page61.txt

SOL19020301-V17-04-page63.txt

SOL19020201-V17-03-page63.txt

SOL19020501-V17-06-page20.txt

SOL19011201-V17-01-page61.txt, SOL19020301-V17-04-page63.txt, SOL19020201-V17-03-page63.txt are blank. SOL19020501-V17-06-page20.txt has a large chunk that is on a diagonal.

In [ ]: