LibM-OCR-Evaluation-and-Correction

In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt", 
             "2016-12-07-SDA-place-names.txt", 
             "2016-12-08-SDA-Vocabulary.txt", 
             "2017-01-03-place-names.txt", 
             "2017-02-14-Base-Word-List-SCOWL&KJV.txt",
             "2017-02-14-Roman-Numerals.txt",
             "2017-03-01-Additional-Approved-Words.txt"
            ]
In [6]:
spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)
In [7]:
title = "LibM"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)

Baseline

In [9]:
cycle = 'baseline'
In [10]:
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/baseline

Average verified rate: 0.9276951364862356

Average of error rates: 0.08840278796771826

Total token count: 1502679

In [11]:
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 500 )
Out[11]:
[('ñ', 5427),
 ('-', 1800),
 ('re-', 1618),
 ('con-', 1590),
 ("'", 1508),
 ('tion', 1443),
 ('m', 1251),
 ('d', 1242),
 ('¥', 1236),
 ('in-', 1093),
 ('w', 937),
 ('e', 925),
 (')', 906),
 ('ment', 864),
 ('t', 803),
 ('n', 748),
 ('de-', 745),
 ('be-', 695),
 ('+', 648),
 ('com-', 643),
 ('r', 626),
 ('f', 594),
 ('pro-', 571),
 ('sun-', 502)]

Check Special Character Use

In [12]:
reports.tokens_with_special_characters(errors_summary)[:200]
Out[12]:
[('ñ', 5427),
 ('¥', 1236),
 (')', 906),
 ('+', 648),
 ('(', 478),
 ('/', 388),
 ('=', 193),
 ('(affiliated', 173),
 ('*', 169),
 ('•', 137),
 ('ã', 134),
 ('(affil-', 132),
 ('(af-', 119),
 ('_', 99),
 ('(see', 94),
 ('¥¥', 76),
 ('%', 73),
 ('[the', 72),
 ('(or', 65),
 (']', 61),
 ('¥¥¥', 56),
 ('(the', 55),
 ('ó', 52),
 ('year)', 49),
 ('[', 49),
 ('\\', 43),
 ('newfoundland)', 43),
 ('carolina)', 43),
 ('alaska)', 42),
 ('wyoming)', 42),
 ('mexico)', 42),
 ('wisconsin)', 42),
 ('mississippi)', 42),
 ('maryland)', 42),
 ('dakota)', 42),
 ('island)', 42),
 ('arizona)', 42),
 ('(secretary', 40),
 ('(western)', 39),
 ('(affili-', 38),
 ('++', 37),
 ('o)', 36),
 ('(southern)', 35),
 ('(payable', 33),
 ('ô', 32),
 ('(continued', 31),
 ('(west)', 31),
 ('>', 30),
 ('¥¥¥¥', 30),
 ('(s', 30),
 ('(h', 29),
 ('`', 29),
 ('office)', 25),
 ('longacre)', 25),
 ('(a', 24),
 ('england)', 21),
 ('¥=', 20),
 ('(and', 19),
 ('[of', 19),
 ('[sunday]', 19),
 ('chesapeake)', 18),
 ('(which', 18),
 ('(eastern)', 18),
 ('southern)', 18),
 ('m¥', 18),
 ('(concluded', 18),
 ('(south)', 17),
 ('(north)', 17),
 ('tennessee)', 17),
 ('¡', 17),
 ('(east)', 17),
 ('(northern)', 16),
 ('„', 16),
 ('catholic)', 15),
 ('(i', 14),
 ('sunday]', 14),
 ('(washington', 14),
 ('(tennessee)', 14),
 ('=¥', 14),
 ('ñthe', 14),
 ('(over)', 13),
 ('(page', 13),
 ('#', 13),
 ('(australia)', 13),
 ('(england)', 13),
 ('¤', 13),
 ('\ufeff', 13),
 ('(roman', 13),
 ('(france)', 12),
 ('(italics', 12),
 ('(nebraska)', 12),
 ('i%', 12),
 ('¥¥¥¥¥¥', 12),
 ('**', 12),
 ('(massachusetts)', 12),
 ('(exchange', 11),
 ('sunday)', 11),
 ('(if', 11),
 ('¥-', 11),
 ('(peru)', 11),
 ('saskatchewan)', 11),
 ('(chile)', 11),
 ('(cross', 11),
 ('—', 10),
 ('i¥', 10),
 ('-¥', 10),
 ('the¥', 10),
 ('(western', 10),
 ('[in', 10),
 ('¥the', 10),
 ('(a)', 9),
 ('(california)', 9),
 ('(e', 9),
 ('(subscriptions', 9),
 ('(may', 9),
 ('(to', 9),
 ('i)', 9),
 ('(i)', 9),
 ("'ñ", 9),
 ('accepted)', 9),
 ('<', 9),
 ('(your', 9),
 ('(greater)', 9),
 ('[a', 9),
 ('desired)', 9),
 ('c)', 8),
 ('(b)', 8),
 ('th¥', 8),
 ('¤¤', 8),
 ('post-office)', 8),
 ('ña', 8),
 ('(frontispiece)', 8),
 ('ãã', 8),
 ('[not', 8),
 ('(baptist)', 8),
 ('excepted)', 8),
 ('(central)', 8),
 ("'¥", 7),
 ('//', 7),
 ('¥and', 7),
 ('law]', 7),
 ('york)', 7),
 ('¥m', 7),
 ('day]', 7),
 ('=m', 7),
 ('(catholic)', 7),
 ('church]', 7),
 ('(signed)', 7),
 ('©', 7),
 ('(as', 7),
 ('ñid', 7),
 ('[mr', 6),
 ('(two', 6),
 ('(pa', 6),
 ('(minnesota)', 6),
 ('••', 6),
 ('/m', 6),
 ('m=', 6),
 ('(mr', 6),
 ('~~', 6),
 ('=¥¥', 6),
 ('(n', 6),
 ('(for', 6),
 ('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++', 6),
 ('[for', 6),
 ('(by', 6),
 ('(canadian', 6),
 ('(works', 6),
 ('(sunday)', 6),
 ('(rev', 6),
 ('(new', 6),
 ('day)', 6),
 ('(in', 6),
 ('(lay', 6),
 ('(of', 6),
 ('¥¥¥¥¥', 5),
 ('numbers)', 5),
 ('ñibid', 5),
 ('(poetry)', 5),
 ('<>', 5),
 ('[civil', 5),
 ('(civil', 5),
 ('labor)', 5),
 ('attach\x8e', 5),
 ('i/', 5),
 ('(first', 5),
 ('}', 5),
 ('(dec', 5),
 ('(saturday', 5),
 ('prescott)', 5),
 ('[sunday', 5),
 ('♦', 5),
 ('(r', 5),
 ('(john', 5),
 ('[christ]', 5),
 ('~', 5),
 ('ñhon', 5),
 ('(d', 5),
 ('laws]', 5),
 ('++++++++++++++++++++++++++++++++++++++++++++++++++++++++', 5)]

Correction 1 -- Normalize Characters

In [13]:
# %load shared_elements/normalize_characters.py
prev = cycle
cycle = "correction1"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Substitute for all other dashes
    content = re.sub(r"—-—–‑", r"-", content)

    # Substitute formatted apostrophe
    content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
    
    # Replace all special characters with a space (as these tend to occur at the end of lines)
    content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [14]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction1

Average verified rate: 0.9373310897970699

Average of error rates: 0.0763136463683052

Total token count: 1492287

In [15]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[15]:
[('-', 1932),
 ('re-', 1620),
 ('con-', 1590),
 ("'", 1578),
 ('tion', 1446),
 ('m', 1334),
 ('d', 1256),
 ('in-', 1094),
 ('e', 985),
 ('w', 952),
 ('ment', 864),
 ('t', 837),
 ('n', 774),
 ('de-', 745),
 ('be-', 696),
 ('r', 663),
 ('com-', 645),
 ('f', 624),
 ('pro-', 572),
 ('sun-', 505),
 ('ex-', 450),
 ('en-', 446),
 ('tions', 404),
 ('g', 384),
 ('ligious', 367),
 ('per-', 361),
 ('dis-', 360),
 ('un-', 357),
 ('relig-', 351),
 ('na-', 328),
 ('gov-', 324),
 ('ob-', 323),
 ('chris-', 298),
 ('govern-', 292),
 ('x', 265),
 ('ernment', 260),
 ('ious', 258),
 ('ac-', 250),
 ('erty', 237),
 ('ance', 236),
 ('lib-', 236),
 ('pre-', 235),
 ('sab-', 234),
 ('ments', 233),
 ('ad-', 230),
 ('reli-', 219),
 ('tional', 211),
 ('ligion', 209),
 ('u', 206),
 ('im-', 206)]

Correction 2 -- Fix Line Endings

In [16]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [17]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction2

Average verified rate: 0.9784327331755492

Average of error rates: 0.037992296404988996

Total token count: 1452618

In [18]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[18]:
[('-', 1918),
 ("'", 1578),
 ('m', 1330),
 ('d', 1250),
 ('e', 976),
 ('w', 951),
 ('t', 819),
 ('n', 772),
 ('r', 662),
 ('f', 620),
 ('g', 381),
 ('x', 265),
 ('u', 205),
 ('k', 188),
 ('tv', 150),
 ('th', 118),
 ('sunday-law', 112),
 ('--', 111),
 ('postmaster-general', 106),
 ('pa', 101),
 ('sunday-closing', 73),
 ('z', 72),
 ('un-american', 72),
 ('id', 71),
 ('io', 70),
 ('statute-books', 66),
 ('post-offices', 66),
 ('co', 61),
 ('church-and-state', 60),
 ('mo', 60),
 ('ga', 58),
 ('va', 56),
 ('attorney-general', 56),
 ('ex', 51),
 ('re', 48),
 ('sunday-rest', 48),
 ('tion', 45),
 ('mm', 42),
 ('q', 41),
 ('mt', 41),
 ('wm', 38),
 ('pp', 38),
 ('re-', 38),
 ('ro', 37),
 ('charta', 37),
 ('mi', 36),
 ('li', 36),
 ('---', 36),
 ('present-day', 35),
 ('religio-political', 33)]

Correction 3 -- Remove extra dashes

In [19]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    for token in tokens:
        if token[0] is "-":
            replacements.append((token, token[1:]))
            
        elif token[-1] is "-":
            replacements.append((token, token[:-1]))
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19060401-V01-01-page1.txt: [('support.-', 'support.')]
LibM19060401-V01-01-page10.txt: [('sanc-', 'sanc')]
LibM19060401-V01-01-page11.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc')]
LibM19060401-V01-01-page12.txt: [('organiza-', 'organiza')]
LibM19060401-V01-01-page14.txt: [('altogether."-', 'altogether."')]
LibM19060401-V01-01-page15.txt: [('-the', 'the')]
LibM19060401-V01-01-page17.txt: [('-II.', 'II.'), ('de-', 'de')]
LibM19060401-V01-01-page19.txt: [('-haracterized', 'haracterized')]
LibM19060401-V01-01-page21.txt: [('pe-', 'pe')]
LibM19060401-V01-01-page27.txt: [('-', '')]
LibM19060401-V01-01-page31.txt: [('-', '')]
LibM19060401-V01-01-page32.txt: [('-', '')]
LibM19060401-V01-01-page34.txt: [('-never', 'never'), ('-', ''), ('-', ''), ('-', ''), ('prin-', 'prin')]
LibM19060401-V01-01-page35.txt: [('-', ''), ('-', ''), ('-', ''), ('-gh-l-', 'gh-l-'), ('ner-', 'ner'), ('-', ''), ('-', ''), ('--afilhir', '-afilhir'), ('--', '-'), ('-', ''), ('-', ''), ('-iSW', 'iSW'), ('-', ''), ('-cirm', 'cirm'), ('-', ''), ('--ant', '-ant'), ('-', ''), ('-', ''), ("'e-Ihttnii-ti--", "'e-Ihttnii-ti-"), ('-owl-', 'owl-'), ('-', ''), ('it-', 'it'), ('--', '-'), ('-', ''), ('-ao-te', 'ao-te'), ('-viez', 'viez'), ('-', ''), ('derwee.--', 'derwee.-'), ('..pieLese--', '..pieLese-'), ('o-', 'o'), ('.-', '.'), ('-or--.', 'or--.'), ('tr.-', 'tr.'), ('-', ''), ('-ezel', 'ezel')]
LibM19060401-V01-01-page36.txt: [('SURMOUNT-', 'SURMOUNT')]
LibM19060401-V01-01-page4.txt: [('opin-', 'opin'), ('-', '')]
LibM19060401-V01-01-page6.txt: [('-', '')]
LibM19060401-V01-01-page8.txt: [('en-', 'en')]
LibM19060701-V01-02-page1.txt: [('support.-', 'support.')]
LibM19060701-V01-02-page12.txt: [('LIB-', 'LIB')]
LibM19060701-V01-02-page13.txt: [('-', '')]
LibM19060701-V01-02-page15.txt: [('com-', 'com')]
LibM19060701-V01-02-page18.txt: [('exer-', 'exer')]
LibM19060701-V01-02-page27.txt: [('mis-', 'mis')]
LibM19060701-V01-02-page3.txt: [('C--', 'C-'), ('----', '---'), ('--', '-'), ('-', ''), ('-----------.', '----------.')]
LibM19060701-V01-02-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19060701-V01-02-page5.txt: [('coun-', 'coun')]
LibM19060701-V01-02-page7.txt: [('-', '')]
LibM19061001-V01-03-page11.txt: [('per-', 'per')]
LibM19061001-V01-03-page15.txt: [('inn-', 'inn')]
LibM19061001-V01-03-page17.txt: [('legal-', 'legal'), ('-', '')]
LibM19061001-V01-03-page18.txt: [('-', ''), ('coun-', 'coun')]
LibM19061001-V01-03-page19.txt: [('secu-', 'secu')]
LibM19061001-V01-03-page20.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19061001-V01-03-page24.txt: [('profana-', 'profana')]
LibM19061001-V01-03-page26.txt: [('-governor', 'governor')]
LibM19061001-V01-03-page27.txt: [('-of', 'of'), ('modifi-', 'modifi')]
LibM19061001-V01-03-page28.txt: [('s--', 's-')]
LibM19061001-V01-03-page3.txt: [('-earing', 'earing')]
LibM19061001-V01-03-page30.txt: [('op-', 'op')]
LibM19061001-V01-03-page31.txt: [('Paid-', 'Paid'), ('free-', 'free')]
LibM19061001-V01-03-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19061001-V01-03-page4.txt: [('rhoreh-and-', 'rhoreh-and')]
LibM19061001-V01-03-page8.txt: [('-TOUSES', 'TOUSES')]
LibM19061001-V01-03-page9.txt: [('-', '')]
LibM19070101-V02-01-page12.txt: [('them-', 'them'), ('.-', '.'), ('-', ''), ('A-', 'A')]
LibM19070101-V02-01-page14.txt: [('-conscience', 'conscience')]
LibM19070101-V02-01-page17.txt: [('accord-', 'accord')]
LibM19070101-V02-01-page18.txt: [('Con-', 'Con')]
LibM19070101-V02-01-page2.txt: [('--', '-'), ('-', ''), ('-----', '----'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('---', '--'), ('-', '')]
LibM19070101-V02-01-page21.txt: [('en-', 'en')]
LibM19070101-V02-01-page22.txt: [('-', '')]
LibM19070101-V02-01-page23.txt: [('Mc-', 'Mc')]
LibM19070101-V02-01-page25.txt: [('Postmaster-', 'Postmaster'), ('in-', 'in'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19070101-V02-01-page3.txt: [('gov-', 'gov')]
LibM19070101-V02-01-page30.txt: [('-', ''), ('-', '')]
LibM19070101-V02-01-page31.txt: [('-', '')]
LibM19070101-V02-01-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19070101-V02-01-page35.txt: [('-', '')]
LibM19070101-V02-01-page7.txt: [('-', '')]
LibM19070101-V02-01-page8.txt: [('un-', 'un')]
LibM19070401-V02-02-page12.txt: [('hun-', 'hun')]
LibM19070401-V02-02-page13.txt: [('Sunday-', 'Sunday')]
LibM19070401-V02-02-page14.txt: [('Sun-', 'Sun')]
LibM19070401-V02-02-page16.txt: [('-', ''), ('enfor-', 'enfor')]
LibM19070401-V02-02-page17.txt: [('LAN-', 'LAN')]
LibM19070401-V02-02-page18.txt: [('men.-', 'men.')]
LibM19070401-V02-02-page19.txt: [('-', '')]
LibM19070401-V02-02-page22.txt: [('Medo-', 'Medo')]
LibM19070401-V02-02-page26.txt: [('consulted.-', 'consulted.')]
LibM19070401-V02-02-page27.txt: [('--', '-'), ('---', '--'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.j.i.i.', '.j.i.i.'), ('-f', 'f'), ('-', ''), ('-', ''), ('-I', 'I'), ('I-', 'I'), ("-'il", "'il")]
LibM19070401-V02-02-page3.txt: [('Chi-', 'Chi'), ('-', '')]
LibM19070401-V02-02-page32.txt: [('-', '')]
LibM19070401-V02-02-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19070401-V02-02-page4.txt: [('-', ''), ('Lewis-', 'Lewis'), ('-', '')]
LibM19070401-V02-02-page6.txt: [('an-', 'an')]
LibM19070701-V02-03-page14.txt: [('-', ''), ('rec-', 'rec')]
LibM19070701-V02-03-page15.txt: [('-', ''), ('-', ''), ('at-', 'at')]
LibM19070701-V02-03-page17.txt: [('Indepen-', 'Indepen')]
LibM19070701-V02-03-page18.txt: [('agi-', 'agi')]
LibM19070701-V02-03-page19.txt: [('James-', 'James')]
LibM19070701-V02-03-page20.txt: [('Chris-', 'Chris')]
LibM19070701-V02-03-page21.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19070701-V02-03-page22.txt: [('-', ''), ('FOUNDATIONS.-', 'FOUNDATIONS.')]
LibM19070701-V02-03-page23.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc')]
LibM19070701-V02-03-page24.txt: [('-', '')]
LibM19070701-V02-03-page25.txt: [('-', '')]
LibM19070701-V02-03-page27.txt: [('-', '')]
LibM19070701-V02-03-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19070701-V02-03-page3.txt: [('Protes-', 'Protes')]
LibM19070701-V02-03-page34.txt: [('-', ''), ('-', ''), ('-', ''), ('Act-of-', 'Act-of')]
LibM19070701-V02-03-page8.txt: [('LIB-', 'LIB'), ('-', '')]
LibM19070701-V02-03-page9.txt: [('As-', 'As'), ('-', '')]
LibM19071001-V02-04-page10.txt: [('cor-', 'cor')]
LibM19071001-V02-04-page12.txt: [('-that', 'that')]
LibM19071001-V02-04-page14.txt: [('--', '-'), ('Vice-', 'Vice')]
LibM19071001-V02-04-page15.txt: [('con-', 'con')]
LibM19071001-V02-04-page17.txt: [('in-', 'in'), ('-', ''), ('-', '')]
LibM19071001-V02-04-page18.txt: [('-', '')]
LibM19071001-V02-04-page19.txt: [('-', '')]
LibM19071001-V02-04-page20.txt: [('-', '')]
LibM19071001-V02-04-page21.txt: [('doc-', 'doc')]
LibM19071001-V02-04-page22.txt: [('-', '')]
LibM19071001-V02-04-page23.txt: [('-', '')]
LibM19071001-V02-04-page24.txt: [('rneas-', 'rneas')]
LibM19071001-V02-04-page26.txt: [('----', '---'), ('-', '')]
LibM19071001-V02-04-page27.txt: [('-', '')]
LibM19071001-V02-04-page28.txt: [('-', '')]
LibM19071001-V02-04-page29.txt: [('Record-', 'Record')]
LibM19071001-V02-04-page30.txt: [('decep-', 'decep'), ('-', ''), ('-', '')]
LibM19071001-V02-04-page31.txt: [('-', ''), ('-', '')]
LibM19071001-V02-04-page32.txt: [('law-', 'law'), ('impor-', 'impor'), ('Sab-', 'Sab'), ('-orb', 'orb'), ('re-', 're'), ('-', '')]
LibM19071001-V02-04-page38.txt: [('-', ''), ('essen-', 'essen')]
LibM19071001-V02-04-page40.txt: [('-', '')]
LibM19071001-V02-04-page44.txt: [('--', '-')]
LibM19071001-V02-04-page46.txt: [('-', '')]
LibM19071001-V02-04-page48.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Sov-', 'Sov'), ('-e', 'e'), ('-', '')]
LibM19071001-V02-04-page49.txt: [('-page', 'page'), ('-page', 'page')]
LibM19071001-V02-04-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19071001-V02-04-page51.txt: [('-THAT', 'THAT'), ('-', ''), ('lande.-', 'lande.'), ('temert.-', 'temert.'), ('-', ''), ('Colooiso.-', 'Colooiso.'), ('-', ''), ('velour...ref.-', 'velour...ref.'), ('hands.-', 'hands.'), ('people.-', 'people.'), ('-They', 'They'), ('-W', 'W'), ('-TTE', 'TTE')]
LibM19071001-V02-04-page8.txt: [('hier-', 'hier')]
LibM19080101-V03-01-page1.txt: [('-', ''), ('-.', '.')]
LibM19080101-V03-01-page14.txt: [('-', '')]
LibM19080101-V03-01-page15.txt: [('-', '')]
LibM19080101-V03-01-page20.txt: [('un-', 'un'), ('un-', 'un'), ('presi-', 'presi')]
LibM19080101-V03-01-page22.txt: [('set-', 'set')]
LibM19080101-V03-01-page23.txt: [('-in', 'in')]
LibM19080101-V03-01-page24.txt: [('-', '')]
LibM19080101-V03-01-page26.txt: [('PRES-', 'PRES')]
LibM19080101-V03-01-page32.txt: [('in-', 'in')]
LibM19080101-V03-01-page34.txt: [('Ren-', 'Ren')]
LibM19080101-V03-01-page36.txt: [('haz-', 'haz')]
LibM19080101-V03-01-page39.txt: [('de-', 'de')]
LibM19080101-V03-01-page40.txt: [('docu-', 'docu')]
LibM19080101-V03-01-page41.txt: [('self-govern-', 'self-govern')]
LibM19080101-V03-01-page47.txt: [('As--', 'As-')]
LibM19080101-V03-01-page48.txt: [('-', ''), ('Under-', 'Under'), ('-', ''), ('-', ''), ('Sab-', 'Sab'), ('-', ''), ('Sov-', 'Sov'), ('-', ''), ('-', '')]
LibM19080101-V03-01-page49.txt: [('-page', 'page')]
LibM19080101-V03-01-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('LIB-', 'LIB')]
LibM19080101-V03-01-page51.txt: [('-..ter', '..ter'), ('-', '')]
LibM19080101-V03-01-page6.txt: [('gen-', 'gen')]
LibM19080101-V03-01-page8.txt: [('-', '')]
LibM19080401-V03-02-page1.txt: [('.-q"P--', '.-q"P-'), ('mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx--.-', 'mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx--.'), ('-', ''), ('-.-', '.-'), ('-', ''), ('-TuaDCII', 'TuaDCII')]
LibM19080401-V03-02-page11.txt: [('gov-', 'gov')]
LibM19080401-V03-02-page12.txt: [('-', '')]
LibM19080401-V03-02-page14.txt: [('-', '')]
LibM19080401-V03-02-page16.txt: [('-', ''), ('sab-', 'sab')]
LibM19080401-V03-02-page18.txt: [('-day', 'day'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Georgia-', 'Georgia'), ('-', ''), ('Illinois-', 'Illinois'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19080401-V03-02-page21.txt: [('con-', 'con')]
LibM19080401-V03-02-page24.txt: [('-', ''), ('suc-', 'suc')]
LibM19080401-V03-02-page3.txt: [('pos-', 'pos')]
LibM19080401-V03-02-page30.txt: [('Postmaster-', 'Postmaster'), ('la-', 'la')]
LibM19080401-V03-02-page35.txt: [('-', '')]
LibM19080401-V03-02-page36.txt: [('re-', 're')]
LibM19080401-V03-02-page41.txt: [('com-', 'com')]
LibM19080401-V03-02-page43.txt: [('-as', 'as'), ('-', ''), ('tol-', 'tol')]
LibM19080401-V03-02-page44.txt: [('every-', 'every')]
LibM19080401-V03-02-page48.txt: [('Under-', 'Under'), ('Sab-', 'Sab'), ('Sov-', 'Sov')]
LibM19080401-V03-02-page50.txt: [('-', '')]
LibM19080401-V03-02-page51.txt: [('ntitzu-', 'ntitzu'), ('-', '')]
LibM19080401-V03-02-page9.txt: [('-', ''), ('gov-', 'gov')]
LibM19080701-V03-03-page1.txt: [('--ff', '-ff'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini-', 'muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini'), ('-Z', 'Z'), ('-', ''), ('---v', '--v')]
LibM19080701-V03-03-page11.txt: [('-', '')]
LibM19080701-V03-03-page14.txt: [('free-', 'free')]
LibM19080701-V03-03-page20.txt: [('-', ''), ('-', ''), ('-', ''), ('re-', 're')]
LibM19080701-V03-03-page21.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19080701-V03-03-page22.txt: [('-', ''), ('I-', 'I')]
LibM19080701-V03-03-page26.txt: [('-', '')]
LibM19080701-V03-03-page28.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19080701-V03-03-page29.txt: [('-', '')]
LibM19080701-V03-03-page30.txt: [('-', '')]
LibM19080701-V03-03-page31.txt: [('-o', 'o'), ('-', '')]
LibM19080701-V03-03-page32.txt: [('na-', 'na')]
LibM19080701-V03-03-page39.txt: [('opin-', 'opin')]
LibM19080701-V03-03-page40.txt: [('Con-', 'Con'), ('-President', 'President')]
LibM19080701-V03-03-page43.txt: [('-finest', 'finest'), ('uni-', 'uni'), ('-versal', 'versal')]
LibM19080701-V03-03-page45.txt: [('prohib-', 'prohib')]
LibM19080701-V03-03-page47.txt: [('-', '')]
LibM19080701-V03-03-page48.txt: [('Revelation.-', 'Revelation.'), ('-', ''), ('-', ''), ('Under-', 'Under'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Sov-', 'Sov'), ('Christ-', 'Christ')]
LibM19080701-V03-03-page49.txt: [('-', '')]
LibM19080701-V03-03-page5.txt: [('con-', 'con')]
LibM19080701-V03-03-page50.txt: [('-', '')]
LibM19080701-V03-03-page52.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19080701-V03-03-page6.txt: [('pas-', 'pas')]
LibM19080701-V03-03-page7.txt: [('-', ''), ('-the', 'the')]
LibM19080701-V03-03-page9.txt: [('Post-', 'Post')]
LibM19081001-V03-04-page1.txt: [('E-', 'E'), ('-', ''), ('-rI', 'rI'), ('-', ''), ('-', '')]
LibM19081001-V03-04-page13.txt: [('ef-', 'ef')]
LibM19081001-V03-04-page14.txt: [('-', ''), ('state-', 'state')]
LibM19081001-V03-04-page15.txt: [('-', '')]
LibM19081001-V03-04-page18.txt: [('In-', 'In')]
LibM19081001-V03-04-page19.txt: [('-', '')]
LibM19081001-V03-04-page20.txt: [('-', '')]
LibM19081001-V03-04-page22.txt: [('es-', 'es'), ('for-', 'for')]
LibM19081001-V03-04-page23.txt: [('sab-', 'sab'), ('some-', 'some')]
LibM19081001-V03-04-page26.txt: [('-hall', 'hall')]
LibM19081001-V03-04-page33.txt: [('--', '-')]
LibM19081001-V03-04-page34.txt: [('tend-', 'tend'), ('stri-', 'stri')]
LibM19081001-V03-04-page37.txt: [('punment.--', 'punment.-'), ('imprison-', 'imprison'), ('--', '-')]
LibM19081001-V03-04-page39.txt: [('-', '')]
LibM19081001-V03-04-page4.txt: [('com-', 'com')]
LibM19081001-V03-04-page40.txt: [('com-', 'com')]
LibM19081001-V03-04-page41.txt: [('remem-', 'remem'), ('-which', 'which')]
LibM19081001-V03-04-page43.txt: [('pecul-', 'pecul')]
LibM19081001-V03-04-page44.txt: [('un-', 'un'), ('-', '')]
LibM19081001-V03-04-page47.txt: [('repu-', 'repu')]
LibM19081001-V03-04-page49.txt: [('-', ''), ('-', '')]
LibM19081001-V03-04-page50.txt: [('-', ''), ('-', '')]
LibM19081001-V03-04-page52.txt: [('-', '')]
LibM19081001-V03-04-page8.txt: [('-', '')]
LibM19081001-V03-04-page9.txt: [('un-', 'un')]
LibM19090101-V04-01-page1.txt: [('-', ''), ('.....-', '.....'), ('-', ''), ('-....i"', '....i"'), ('k..a...--', 'k..a...-'), ('-', ''), ('-', ''), ('-', '')]
LibM19090101-V04-01-page17.txt: [('-', ''), ('con-', 'con')]
LibM19090101-V04-01-page18.txt: [('-', '')]
LibM19090101-V04-01-page21.txt: [('-', ''), ('-', '')]
LibM19090101-V04-01-page24.txt: [('-.', '.'), ('.-', '.')]
LibM19090101-V04-01-page26.txt: [('con-', 'con')]
LibM19090101-V04-01-page3.txt: [('relig-', 'relig')]
LibM19090101-V04-01-page30.txt: [('-', '')]
LibM19090101-V04-01-page33.txt: [('-', ''), ('na-', 'na'), ('insti-', 'insti'), ('r--', 'r-')]
LibM19090101-V04-01-page36.txt: [('-', '')]
LibM19090101-V04-01-page37.txt: [('-.', '.'), ('.-', '.')]
LibM19090101-V04-01-page40.txt: [('founda-', 'founda')]
LibM19090101-V04-01-page41.txt: [('per-', 'per')]
LibM19090101-V04-01-page44.txt: [('in-', 'in'), ('disor-', 'disor')]
LibM19090101-V04-01-page45.txt: [('be-', 'be'), ('Mc-', 'Mc')]
LibM19090101-V04-01-page47.txt: [('-', ''), ('-', '')]
LibM19090101-V04-01-page49.txt: [('Post-', 'Post'), ('-', ''), ('-', '')]
LibM19090101-V04-01-page5.txt: [('-', '')]
LibM19090101-V04-01-page50.txt: [('-', '')]
LibM19090101-V04-01-page52.txt: [('-', ''), ('-', '')]
LibM19090101-V04-01-page7.txt: [('SECRE-', 'SECRE')]
LibM19090401-V04-02-page1.txt: [('-', ''), ('-L', 'L'), ('-', ''), ('-', ''), ('---mussuaillir', '--mussuaillir'), ('-', '')]
LibM19090401-V04-02-page10.txt: [('scru-', 'scru')]
LibM19090401-V04-02-page12.txt: [('-', ''), ('Gib-', 'Gib'), ('in-', 'in')]
LibM19090401-V04-02-page2.txt: [('-hi', 'hi'), ('-', ''), ('-ss', 'ss'), ('e-', 'e'), ('-', ''), ('-', ''), ('ja-', 'ja'), ('ace-', 'ace'), ('-Q.s.-', 'Q.s.-'), ('Ca-', 'Ca'), ('-', ''), ('-', ''), ('.....-', '.....'), ('-', ''), ('-', ''), ('-', ''), ('-V', 'V'), ('-', ''), ('-', ''), ('-', ''), ('-dte-y', 'dte-y'), ('-', ''), ('-c.x', 'c.x'), ('-eed', 'eed'), ('-', ''), ('rt-', 'rt'), ('-', ''), ('-', ''), ('-', ''), ("'r-", "'r"), ('-n', 'n')]
LibM19090401-V04-02-page20.txt: [('Vir-', 'Vir')]
LibM19090401-V04-02-page23.txt: [('-', ''), ('As-', 'As')]
LibM19090401-V04-02-page30.txt: [('--', '-'), ('gov-', 'gov')]
LibM19090401-V04-02-page36.txt: [('-t', 't')]
LibM19090401-V04-02-page38.txt: [('Rich-', 'Rich'), ('neverthe-', 'neverthe')]
LibM19090401-V04-02-page45.txt: [('-', ''), ('con-', 'con'), ('-t', 't'), ('-', ''), ('-eta', 'eta'), ('.-', '.'), ('-', ''), ('aforexo.-', 'aforexo.')]
LibM19090401-V04-02-page46.txt: [('-', ''), ('mur-', 'mur'), ('-', ''), ('-.', '.')]
LibM19090401-V04-02-page48.txt: [('PEAR-', 'PEAR'), ('-', ''), ('-', '')]
LibM19090401-V04-02-page49.txt: [('Post-', 'Post'), ('-', ''), ('-', '')]
LibM19090401-V04-02-page5.txt: [('-.', '.'), ('.-', '.')]
LibM19090401-V04-02-page50.txt: [('-', '')]
LibM19090401-V04-02-page51.txt: [('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', '')]
LibM19090401-V04-02-page8.txt: [('Mc-', 'Mc')]
LibM19090401-V04-02-page9.txt: [('-', ''), ('acknowl-', 'acknowl')]
LibM19090701-V04-03-page1.txt: [('-"', '"'), ('-lib', 'lib'), ('..ILI--', '..ILI-')]
LibM19090701-V04-03-page10.txt: [('RECEP-', 'RECEP')]
LibM19090701-V04-03-page11.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('an-', 'an'), ('at--', 'at-')]
LibM19090701-V04-03-page13.txt: [('Con-', 'Con'), ('-', '')]
LibM19090701-V04-03-page14.txt: [('-', '')]
LibM19090701-V04-03-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19090701-V04-03-page22.txt: [('Russian-', 'Russian')]
LibM19090701-V04-03-page23.txt: [('-', '')]
LibM19090701-V04-03-page26.txt: [('-', '')]
LibM19090701-V04-03-page29.txt: [('time-hon-', 'time-hon')]
LibM19090701-V04-03-page3.txt: [('mat-', 'mat')]
LibM19090701-V04-03-page30.txt: [('-', '')]
LibM19090701-V04-03-page32.txt: [('de-', 'de')]
LibM19090701-V04-03-page33.txt: [('non-', 'non'), ('pro-', 'pro')]
LibM19090701-V04-03-page34.txt: [('Hu-', 'Hu'), ('CHRIS-', 'CHRIS'), ('be-', 'be')]
LibM19090701-V04-03-page36.txt: [('--', '-')]
LibM19090701-V04-03-page37.txt: [('there-', 'there'), ('re-', 're')]
LibM19090701-V04-03-page40.txt: [('en-', 'en')]
LibM19090701-V04-03-page42.txt: [('-taptimi', 'taptimi'), ('-thifii', 'thifii'), ('-', ''), ('trinn-', 'trinn'), ('-fihAt', 'fihAt'), ('Yr-', 'Yr')]
LibM19090701-V04-03-page44.txt: [('en-', 'en'), ('Anti-', 'Anti')]
LibM19090701-V04-03-page48.txt: [('Post-', 'Post'), ('Cook-', 'Cook')]
LibM19090701-V04-03-page49.txt: [('-', ''), ('-', ''), ('APPEAR-', 'APPEAR')]
LibM19090701-V04-03-page5.txt: [('govern-', 'govern')]
LibM19090701-V04-03-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19090701-V04-03-page51.txt: [('-', '')]
LibM19090701-V04-03-page52.txt: [('-', ''), ('-', '')]
LibM19090701-V04-03-page6.txt: [('pro-', 'pro')]
LibM19090701-V04-03-page7.txt: [('Mc-', 'Mc')]
LibM19090701-V04-03-page9.txt: [('-', ''), ('-', '')]
LibM19091001-V04-04-page10.txt: [('af-', 'af')]
LibM19091001-V04-04-page11.txt: [('gov-', 'gov'), ('horse-', 'horse')]
LibM19091001-V04-04-page13.txt: [('be-', 'be')]
LibM19091001-V04-04-page14.txt: [('af-', 'af')]
LibM19091001-V04-04-page15.txt: [('R-', 'R')]
LibM19091001-V04-04-page16.txt: [('-.', '.'), ('.-', '.'), ('spiritu-', 'spiritu')]
LibM19091001-V04-04-page17.txt: [('es-', 'es')]
LibM19091001-V04-04-page18.txt: [('di-', 'di')]
LibM19091001-V04-04-page19.txt: [('-', '')]
LibM19091001-V04-04-page2.txt: [('Au-', 'Au'), ('Post-', 'Post')]
LibM19091001-V04-04-page21.txt: [('-', '')]
LibM19091001-V04-04-page22.txt: [('-', ''), ('-', ''), ('anti-', 'anti'), ('-', ''), ('-', ''), ('Mc-', 'Mc')]
LibM19091001-V04-04-page23.txt: [('A.-', 'A.')]
LibM19091001-V04-04-page25.txt: [('.T-', '.T')]
LibM19091001-V04-04-page3.txt: [('-', '')]
LibM19091001-V04-04-page30.txt: [('Ware-', 'Ware'), ('-the', 'the')]
LibM19091001-V04-04-page31.txt: [('-', ''), ('CON-', 'CON')]
LibM19091001-V04-04-page32.txt: [('-', ''), ('-', '')]
LibM19091001-V04-04-page35.txt: [('de-', 'de')]
LibM19091001-V04-04-page36.txt: [('-', '')]
LibM19091001-V04-04-page38.txt: [('b-', 'b'), ('phrase-', 'phrase'), ('-', '')]
LibM19091001-V04-04-page39.txt: [('non-', 'non')]
LibM19091001-V04-04-page4.txt: [('para-', 'para')]
LibM19091001-V04-04-page45.txt: [('finan-', 'finan')]
LibM19091001-V04-04-page47.txt: [('APPEAR-', 'APPEAR')]
LibM19091001-V04-04-page48.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19091001-V04-04-page7.txt: [('Mc-', 'Mc'), ('differen-', 'differen')]
LibM19091001-V04-04-page8.txt: [('-', '')]
LibM19091001-V04-04-page9.txt: [('-', '')]
LibM19100101-V05-01-page1.txt: [('r-', 'r'), ('-.', '.'), ('.-', '.')]
LibM19100101-V05-01-page11.txt: [('thou-', 'thou')]
LibM19100101-V05-01-page13.txt: [('-', ''), ('Ad-', 'Ad')]
LibM19100101-V05-01-page14.txt: [('WASH-', 'WASH'), ('RE-', 'RE'), ('mem-', 'mem')]
LibM19100101-V05-01-page17.txt: [('Mc-', 'Mc'), ('Secretary-of-', 'Secretary-of')]
LibM19100101-V05-01-page19.txt: [('incon-', 'incon')]
LibM19100101-V05-01-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19100101-V05-01-page20.txt: [('com-', 'com')]
LibM19100101-V05-01-page21.txt: [('sup-', 'sup')]
LibM19100101-V05-01-page23.txt: [('free-', 'free')]
LibM19100101-V05-01-page24.txt: [('Chris-', 'Chris')]
LibM19100101-V05-01-page27.txt: [('-', '')]
LibM19100101-V05-01-page31.txt: [('-', ''), ('-', ''), ('--', '-'), ('--', '-')]
LibM19100101-V05-01-page32.txt: [('guar-', 'guar'), ('Postmaster-', 'Postmaster')]
LibM19100101-V05-01-page33.txt: [('-Edward', 'Edward'), ('des-', 'des')]
LibM19100101-V05-01-page34.txt: [('Anti-', 'Anti')]
LibM19100101-V05-01-page35.txt: [('com-', 'com')]
LibM19100101-V05-01-page36.txt: [('-', ''), ('-"', '"'), ('-', '')]
LibM19100101-V05-01-page37.txt: [('separa-', 'separa')]
LibM19100101-V05-01-page39.txt: [('-Z', 'Z'), ('-.E', '.E'), ('-', ''), ('-A', 'A')]
LibM19100101-V05-01-page42.txt: [('-', '')]
LibM19100101-V05-01-page45.txt: [('-', '')]
LibM19100101-V05-01-page46.txt: [('-', ''), ('over-', 'over')]
LibM19100101-V05-01-page47.txt: [('-', '')]
LibM19100101-V05-01-page48.txt: [('-', '')]
LibM19100101-V05-01-page49.txt: [('sp-', 'sp'), ('-', ''), ('-', '')]
LibM19100101-V05-01-page50.txt: [('-', ''), ('Artaa.--', 'Artaa.-')]
LibM19100101-V05-01-page6.txt: [('-', '')]
LibM19100101-V05-01-page7.txt: [('ap-', 'ap'), ('dis-', 'dis'), ('Cath-', 'Cath')]
LibM19100401-V05-02-page1.txt: [('...m..."..--', '...m..."..-'), ('.-', '.'), ('--mommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt', '-mommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt'), ('-.', '.'), ('-', ''), ('-', ''), ("-'-", "'-"), ('-j', 'j'), ("--S-'''", "-S-'''"), ('--', '-')]
LibM19100401-V05-02-page12.txt: [('enforce-', 'enforce'), ('op-', 'op')]
LibM19100401-V05-02-page13.txt: [('-', ''), ('-', '')]
LibM19100401-V05-02-page16.txt: [('Mary-', 'Mary')]
LibM19100401-V05-02-page18.txt: [('-', '')]
LibM19100401-V05-02-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19100401-V05-02-page21.txt: [('sun-', 'sun')]
LibM19100401-V05-02-page24.txt: [('at-', 'at'), ('-church', 'church')]
LibM19100401-V05-02-page25.txt: [('trans-', 'trans')]
LibM19100401-V05-02-page26.txt: [('in-', 'in')]
LibM19100401-V05-02-page27.txt: [('ex-', 'ex')]
LibM19100401-V05-02-page3.txt: [('-PR', 'PR')]
LibM19100401-V05-02-page35.txt: [('-', ''), ('-friEHORRoki', 'friEHORRoki'), ('-CHER', 'CHER')]
LibM19100401-V05-02-page38.txt: [('Sun-', 'Sun')]
LibM19100401-V05-02-page40.txt: [('advo-', 'advo')]
LibM19100401-V05-02-page46.txt: [('re-', 're')]
LibM19100401-V05-02-page48.txt: [('Teach-', 'Teach')]
LibM19100401-V05-02-page49.txt: [('-', '')]
LibM19100401-V05-02-page5.txt: [('LIB-', 'LIB')]
LibM19100401-V05-02-page50.txt: [('-', ''), ('ac-', 'ac')]
LibM19100401-V05-02-page52.txt: [('-', ''), ('legisla-', 'legisla'), ('Jan-', 'Jan')]
LibM19100401-V05-02-page6.txt: [('-', '')]
LibM19100401-V05-02-page8.txt: [('PRESI-', 'PRESI')]
LibM19100701-V05-03-page17.txt: [('pros-', 'pros')]
LibM19100701-V05-03-page18.txt: [('ex-', 'ex')]
LibM19100701-V05-03-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('Lafayette-', 'Lafayette'), ('-', '')]
LibM19100701-V05-03-page20.txt: [('-', ''), ('-', '')]
LibM19100701-V05-03-page21.txt: [('cus-', 'cus'), ('-', '')]
LibM19100701-V05-03-page22.txt: [('-', '')]
LibM19100701-V05-03-page23.txt: [('Cath-', 'Cath')]
LibM19100701-V05-03-page24.txt: [('-', '')]
LibM19100701-V05-03-page26.txt: [('prin-', 'prin')]
LibM19100701-V05-03-page28.txt: [('non-', 'non'), ('insti-', 'insti'), ('re-', 're')]
LibM19100701-V05-03-page30.txt: [('-', ''), ('-I', 'I')]
LibM19100701-V05-03-page32.txt: [('An-', 'An'), ('Gen-', 'Gen')]
LibM19100701-V05-03-page34.txt: [('-', ''), ('meet-', 'meet')]
LibM19100701-V05-03-page35.txt: [('Li-', 'Li'), ('circula-', 'circula'), ('Pro-', 'Pro')]
LibM19100701-V05-03-page37.txt: [('ERRON-', 'ERRON'), ('HISTOR-', 'HISTOR'), ('PRAC-', 'PRAC'), ('-', '')]
LibM19100701-V05-03-page40.txt: [('--', '-')]
LibM19100701-V05-03-page46.txt: [('Anti-', 'Anti')]
LibM19100701-V05-03-page49.txt: [('PROTES-', 'PROTES'), ('MAG-', 'MAG'), ('Roosevelt-', 'Roosevelt'), ('-', '')]
LibM19100701-V05-03-page5.txt: [('Vat-', 'Vat')]
LibM19100701-V05-03-page50.txt: [('-', '')]
LibM19100701-V05-03-page52.txt: [('Inter-', 'Inter'), ('Post-', 'Post')]
LibM19100701-V05-03-page7.txt: [('Mc-', 'Mc')]
LibM19101001-V05-04-page1.txt: [('-', ''), ('-ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS', 'ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS'), ('st-', 'st'), ('-..-...', '..-...'), ('-X', 'X'), ('"-', '"'), ('-', ''), ('r.-', 'r.'), ('-', ''), ('-', ''), ('---', '--')]
LibM19101001-V05-04-page10.txt: [('-under', 'under')]
LibM19101001-V05-04-page11.txt: [('-authority', 'authority')]
LibM19101001-V05-04-page13.txt: [('-', '')]
LibM19101001-V05-04-page15.txt: [('-', '')]
LibM19101001-V05-04-page16.txt: [('gov-', 'gov')]
LibM19101001-V05-04-page19.txt: [('-', ''), ('spir-', 'spir')]
LibM19101001-V05-04-page2.txt: [('-', ''), ('-', ''), ('-S', 'S')]
LibM19101001-V05-04-page21.txt: [('-', ''), ('OPEN-', 'OPEN')]
LibM19101001-V05-04-page23.txt: [('OPEN-', 'OPEN'), ('gov-', 'gov')]
LibM19101001-V05-04-page24.txt: [('MON-', 'MON')]
LibM19101001-V05-04-page25.txt: [('hon-', 'hon')]
LibM19101001-V05-04-page26.txt: [('sig-', 'sig')]
LibM19101001-V05-04-page28.txt: [('MON-', 'MON'), ('char-', 'char'), ('in-', 'in'), ('L-', 'L')]
LibM19101001-V05-04-page29.txt: [('-', ''), ('interna-', 'interna')]
LibM19101001-V05-04-page30.txt: [('com-', 'com')]
LibM19101001-V05-04-page32.txt: [('antipedo-', 'antipedo')]
LibM19101001-V05-04-page34.txt: [('fear-', 'fear'), ('-', '')]
LibM19101001-V05-04-page35.txt: [('consola-', 'consola')]
LibM19101001-V05-04-page36.txt: [('-', '')]
LibM19101001-V05-04-page39.txt: [('y-', 'y')]
LibM19101001-V05-04-page42.txt: [('Zapnath-', 'Zapnath'), ('-"', '"'), ('Tel-el-', 'Tel-el')]
LibM19101001-V05-04-page43.txt: [('de-', 'de')]
LibM19101001-V05-04-page49.txt: [('PROTES-', 'PROTES'), ('MAG-', 'MAG'), ('Roosevelt-', 'Roosevelt')]
LibM19101001-V05-04-page5.txt: [('-.', '.'), ('-', '')]
LibM19101001-V05-04-page50.txt: [('-', '')]
LibM19101001-V05-04-page51.txt: [('-', '')]
LibM19101001-V05-04-page8.txt: [('-', '')]
LibM19101001-V05-04-page9.txt: [('-America', 'America')]
LibM19110101-V06-01-page1.txt: [('-...ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims', '...ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims'), ('-"C""', '"C""'), ('-', ''), ('-', ''), ('-', ''), ('Z---', 'Z--'), ('-', ''), ('.---', '.--'), ('-.', '.'), ('-', ''), ('ir-', 'ir'), ('-', ''), ('"nrnurilillpii"-', '"nrnurilillpii"')]
LibM19110101-V06-01-page11.txt: [('-as', 'as'), ('desire-', 'desire')]
LibM19110101-V06-01-page12.txt: [('sum-', 'sum')]
LibM19110101-V06-01-page13.txt: [('-', '')]
LibM19110101-V06-01-page15.txt: [('-', '')]
LibM19110101-V06-01-page18.txt: [('enforce-', 'enforce'), ('Mc-', 'Mc')]
LibM19110101-V06-01-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110101-V06-01-page20.txt: [('ac-', 'ac')]
LibM19110101-V06-01-page22.txt: [('-i', 'i'), ('-', ''), ('-', '')]
LibM19110101-V06-01-page23.txt: [('-', '')]
LibM19110101-V06-01-page27.txt: [('-', '')]
LibM19110101-V06-01-page29.txt: [('contra-', 'contra')]
LibM19110101-V06-01-page31.txt: [('par-', 'par')]
LibM19110101-V06-01-page34.txt: [('RE-', 'RE'), ('-great', 'great')]
LibM19110101-V06-01-page35.txt: [('lib-', 'lib')]
LibM19110101-V06-01-page36.txt: [('-', '')]
LibM19110101-V06-01-page42.txt: [('-', '')]
LibM19110101-V06-01-page43.txt: [('-', '')]
LibM19110101-V06-01-page49.txt: [('-', '')]
LibM19110101-V06-01-page5.txt: [('differ-', 'differ')]
LibM19110101-V06-01-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110101-V06-01-page6.txt: [('-', ''), ('--', '-'), ('-', ''), ('-', '')]
LibM19110101-V06-01-page7.txt: [('po-', 'po')]
LibM19110101-V06-01-page8.txt: [('Latin-', 'Latin')]
LibM19110101-V06-01-page9.txt: [('--', '-'), ('-', '')]
LibM19110401-V06-02-page1.txt: [('-', '')]
LibM19110401-V06-02-page11.txt: [('ac-', 'ac')]
LibM19110401-V06-02-page12.txt: [('employ-', 'employ')]
LibM19110401-V06-02-page13.txt: [('oc-', 'oc'), ('legiti-', 'legiti')]
LibM19110401-V06-02-page14.txt: [('meas-', 'meas')]
LibM19110401-V06-02-page16.txt: [('nec-', 'nec')]
LibM19110401-V06-02-page18.txt: [('UNI-', 'UNI'), ('labor-', 'labor')]
LibM19110401-V06-02-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110401-V06-02-page20.txt: [('en-', 'en')]
LibM19110401-V06-02-page26.txt: [('varia-', 'varia'), ('-', '')]
LibM19110401-V06-02-page27.txt: [('offi-', 'offi')]
LibM19110401-V06-02-page3.txt: [('-', ''), ('-', ''), ('.-', '.')]
LibM19110401-V06-02-page32.txt: [('Bap-', 'Bap'), ('Relig-', 'Relig')]
LibM19110401-V06-02-page33.txt: [('-wow-', 'wow-')]
LibM19110401-V06-02-page34.txt: [('-', ''), ('per-', 'per')]
LibM19110401-V06-02-page40.txt: [('es-', 'es')]
LibM19110401-V06-02-page42.txt: [('-', ''), ('-', ''), ('-.', '.'), ('.-', '.')]
LibM19110401-V06-02-page43.txt: [('-', ''), ('-', ''), ('God.-', 'God.')]
LibM19110401-V06-02-page46.txt: [('Conti-', 'Conti'), ('BUILD-', 'BUILD'), ('per-', 'per')]
LibM19110401-V06-02-page47.txt: [('Globe-', 'Globe')]
LibM19110401-V06-02-page48.txt: [('-', ''), ('--', '-')]
LibM19110401-V06-02-page49.txt: [('-', ''), ('-', ''), ('-Lamer.', 'Lamer.')]
LibM19110401-V06-02-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110401-V06-02-page52.txt: [('-o', 'o')]
LibM19110701-V06-03-page1.txt: [('--', '-'), ('-', ''), ('-dkialligranli', 'dkialligranli')]
LibM19110701-V06-03-page10.txt: [('-', '')]
LibM19110701-V06-03-page12.txt: [('-', ''), ('-IT.', 'IT.'), ('right-', 'right')]
LibM19110701-V06-03-page14.txt: [('un-', 'un')]
LibM19110701-V06-03-page15.txt: [('-for', 'for')]
LibM19110701-V06-03-page16.txt: [('establish-', 'establish')]
LibM19110701-V06-03-page2.txt: [('-', ''), ('-', '')]
LibM19110701-V06-03-page21.txt: [('Eng-', 'Eng')]
LibM19110701-V06-03-page22.txt: [('peo-', 'peo')]
LibM19110701-V06-03-page24.txt: [('-', ''), ('manufac-', 'manufac')]
LibM19110701-V06-03-page25.txt: [('ter-', 'ter'), ('wor-', 'wor'), ('-', '')]
LibM19110701-V06-03-page26.txt: [('.-', '.'), ('-', ''), ('.-', '.')]
LibM19110701-V06-03-page27.txt: [('re-', 're')]
LibM19110701-V06-03-page28.txt: [('audience-', 'audience')]
LibM19110701-V06-03-page31.txt: [('ac-', 'ac')]
LibM19110701-V06-03-page32.txt: [('Prot-', 'Prot'), ('re-', 're')]
LibM19110701-V06-03-page33.txt: [('Sabbathkeep-', 'Sabbathkeep'), ('under-', 'under')]
LibM19110701-V06-03-page35.txt: [('mem-', 'mem')]
LibM19110701-V06-03-page37.txt: [('dissolu-', 'dissolu')]
LibM19110701-V06-03-page39.txt: [('-', '')]
LibM19110701-V06-03-page4.txt: [('-', '')]
LibM19110701-V06-03-page41.txt: [('bar-', 'bar')]
LibM19110701-V06-03-page42.txt: [('Ma-', 'Ma')]
LibM19110701-V06-03-page45.txt: [('re-', 're')]
LibM19110701-V06-03-page48.txt: [('-N', 'N')]
LibM19110701-V06-03-page49.txt: [('-', ''), ('-', ''), ('treat-', 'treat')]
LibM19110701-V06-03-page5.txt: [('re-', 're')]
LibM19110701-V06-03-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('expe-', 'expe')]
LibM19110701-V06-03-page52.txt: [('rea-', 'rea')]
LibM19110701-V06-03-page9.txt: [('Post-', 'Post'), ('ob-', 'ob')]
LibM19111001-V06-04-page1.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page11.txt: [('Latin-', 'Latin'), ('An-', 'An'), ('Con-', 'Con')]
LibM19111001-V06-04-page12.txt: [('-', ''), ('guar-', 'guar')]
LibM19111001-V06-04-page14.txt: [('Etats-', 'Etats'), ('-', ''), ('-', '')]
LibM19111001-V06-04-page16.txt: [('----', '---'), ('AMER-', 'AMER')]
LibM19111001-V06-04-page17.txt: [('rev-', 'rev')]
LibM19111001-V06-04-page18.txt: [('o-', 'o'), ('ex-', 'ex')]
LibM19111001-V06-04-page19.txt: [('-said', 'said'), ('legis-', 'legis')]
LibM19111001-V06-04-page2.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page23.txt: [('ex-', 'ex')]
LibM19111001-V06-04-page24.txt: [('-', ''), ('-.', '.'), ('.-', '.')]
LibM19111001-V06-04-page26.txt: [('-', ''), ('con-', 'con')]
LibM19111001-V06-04-page34.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page35.txt: [('argu-', 'argu')]
LibM19111001-V06-04-page36.txt: [('CRUM-', 'CRUM')]
LibM19111001-V06-04-page38.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page39.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page40.txt: [('-', '')]
LibM19111001-V06-04-page42.txt: [('-', '')]
LibM19111001-V06-04-page48.txt: [('-', '')]
LibM19111001-V06-04-page49.txt: [('-', ''), ('-', ''), ('PDNIam-', 'PDNIam')]
LibM19111001-V06-04-page5.txt: [('o-', 'o'), ('-', '')]
LibM19111001-V06-04-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19111001-V06-04-page52.txt: [('-li', 'li'), ('Ra-', 'Ra'), ('-li', 'li')]
LibM19111001-V06-04-page8.txt: [('-', '')]
LibM19120101-V07-01-page12.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page15.txt: [('assess-', 'assess'), ('com-', 'com')]
LibM19120101-V07-01-page19.txt: [('-other', 'other')]
LibM19120101-V07-01-page2.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page22.txt: [('com-', 'com')]
LibM19120101-V07-01-page26.txt: [('Novem-', 'Novem')]
LibM19120101-V07-01-page27.txt: [('Pan-', 'Pan')]
LibM19120101-V07-01-page33.txt: [('-', '')]
LibM19120101-V07-01-page37.txt: [('-', '')]
LibM19120101-V07-01-page38.txt: [('Brigadier-', 'Brigadier'), ('fin-', 'fin')]
LibM19120101-V07-01-page39.txt: [('-', '')]
LibM19120101-V07-01-page42.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19120101-V07-01-page43.txt: [('-', ''), ('ESTAB-', 'ESTAB')]
LibM19120101-V07-01-page45.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page46.txt: [('-', '')]
LibM19120101-V07-01-page47.txt: [('-.', '.'), ('.-', '.')]
LibM19120101-V07-01-page49.txt: [('devel-', 'devel'), ('-', ''), ('PM-', 'PM'), ('-', ''), ('p-', 'p')]
LibM19120101-V07-01-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19120101-V07-01-page6.txt: [('-', '')]
LibM19120101-V07-01-page7.txt: [('-', ''), ('Fairbanks-Roosevelt-', 'Fairbanks-Roosevelt')]
LibM19120101-V07-01-page8.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page9.txt: [('be-', 'be')]
LibM19120401-V07-02-page2.txt: [('-', '')]
LibM19120401-V07-02-page21.txt: [('--', '-')]
LibM19120401-V07-02-page25.txt: [('no-', 'no')]
LibM19120401-V07-02-page26.txt: [('divi-', 'divi'), ('mat-', 'mat')]
LibM19120401-V07-02-page27.txt: [('di-', 'di')]
LibM19120401-V07-02-page29.txt: [('--', '-')]
LibM19120401-V07-02-page30.txt: [('-', '')]
LibM19120401-V07-02-page31.txt: [('un-', 'un')]
LibM19120401-V07-02-page33.txt: [('un-', 'un'), ('be-', 'be')]
LibM19120401-V07-02-page34.txt: [('un-', 'un')]
LibM19120401-V07-02-page36.txt: [('Accord-', 'Accord')]
LibM19120401-V07-02-page37.txt: [('-', '')]
LibM19120401-V07-02-page38.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-icx-m', 'icx-m'), ('Xl-td-', 'Xl-td'), ('ec-', 'ec'), ('-', ''), ('-', ''), ('-', ''), ('-mensisZ."\'"-', 'mensisZ."\'"-')]
LibM19120401-V07-02-page4.txt: [('-...', '...')]
LibM19120401-V07-02-page40.txt: [('-Sep-', 'Sep-'), ('-szera', 'szera'), ('-ilre', 'ilre')]
LibM19120401-V07-02-page42.txt: [('discus-', 'discus'), ('-sion', 'sion')]
LibM19120401-V07-02-page48.txt: [('-', ''), ('LIB-', 'LIB'), ('-', '')]
LibM19120401-V07-02-page49.txt: [('-', '')]
LibM19120401-V07-02-page51.txt: [('-Seven', 'Seven'), ('ar-', 'ar')]
LibM19120401-V07-02-page6.txt: [('Cali-', 'Cali')]
LibM19120401-V07-02-page7.txt: [('non-', 'non'), ('-', '')]
LibM19120401-V07-02-page8.txt: [('for-', 'for')]
LibM19120701-V07-03-page11.txt: [('be-', 'be')]
LibM19120701-V07-03-page13.txt: [('anti-', 'anti'), ('-rotest', 'rotest'), ('hol-', 'hol')]
LibM19120701-V07-03-page14.txt: [('-', ''), ('-', '')]
LibM19120701-V07-03-page16.txt: [('Sec-', 'Sec')]
LibM19120701-V07-03-page17.txt: [('-', ''), ('com-', 'com')]
LibM19120701-V07-03-page18.txt: [('distinct-', 'distinct')]
LibM19120701-V07-03-page2.txt: [('Co-', 'Co'), ('-', '')]
LibM19120701-V07-03-page20.txt: [('-ss', 'ss')]
LibM19120701-V07-03-page21.txt: [('-', ''), ('-', ''), ('estab-', 'estab')]
LibM19120701-V07-03-page22.txt: [('re-', 're'), ('--', '-')]
LibM19120701-V07-03-page25.txt: [('-', '')]
LibM19120701-V07-03-page28.txt: [('unveil-', 'unveil')]
LibM19120701-V07-03-page30.txt: [('-', '')]
LibM19120701-V07-03-page37.txt: [('-', '')]
LibM19120701-V07-03-page38.txt: [('con-', 'con'), ('AMEND-', 'AMEND'), ('-', '')]
LibM19120701-V07-03-page4.txt: [('-ititeiltintonecfctration', 'ititeiltintonecfctration'), ('-', ''), ('-', ''), ('-', ''), ('e.n.d....-', 'e.n.d....'), ('i-', 'i'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('s-', 's'), ('-', ''), ('-fr', 'fr'), ('ee-', 'ee'), ('-', ''), ('-', ''), ('-..-.', '..-.'), ('f--', 'f-'), ('otb-', 'otb'), ('......-', '......'), ('--', '-'), ('-', ''), ('-a', 'a'), ('-.', '.'), ('-e-', 'e-'), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('d-', 'd'), ('..---', '..--'), ('.i..-', '.i..'), ('..ta.--', '..ta.-'), ('.-', '.'), ('...-', '...'), ('-w', 'w'), ('x.t-', 'x.t'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('.-', '.'), ('..g-Z-', '..g-Z'), ('---.', '--.'), ('--', '-'), ('-', ''), ("---'", "--'"), ('--ft', '-ft'), ('----a', '---a'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-sfo', 'sfo'), ('-', ''), ('....-', '....'), ('-', ''), ('a...-', 'a...'), ('-', ''), ('-.', '.'), ('-', ''), ('--', '-'), ("-.i'", ".i'"), ('N.-', 'N.'), ('m-', 'm'), ('-', ''), ('-', ''), ('-', ''), ('dfr.d.-', 'dfr.d.'), ('-e', 'e'), ('ap-', 'ap'), ('-.onia', '.onia'), ('-', ''), ('-', ''), ('-.', '.'), ('.-', '.'), ('--z', '-z'), ('-', ''), ('-', ''), ('-', ''), ('...-vr-', '...-vr'), ('-.', '.'), ('.-', '.'), ('-', ''), ('e-', 'e'), ('-', ''), ('-e', 'e'), ('-..', '..'), ('-A.c.....', 'A.c.....'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-..g.', '..g.'), ('-.', '.'), ('g--', 'g-'), ('-', ''), ("--'", "-'"), ('-inio', 'inio'), ('-LI', 'LI'), ('-I', 'I'), ('-...', '...'), ('N.-', 'N.'), ('n-', 'n'), ('.ea...-', '.ea...'), ('-a', 'a'), ('-', ''), ('-', ''), ('-i-', 'i-'), ('-..a.A.', '..a.A.'), ('h---', 'h--'), ('.-', '.'), ('-', ''), ('--.r..', '-.r..'), ('.-', '.'), ('-.-', '.-'), ('-', ''), ('---', '--'), ("-'", "'"), ('---is.', '--is.'), ('-', ''), ('-r', 'r'), ('--Yelor.', '-Yelor.'), ('-.', '.'), ('-....-..C.', '....-..C.'), ('-', ''), ('-ir."...ezi..i..', 'ir."...ezi..i..'), ('-', ''), ('-', ''), ('e.e.-', 'e.e.'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('-"', '"'), ('-', ''), ('-.', '.'), ('-', ''), ('....-', '....'), ('-', ''), ('-', ''), ('-', ''), ('"....-', '"....'), ('............nen-', '............nen'), ('--..z..', '-..z..'), ('I-', 'I'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('.-', '.'), ('--', '-'), ('-', ''), ('--Ve', '-Ve'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-a-', 'a-'), ('-', ''), ('.-', '.'), ('-a.Cdr', 'a.Cdr'), ('-', ''), ('eartc-', 'eartc'), ('--', '-'), ('-.-', '.-'), ('..-', '..'), ('-..-.....', '..-.....'), ('-', ''), ('....-', '....'), ('-', ''), ('.--', '.-'), ('--.....', '-.....'), ('-', ''), ('.-', '.'), ("-'..", "'.."), ('-', ''), ('-', ''), ('-', ''), ('-riK-', 'riK-'), ('-', ''), ('-.', '.'), ('--', '-'), ('---', '--'), ('--', '-'), ('-', ''), ('-', ''), ('-.', '.'), ('--r', '-r'), ('.--', '.-'), ('-', ''), ('-...-', '...-')]
LibM19120701-V07-03-page42.txt: [('-', '')]
LibM19120701-V07-03-page43.txt: [('fun-', 'fun'), ('ap-', 'ap')]
LibM19120701-V07-03-page46.txt: [('-', ''), ('-', ''), ('.-', '.')]
LibM19120701-V07-03-page47.txt: [('-', '')]
LibM19120701-V07-03-page48.txt: [('-', ''), ('-', '')]
LibM19120701-V07-03-page49.txt: [('-', ''), ('gentle-', 'gentle')]
LibM19120701-V07-03-page5.txt: [('-', '')]
LibM19120701-V07-03-page51.txt: [('Gov-', 'Gov'), ('dis-', 'dis')]
LibM19120701-V07-03-page52.txt: [('-.', '.'), ('re-', 're'), ('-', ''), ('We-', 'We'), ('-', '')]
LibM19120701-V07-03-page9.txt: [('Pan-', 'Pan'), ('November-', 'November'), ('observ-', 'observ')]
LibM19121001-V07-04-page13.txt: [('-', '')]
LibM19121001-V07-04-page14.txt: [('non-', 'non')]
LibM19121001-V07-04-page15.txt: [('-', ''), ('Postmaster-', 'Postmaster')]
LibM19121001-V07-04-page17.txt: [('-', '')]
LibM19121001-V07-04-page19.txt: [('-', '')]
LibM19121001-V07-04-page2.txt: [('.-', '.'), ('-', ''), ('-', ''), ('Steph-', 'Steph')]
LibM19121001-V07-04-page20.txt: [('mat-', 'mat')]
LibM19121001-V07-04-page21.txt: [('Cath-', 'Cath')]
LibM19121001-V07-04-page23.txt: [('-the', 'the')]
LibM19121001-V07-04-page29.txt: [('deter-', 'deter'), ('-', ''), ('constru-', 'constru'), ('spe-', 'spe')]
LibM19121001-V07-04-page3.txt: [('-', '')]
LibM19121001-V07-04-page31.txt: [('-', '')]
LibM19121001-V07-04-page32.txt: [('-', '')]
LibM19121001-V07-04-page41.txt: [('seek-', 'seek')]
LibM19121001-V07-04-page44.txt: [('relation-', 'relation')]
LibM19121001-V07-04-page5.txt: [('ad-', 'ad')]
LibM19121001-V07-04-page50.txt: [('-..', '..')]
LibM19121001-V07-04-page51.txt: [('-', ''), ('-', ''), ('Answers-', 'Answers')]
LibM19121001-V07-04-page6.txt: [('Orion-', 'Orion'), ('.-', '.'), ('.raityr-', '.raityr'), ('neer.-', 'neer.'), ('V-', 'V'), ('mow-', 'mow')]
LibM19121001-V07-04-page7.txt: [('.-', '.'), ('-', ''), ('-', ''), ('-', '')]
LibM19121001-V07-04-page9.txt: [('-', ''), ('ma-', 'ma')]
LibM19130101-V08-01-page10.txt: [('think-', 'think')]
LibM19130101-V08-01-page11.txt: [('considera-', 'considera')]
LibM19130101-V08-01-page14.txt: [('vigor-', 'vigor')]
LibM19130101-V08-01-page15.txt: [('re-', 're')]
LibM19130101-V08-01-page17.txt: [('---', '--')]
LibM19130101-V08-01-page2.txt: [('Co-', 'Co'), ('lhan-', 'lhan'), ('-', ''), ('MitaM.O.D.mroo.M.O.m.-', 'MitaM.O.D.mroo.M.O.m.')]
LibM19130101-V08-01-page22.txt: [('pro-', 'pro')]
LibM19130101-V08-01-page24.txt: [('-', ''), ('LIB-', 'LIB')]
LibM19130101-V08-01-page25.txt: [('in-', 'in')]
LibM19130101-V08-01-page26.txt: [('inves-', 'inves')]
LibM19130101-V08-01-page27.txt: [('det-', 'det')]
LibM19130101-V08-01-page3.txt: [('-', ''), ('pre-', 'pre'), ('there-', 'there'), ('un-', 'un'), ('-', ''), ('SECOND.-', 'SECOND.'), ('pur-', 'pur'), ('FIRST.-', 'FIRST.'), ('ad-', 'ad')]
LibM19130101-V08-01-page31.txt: [('Atlas-', 'Atlas'), ('individ-', 'individ')]
LibM19130101-V08-01-page32.txt: [('-', ''), ('-', '')]
LibM19130101-V08-01-page35.txt: [('-', '')]
LibM19130101-V08-01-page37.txt: [('-', '')]
LibM19130101-V08-01-page4.txt: [('maga-', 'maga')]
LibM19130101-V08-01-page40.txt: [('-', ''), ('state-', 'state')]
LibM19130101-V08-01-page42.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('freight-', 'freight'), ('ordi-', 'ordi'), ('-went', 'went'), ('-', ''), ('-', '')]
LibM19130101-V08-01-page43.txt: [('-', ''), ('-', ''), ('-bridges', 'bridges'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('x-', 'x'), ('-', ''), ('-', ''), ('-', ''), ('-Io', 'Io'), ('-', ''), ('-', '')]
LibM19130101-V08-01-page44.txt: [('work-', 'work'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('serv-', 'serv')]
LibM19130101-V08-01-page45.txt: [('-', ''), ('xo-', 'xo'), ('i-', 'i'), ('-', ''), ('-', ''), ('x-', 'x'), ('s-', 's'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130101-V08-01-page46.txt: [('-', '')]
LibM19130101-V08-01-page5.txt: [('ad-', 'ad'), ('-', '')]
LibM19130101-V08-01-page50.txt: [('An-', 'An'), ('Ar-', 'Ar'), ('-AMERICAN', 'AMERICAN'), ('Re-', 'Re'), ('So-', 'So'), ('-', ''), ('POST-', 'POST')]
LibM19130101-V08-01-page51.txt: [('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('r---------', 'r--------'), ('-Nr', 'Nr'), ('-', ''), ('-', ''), ('c-', 'c')]
LibM19130101-V08-01-page52.txt: [('-page', 'page')]
LibM19130101-V08-01-page9.txt: [('Na-', 'Na')]
LibM19130401-V08-02-page12.txt: [('min-', 'min')]
LibM19130401-V08-02-page14.txt: [('Co-', 'Co')]
LibM19130401-V08-02-page2.txt: [('-earoominmerk', 'earoominmerk'), ('al-', 'al'), ('-', ''), ('affil-', 'affil')]
LibM19130401-V08-02-page21.txt: [('-', '')]
LibM19130401-V08-02-page22.txt: [('-is', 'is')]
LibM19130401-V08-02-page24.txt: [('-', ''), ('-', ''), ('pro-', 'pro')]
LibM19130401-V08-02-page25.txt: [('-', '')]
LibM19130401-V08-02-page27.txt: [('rea-', 'rea')]
LibM19130401-V08-02-page28.txt: [('-', '')]
LibM19130401-V08-02-page3.txt: [('ad-', 'ad'), ('pur-', 'pur'), ('CITI-', 'CITI'), ('PRE-', 'PRE'), ('Strug-', 'Strug'), ('CHOOS-', 'CHOOS'), ('enjoy-', 'enjoy'), ('PRIN-', 'PRIN'), ('sub-', 'sub'), ('whole-', 'whole')]
LibM19130401-V08-02-page30.txt: [('.ex-', '.ex'), ('-', ''), ('Philadel-', 'Philadel'), ('reso-', 'reso'), ('Scot-', 'Scot'), ('visit-', 'visit'), ('set-', 'set'), ('his-', 'his'), ('re-', 're'), ('hu-', 'hu'), ('con-', 'con')]
LibM19130401-V08-02-page34.txt: [('hav-', 'hav'), ('cer-', 'cer'), ('un-', 'un'), ('ac-', 'ac'), ('ad-', 'ad'), ('maintain-', 'maintain')]
LibM19130401-V08-02-page39.txt: [('mil-', 'mil')]
LibM19130401-V08-02-page4.txt: [('recom-', 'recom')]
LibM19130401-V08-02-page42.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-shops.', 'shops.')]
LibM19130401-V08-02-page43.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('subse-', 'subse'), ('o-', 'o'), ('sub-', 'sub'), ('loo-', 'loo'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130401-V08-02-page44.txt: [('-', ''), ('-', ''), ('-', ''), ('-r', 'r'), ('Sat-', 'Sat'), ('-', ''), ('-a', 'a'), ('-a', 'a')]
LibM19130401-V08-02-page45.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130401-V08-02-page46.txt: [('-', ''), ('o-', 'o'), ('begin-', 'begin')]
LibM19130401-V08-02-page47.txt: [('-', '')]
LibM19130401-V08-02-page49.txt: [('Albu-', 'Albu')]
LibM19130401-V08-02-page5.txt: [('ad-', 'ad')]
LibM19130401-V08-02-page50.txt: [('Ar-', 'Ar'), ('An-', 'An'), ('Re-', 'Re'), ('-', ''), ('POST-', 'POST')]
LibM19130401-V08-02-page51.txt: [('-', ''), ('----', '---'), ('--', '-'), ('-----', '----'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.-', '.-'), ('-.-.', '.-.'), ('.-', '.'), ('.-', '.'), ('---"', '--"'), ('--.', '-.'), ('-Mt', 'Mt'), ('-', ''), ('JUSTI-', 'JUSTI')]
LibM19130401-V08-02-page52.txt: [('-page', 'page')]
LibM19130701-V08-03-page10.txt: [('Mc-', 'Mc'), ('Re-', 'Re')]
LibM19130701-V08-03-page14.txt: [('-T.', 'T.')]
LibM19130701-V08-03-page17.txt: [('al-', 'al')]
LibM19130701-V08-03-page18.txt: [('exer-', 'exer')]
LibM19130701-V08-03-page2.txt: [('Seen-p.deffeatv-', 'Seen-p.deffeatv'), ('-eury.', 'eury.'), ('-eiteile', 'eiteile'), ('rhah-', 'rhah'), ('-', ''), ('-eeedie', 'eeedie'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Yezaedi', 'Yezaedi'), ('-eiraeznactmew', 'eiraeznactmew'), ('-erga-evia', 'erga-evia'), ('-W', 'W'), ('-e', 'e'), ('--elt', '-elt'), ('-e', 'e'), ('MgetOofm-', 'MgetOofm'), ('SaFVtel-', 'SaFVtel'), ('-ix', 'ix')]
LibM19130701-V08-03-page21.txt: [('sena-', 'sena')]
LibM19130701-V08-03-page22.txt: [('corn-', 'corn')]
LibM19130701-V08-03-page26.txt: [('-', ''), ('-', ''), ("-'", "'"), ('-', ''), ('-', '')]
LibM19130701-V08-03-page27.txt: [('-.', '.'), ('...-', '...'), ('-', ''), ('.......--', '.......-'), ('-', ''), ('-....', '....'), ('-"..r...', '"..r...'), ('-', ''), ('-', ''), ('-', ''), ('-.-', '.-'), ('-.', '.'), ('-.', '.'), ('-', ''), ('-...', '...'), ('----.--', '---.--'), ('-.........', '.........'), ('-........"', '........"'), ('-', ''), ('-', ''), ('-', '')]
LibM19130701-V08-03-page29.txt: [('Sun-', 'Sun'), ('restric-', 'restric'), ('re-', 're')]
LibM19130701-V08-03-page3.txt: [('--HE', '-HE'), ('CITIZEN-', 'CITIZEN'), ('CHOOS-', 'CHOOS'), ('enjoy-', 'enjoy'), ('PRIN-', 'PRIN')]
LibM19130701-V08-03-page30.txt: [('exer-', 'exer')]
LibM19130701-V08-03-page32.txt: [('in-', 'in')]
LibM19130701-V08-03-page33.txt: [('pub-', 'pub')]
LibM19130701-V08-03-page36.txt: [('con-', 'con')]
LibM19130701-V08-03-page39.txt: [('hear-', 'hear'), ('Commis-', 'Commis'), ('Sun-', 'Sun'), ('move-', 'move'), ('Chris-', 'Chris')]
LibM19130701-V08-03-page4.txt: [('-', '')]
LibM19130701-V08-03-page41.txt: [('re-', 're')]
LibM19130701-V08-03-page42.txt: [('GOV-', 'GOV')]
LibM19130701-V08-03-page44.txt: [('-t', 't'), ('-', ''), ('cd-n-', 'cd-n'), ('-ca.z', 'ca.z'), ('zW-', 'zW'), ('A-', 'A')]
LibM19130701-V08-03-page49.txt: [('-', ''), ('-', ''), ('-ICIT', 'ICIT'), ('ADVER-', 'ADVER'), ('-', ''), ('-', ''), ('-eX', 'eX')]
LibM19130701-V08-03-page5.txt: [('-', ''), ('ad-', 'ad')]
LibM19130701-V08-03-page50.txt: [('An-', 'An')]
LibM19130701-V08-03-page51.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130701-V08-03-page6.txt: [('-', ''), ('--', '-')]
LibM19130701-V08-03-page8.txt: [('command-', 'command')]
LibM19130701-V08-03-page9.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc')]
LibM19131001-V08-04-page10.txt: [('un-', 'un')]
LibM19131001-V08-04-page11.txt: [('state-estab-', 'state-estab')]
LibM19131001-V08-04-page12.txt: [('-', '')]
LibM19131001-V08-04-page13.txt: [('Sun-', 'Sun'), ('with-', 'with'), ('extrav-', 'extrav'), ('preseri-', 'preseri')]
LibM19131001-V08-04-page14.txt: [('Babylo-', 'Babylo')]
LibM19131001-V08-04-page18.txt: [('--', '-')]
LibM19131001-V08-04-page2.txt: [('-', '')]
LibM19131001-V08-04-page20.txt: [('-', '')]
LibM19131001-V08-04-page22.txt: [('mo-', 'mo')]
LibM19131001-V08-04-page25.txt: [('ex-', 'ex'), ('ex-', 'ex'), ('Sun-', 'Sun'), ('ex-', 'ex')]
LibM19131001-V08-04-page26.txt: [('pre-', 'pre')]
LibM19131001-V08-04-page28.txt: [('-uncontrolled', 'uncontrolled')]
LibM19131001-V08-04-page29.txt: [('in-', 'in')]
LibM19131001-V08-04-page3.txt: [('CHOOS-', 'CHOOS'), ('PRIN-', 'PRIN'), ('enjoy-', 'enjoy'), ('intol-', 'intol'), ('sub-', 'sub'), ('whole-', 'whole'), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('ad-', 'ad'), ("'q-", "'q")]
LibM19131001-V08-04-page30.txt: [('com-', 'com')]
LibM19131001-V08-04-page31.txt: [('Re-', 'Re')]
LibM19131001-V08-04-page33.txt: [('ab-', 'ab')]
LibM19131001-V08-04-page34.txt: [('uni-', 'uni'), ('prog-', 'prog')]
LibM19131001-V08-04-page36.txt: [('-', ''), ('-', ''), ('-', ''), ('relig-', 'relig')]
LibM19131001-V08-04-page39.txt: [('--', '-')]
LibM19131001-V08-04-page4.txt: [('-', ''), ('-', ''), ('-o', 'o')]
LibM19131001-V08-04-page41.txt: [('-', '')]
LibM19131001-V08-04-page43.txt: [('-questions', 'questions')]
LibM19131001-V08-04-page44.txt: [('govern-', 'govern')]
LibM19131001-V08-04-page45.txt: [('D-', 'D'), ('-', '')]
LibM19131001-V08-04-page46.txt: [('-is', 'is')]
LibM19131001-V08-04-page49.txt: [('ADVER-', 'ADVER')]
LibM19131001-V08-04-page5.txt: [('ad-', 'ad')]
LibM19131001-V08-04-page50.txt: [('-', ''), ('-', ''), ('Ar-', 'Ar')]
LibM19131001-V08-04-page51.txt: [('mission-', 'mission')]
LibM19131001-V08-04-page52.txt: [('--', '-'), ('-', ''), ("'.-", "'."), ('-', ''), ('-', ''), ('-.IA', '.IA')]
LibM19131001-V08-04-page7.txt: [('-MMI.', 'MMI.'), ('M.-', 'M.'), ('-MED.', 'MED.'), ('-', ''), ('-rthe', 'rthe')]
LibM19140101-V09-01-page1.txt: [('-', '')]
LibM19140101-V09-01-page11.txt: [('-MWOO', 'MWOO'), ('-', '')]
LibM19140101-V09-01-page18.txt: [('-I', 'I'), ('-', ''), ('-from', 'from'), ('prin-', 'prin')]
LibM19140101-V09-01-page19.txt: [('cler-', 'cler'), ('-that', 'that')]
LibM19140101-V09-01-page2.txt: [('-mm.', 'mm.'), ('-', '')]
LibM19140101-V09-01-page21.txt: [('-all', 'all')]
LibM19140101-V09-01-page23.txt: [('-entered', 'entered'), ('heaven-', 'heaven'), ('-', ''), ('govern-', 'govern'), ('syn-', 'syn'), ('be-', 'be'), ('-result', 'result'), ('with-', 'with')]
LibM19140101-V09-01-page25.txt: [('-rights', 'rights')]
LibM19140101-V09-01-page26.txt: [('-our', 'our'), ('con-', 'con')]
LibM19140101-V09-01-page27.txt: [('-for', 'for')]
LibM19140101-V09-01-page29.txt: [('-', ''), ('-Sabbath', 'Sabbath'), ('un-', 'un')]
LibM19140101-V09-01-page3.txt: [('LIBER-', 'LIBER'), ('-inch', 'inch'), ('CHANG-', 'CHANG'), ('CARE-', 'CARE')]
LibM19140101-V09-01-page30.txt: [('king--', 'king-'), ('-', '')]
LibM19140101-V09-01-page31.txt: [('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', '')]
LibM19140101-V09-01-page33.txt: [('-are', 'are')]
LibM19140101-V09-01-page36.txt: [('-', '')]
LibM19140101-V09-01-page38.txt: [('Chris-', 'Chris'), ('com-', 'com'), ('Re-', 'Re'), ('-', ''), ('-', '')]
LibM19140101-V09-01-page43.txt: [('prob-', 'prob')]
LibM19140101-V09-01-page44.txt: [('BUILD-', 'BUILD')]
LibM19140101-V09-01-page46.txt: [('say-', 'say'), ('an--', 'an-')]
LibM19140101-V09-01-page47.txt: [('-', '')]
LibM19140101-V09-01-page48.txt: [('citi-', 'citi')]
LibM19140101-V09-01-page52.txt: [('-.', '.')]
LibM19140101-V09-01-page53.txt: [('-', ''), ('e.A-', 'e.A'), ('Ar-', 'Ar'), ('An-', 'An')]
LibM19140101-V09-01-page54.txt: [('-i', 'i'), ('Albu-', 'Albu'), ('Aven-', 'Aven')]
LibM19140101-V09-01-page55.txt: [('-', ''), ('-.', '.')]
LibM19140101-V09-01-page56.txt: [('-VoPr', 'VoPr'), ('-', ''), ('-NA', 'NA'), ('-.N', '.N')]
LibM19140101-V09-01-page8.txt: [('-', '')]
LibM19140101-V09-01-page9.txt: [('ad-', 'ad')]
LibM19140401-V09-02-page1.txt: [('--gm', '-gm')]
LibM19140401-V09-02-page11.txt: [('-', ''), ('be-', 'be')]
LibM19140401-V09-02-page12.txt: [('al-', 'al'), ('combina-', 'combina'), ('coun-', 'coun'), ('un-', 'un')]
LibM19140401-V09-02-page13.txt: [('-object', 'object'), ('.-', '.')]
LibM19140401-V09-02-page14.txt: [('-intolerant', 'intolerant'), ('prod-', 'prod')]
LibM19140401-V09-02-page15.txt: [('Sun-', 'Sun')]
LibM19140401-V09-02-page16.txt: [('-', '')]
LibM19140401-V09-02-page17.txt: [('-', ''), ('ob-', 'ob')]
LibM19140401-V09-02-page18.txt: [('ASSEM-', 'ASSEM')]
LibM19140401-V09-02-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('af-', 'af'), ('-', '')]
LibM19140401-V09-02-page22.txt: [('Con-', 'Con')]
LibM19140401-V09-02-page25.txt: [('-', ''), ('-', ''), ('citi-', 'citi'), ('stat-', 'stat')]
LibM19140401-V09-02-page26.txt: [('prop-', 'prop')]
LibM19140401-V09-02-page27.txt: [('funda-', 'funda'), ('-', '')]
LibM19140401-V09-02-page29.txt: [('-', '')]
LibM19140401-V09-02-page3.txt: [('CIRCULAT-', 'CIRCULAT')]
LibM19140401-V09-02-page30.txt: [('forty-', 'forty')]
LibM19140401-V09-02-page32.txt: [('-', '')]
LibM19140401-V09-02-page33.txt: [('en-', 'en')]
LibM19140401-V09-02-page35.txt: [('PROTES-', 'PROTES')]
LibM19140401-V09-02-page36.txt: [('in-', 'in')]
LibM19140401-V09-02-page38.txt: [('-', '')]
LibM19140401-V09-02-page41.txt: [('MAGA-', 'MAGA')]
LibM19140401-V09-02-page43.txt: [('BE-', 'BE'), ('-', '')]
LibM19140401-V09-02-page44.txt: [('A-i-', 'A-i'), ('.-', '.'), ('PARTNER-', 'PARTNER')]
LibM19140401-V09-02-page46.txt: [('-', '')]
LibM19140401-V09-02-page48.txt: [('-.', '.')]
LibM19140401-V09-02-page49.txt: [('---.', '--.'), ('---il', '--il')]
LibM19140401-V09-02-page5.txt: [('ad-', 'ad')]
LibM19140401-V09-02-page50.txt: [('ADVER-', 'ADVER')]
LibM19140401-V09-02-page52.txt: [('dan-', 'dan'), ('stern-', 'stern'), ('in-', 'in'), ('re-', 're'), ('-', ''), ('-', '')]
LibM19140401-V09-02-page6.txt: [('-', '')]
LibM19140401-V09-02-page7.txt: [('-', ''), ('MWO-', 'MWO'), ('MOD-', 'MOD'), ('glo-', 'glo')]
LibM19140701-V09-03-page10.txt: [('sacra-', 'sacra'), ('-Surely', 'Surely'), ('op-', 'op')]
LibM19140701-V09-03-page11.txt: [('estab-', 'estab')]
LibM19140701-V09-03-page12.txt: [('--', '-')]
LibM19140701-V09-03-page15.txt: [('transi-', 'transi')]
LibM19140701-V09-03-page17.txt: [('sub-', 'sub'), ('re-', 're')]
LibM19140701-V09-03-page2.txt: [('-', '')]
LibM19140701-V09-03-page24.txt: [('-', ''), ('-', '')]
LibM19140701-V09-03-page27.txt: [('-ence', 'ence')]
LibM19140701-V09-03-page29.txt: [('-', '')]
LibM19140701-V09-03-page3.txt: [('-', ''), ('-', ''), ('CIRCULAT-', 'CIRCULAT')]
LibM19140701-V09-03-page31.txt: [('free-', 'free')]
LibM19140701-V09-03-page33.txt: [('se-', 'se')]
LibM19140701-V09-03-page34.txt: [('prop-', 'prop'), ('-', ''), ('ambi-', 'ambi')]
LibM19140701-V09-03-page35.txt: [('-', '')]
LibM19140701-V09-03-page36.txt: [('-', ''), ('rea-', 'rea'), ('Chris-', 'Chris')]
LibM19140701-V09-03-page39.txt: [('-', ''), ('boy-', 'boy')]
LibM19140701-V09-03-page4.txt: [('magazine-', 'magazine'), ('-', '')]
LibM19140701-V09-03-page40.txt: [('itsfunda-', 'itsfunda'), ('-theft', 'theft')]
LibM19140701-V09-03-page42.txt: [('-Most', 'Most'), ('-', '')]
LibM19140701-V09-03-page44.txt: [('prohibit-', 'prohibit')]
LibM19140701-V09-03-page48.txt: [('-.', '.')]
LibM19140701-V09-03-page49.txt: [('-', ''), ('k-', 'k'), ('-i..', 'i..'), ('arwl-A-', 'arwl-A'), ('-', ''), ('-"', '"'), ("'-", "'"), ('-', '')]
LibM19140701-V09-03-page5.txt: [('-', ''), ('ad-', 'ad')]
LibM19140701-V09-03-page51.txt: [('V-', 'V'), ('-The', 'The'), ('."-', '."')]
LibM19140701-V09-03-page7.txt: [('be-', 'be')]
LibM19141001-V09-04-page10.txt: [('-', ''), ('-', ''), ('fail-', 'fail')]
LibM19141001-V09-04-page11.txt: [('-', ''), ('-the', 'the')]
LibM19141001-V09-04-page13.txt: [('bless-', 'bless'), ('re-', 're')]
LibM19141001-V09-04-page14.txt: [('re-', 're')]
LibM19141001-V09-04-page18.txt: [('Robes-', 'Robes'), ('be-', 'be')]
LibM19141001-V09-04-page19.txt: [("-law.'", "law.'")]
LibM19141001-V09-04-page2.txt: [('-', ''), ('-', '')]
LibM19141001-V09-04-page22.txt: [('penal-', 'penal')]
LibM19141001-V09-04-page26.txt: [('TI-', 'TI')]
LibM19141001-V09-04-page27.txt: [('en-', 'en'), ('say-', 'say')]
LibM19141001-V09-04-page29.txt: [('Medo-', 'Medo'), ('es-', 'es')]
LibM19141001-V09-04-page30.txt: [('-in', 'in'), ('-note', 'note')]
LibM19141001-V09-04-page31.txt: [('AMERI-', 'AMERI')]
LibM19141001-V09-04-page33.txt: [('an-', 'an'), ('-', ''), ('Star-', 'Star'), ('-', ''), ('-', '')]
LibM19141001-V09-04-page34.txt: [('-', ''), ('STAR-', 'STAR')]
LibM19141001-V09-04-page35.txt: [('-', ''), ('rz-', 'rz'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('.--', '.-'), ('-rs.', 'rs.'), ('-', ''), ('-', ''), ('.-', '.'), ('-', '')]
LibM19141001-V09-04-page36.txt: [('espe--', 'espe-'), ('--', '-')]
LibM19141001-V09-04-page37.txt: [('wor-', 'wor')]
LibM19141001-V09-04-page38.txt: [('es-', 'es')]
LibM19141001-V09-04-page39.txt: [('-', '')]
LibM19141001-V09-04-page4.txt: [('ad-', 'ad'), ('M-', 'M')]
LibM19141001-V09-04-page42.txt: [('op-', 'op')]
LibM19141001-V09-04-page43.txt: [('here.-', 'here.'), ("'-", "'")]
LibM19141001-V09-04-page44.txt: [('away.-', 'away.')]
LibM19141001-V09-04-page46.txt: [('-', ''), ('"-', '"')]
LibM19141001-V09-04-page48.txt: [('-', '')]
LibM19141001-V09-04-page49.txt: [('-', '')]
LibM19141001-V09-04-page50.txt: [('Twenty-', 'Twenty'), ('-.-', '.-'), ('Mili-', 'Mili'), ('Hala-', 'Hala'), ('-.', '.'), ('At-', 'At'), ('Lan-', 'Lan'), ('-rli', 'rli'), ('Tram-', 'Tram'), ('J-', 'J'), ('Pe-', 'Pe'), ('Albu-', 'Albu'), ('LI-', 'LI'), ('Bloom-', 'Bloom'), ('--', '-'), ('-.', '.'), ('-', ''), ('-', '')]
LibM19141001-V09-04-page51.txt: [('-', ''), ('--', '-'), ('.f------', '.f-----'), ('-----', '----'), ('-TESTINC', 'TESTINC'), ('-i', 'i'), ('monarchi-', 'monarchi'), ('Con-', 'Con'), ('-', '')]
LibM19141001-V09-04-page52.txt: [('DR.A-', 'DR.A'), ('-', ''), ('-.', '.')]
LibM19141001-V09-04-page7.txt: [('-', ''), ('-', ''), ('-.MM', '.MM')]
LibM19141001-V09-04-page8.txt: [('-', ''), ('na-', 'na'), ('Har-', 'Har'), ('-', '')]
LibM19141001-V09-04-page9.txt: [('-', ''), ('-', '')]
LibM19150101-V10-01-page10.txt: [('-', '')]
LibM19150101-V10-01-page11.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19150101-V10-01-page13.txt: [('Con-', 'Con')]
LibM19150101-V10-01-page15.txt: [('re-', 're')]
LibM19150101-V10-01-page16.txt: [('-', ''), ('-i', 'i'), ('---', '--')]
LibM19150101-V10-01-page17.txt: [('discrimi-', 'discrimi')]
LibM19150101-V10-01-page18.txt: [('-', '')]
LibM19150101-V10-01-page2.txt: [('pre-', 'pre'), ('-', ''), ('affil-', 'affil')]
LibM19150101-V10-01-page20.txt: [('-', '')]
LibM19150101-V10-01-page25.txt: [('-io.', 'io.'), ('-', ''), ('-', ''), ('destruc-', 'destruc')]
LibM19150101-V10-01-page26.txt: [('-', ''), ('-', '')]
LibM19150101-V10-01-page27.txt: [('declared-', 'declared')]
LibM19150101-V10-01-page28.txt: [('Sat-', 'Sat')]
LibM19150101-V10-01-page29.txt: [('viola-', 'viola')]
LibM19150101-V10-01-page3.txt: [('magae-', 'magae'), ('SUBSCRIP-', 'SUBSCRIP')]
LibM19150101-V10-01-page30.txt: [('-I', 'I')]
LibM19150101-V10-01-page31.txt: [('prohibit-', 'prohibit')]
LibM19150101-V10-01-page34.txt: [('Star-', 'Star')]
LibM19150101-V10-01-page35.txt: [('cathe-', 'cathe')]
LibM19150101-V10-01-page36.txt: [('of-', 'of')]
LibM19150101-V10-01-page38.txt: [('fol-', 'fol')]
LibM19150101-V10-01-page39.txt: [('-The', 'The')]
LibM19150101-V10-01-page41.txt: [('A-', 'A'), ('-', '')]
LibM19150101-V10-01-page42.txt: [('to-', 'to')]
LibM19150101-V10-01-page45.txt: [('-', '')]
LibM19150101-V10-01-page46.txt: [('-', '')]
LibM19150101-V10-01-page47.txt: [('or-', 'or')]
LibM19150101-V10-01-page48.txt: [('Alco-', 'Alco')]
LibM19150101-V10-01-page50.txt: [('Sunday.-', 'Sunday.'), ('-', ''), ('-sorrow', 'sorrow'), ('-', ''), ('-', ''), ('an-', 'an')]
LibM19150101-V10-01-page51.txt: [('Ti-', 'Ti'), ('-', '')]
LibM19150101-V10-01-page52.txt: [('Mill-', 'Mill'), ('Rap-', 'Rap'), ('.mmmmEiv-', '.mmmmEiv'), ('Trum-', 'Trum'), ('Pe-', 'Pe'), ('Lan-', 'Lan'), ('Luck-', 'Luck'), ('Alba-', 'Alba'), ('Aven-', 'Aven'), ('Bloom-', 'Bloom'), ('-', '')]
LibM19150101-V10-01-page53.txt: [('-', ''), ('FREE-', 'FREE'), ('-', '')]
LibM19150101-V10-01-page8.txt: [('-', '')]
LibM19150401-V10-02-page11.txt: [('intro-', 'intro')]
LibM19150401-V10-02-page12.txt: [('litho-', 'litho'), ('Corn-', 'Corn')]
LibM19150401-V10-02-page14.txt: [('Postmaster-', 'Postmaster'), ('pam-', 'pam')]
LibM19150401-V10-02-page15.txt: [('Postmaster-', 'Postmaster')]
LibM19150401-V10-02-page17.txt: [('-', ''), ('-legislation.', 'legislation.'), ('un-', 'un'), ('pub-', 'pub')]
LibM19150401-V10-02-page18.txt: [('of-', 'of'), ('re-', 're')]
LibM19150401-V10-02-page19.txt: [('Mc-', 'Mc')]
LibM19150401-V10-02-page2.txt: [('-', '')]
LibM19150401-V10-02-page21.txt: [('RE-', 'RE')]
LibM19150401-V10-02-page23.txt: [('free-', 'free'), ('WASH-', 'WASH'), ('reli-', 'reli')]
LibM19150401-V10-02-page25.txt: [('WASH-', 'WASH'), ('Postmaster-', 'Postmaster'), ('de-', 'de'), ('WASH-', 'WASH')]
LibM19150401-V10-02-page26.txt: [('Postmaster-', 'Postmaster')]
LibM19150401-V10-02-page27.txt: [('Cath-', 'Cath')]
LibM19150401-V10-02-page28.txt: [('CAP-', 'CAP')]
LibM19150401-V10-02-page29.txt: [('per-', 'per')]
LibM19150401-V10-02-page3.txt: [('cer-', 'cer'), ('Hear-', 'Hear')]
LibM19150401-V10-02-page30.txt: [('or-', 'or')]
LibM19150401-V10-02-page32.txt: [('gen-', 'gen')]
LibM19150401-V10-02-page36.txt: [('Lot-', 'Lot')]
LibM19150401-V10-02-page38.txt: [('-the', 'the')]
LibM19150401-V10-02-page4.txt: [('.-', '.')]
LibM19150401-V10-02-page41.txt: [('-the', 'the')]
LibM19150401-V10-02-page43.txt: [('.-', '.')]
LibM19150401-V10-02-page44.txt: [('un-', 'un'), ('-', '')]
LibM19150401-V10-02-page46.txt: [('pre-', 'pre'), ('sub-', 'sub'), ("'O-", "'O")]
LibM19150401-V10-02-page48.txt: [('-', ''), ('sa-', 'sa'), ('busi-', 'busi'), ('hence-', 'hence'), ('-', '')]
LibM19150401-V10-02-page49.txt: [('-', ''), ('--', '-'), ('HUN-', 'HUN')]
LibM19150401-V10-02-page5.txt: [('Philip-', 'Philip')]
LibM19150401-V10-02-page50.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-M.', 'M.'), ('.-', '.'), ('signifi-', 'signifi'), ('-', ''), ('-', '')]
LibM19150401-V10-02-page51.txt: [('.--', '.-'), ('-', ''), ('.-', '.'), ('-PER', 'PER')]
LibM19150401-V10-02-page52.txt: [('-', ''), ('-', ''), ('-', ''), ('iiimm--', 'iiimm-'), ('---', '--'), ('Ad-', 'Ad')]
LibM19150401-V10-02-page6.txt: [('---', '--'), ('kc-', 'kc'), ('-', ''), ('PEACE-', 'PEACE'), ('ASSEMB-', 'ASSEMB'), ('-', ''), ('-', ''), ('lost.-', 'lost.'), ('ri-', 'ri'), ('-K', 'K')]
LibM19150401-V10-02-page7.txt: [('-', ''), ('-', '')]
LibM19150401-V10-02-page9.txt: [('-', '')]
LibM19150701-V10-03-page1.txt: [('--', '-')]
LibM19150701-V10-03-page10.txt: [('-', '')]
LibM19150701-V10-03-page14.txt: [('be-', 'be')]
LibM19150701-V10-03-page15.txt: [('-', '')]
LibM19150701-V10-03-page17.txt: [('-', '')]
LibM19150701-V10-03-page2.txt: [('inter-', 'inter'), ('Col-', 'Col'), ('affil-', 'affil'), ('affili-', 'affili'), ('Massa-', 'Massa'), ('Connecti-', 'Connecti')]
LibM19150701-V10-03-page21.txt: [('-', '')]
LibM19150701-V10-03-page22.txt: [('-', ''), ('Chris-', 'Chris'), ('Eng-', 'Eng')]
LibM19150701-V10-03-page25.txt: [('plot-', 'plot')]
LibM19150701-V10-03-page26.txt: [('meth-', 'meth')]
LibM19150701-V10-03-page27.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19150701-V10-03-page28.txt: [('degen-', 'degen')]
LibM19150701-V10-03-page3.txt: [('illus-', 'illus'), ('pro-', 'pro'), ('prohibi-', 'prohibi'), ('SUB-', 'SUB'), ('attor-', 'attor'), ('-', '')]
LibM19150701-V10-03-page30.txt: [('princi-', 'princi')]
LibM19150701-V10-03-page33.txt: [('clas-', 'clas')]
LibM19150701-V10-03-page36.txt: [('zeal-', 'zeal')]
LibM19150701-V10-03-page4.txt: [('ad-', 'ad')]
LibM19150701-V10-03-page42.txt: [('-', ''), ('-sAfd', 'sAfd'), ('-', '')]
LibM19150701-V10-03-page45.txt: [('-', ''), ('-', '')]
LibM19150701-V10-03-page46.txt: [('-', '')]
LibM19150701-V10-03-page47.txt: [('-', ''), ('-', ''), ('caus-', 'caus')]
LibM19150701-V10-03-page49.txt: [('-', '')]
LibM19150701-V10-03-page50.txt: [('Fa-', 'Fa')]
LibM19150701-V10-03-page8.txt: [('-', '')]
LibM19151001-V10-04-page1.txt: [('-', '')]
LibM19151001-V10-04-page10.txt: [('-', '')]
LibM19151001-V10-04-page11.txt: [('lib-', 'lib')]
LibM19151001-V10-04-page12.txt: [('-', '')]
LibM19151001-V10-04-page14.txt: [('pub-', 'pub')]
LibM19151001-V10-04-page15.txt: [('-', ''), ('-', '')]
LibM19151001-V10-04-page18.txt: [('-', '')]
LibM19151001-V10-04-page19.txt: [('-', ''), ('-', ''), ('dis-', 'dis')]
LibM19151001-V10-04-page2.txt: [('-', ''), ('Col-', 'Col'), ('af-', 'af')]
LibM19151001-V10-04-page23.txt: [('former-', 'former')]
LibM19151001-V10-04-page25.txt: [('-', '')]
LibM19151001-V10-04-page26.txt: [('bul-', 'bul'), ('to-', 'to')]
LibM19151001-V10-04-page27.txt: [('-', ''), ('s-', 's'), ('-', '')]
LibM19151001-V10-04-page28.txt: [('reli-', 'reli')]
LibM19151001-V10-04-page30.txt: [('indi-', 'indi')]
LibM19151001-V10-04-page31.txt: [('-proper', 'proper'), ('-', '')]
LibM19151001-V10-04-page33.txt: [('-', '')]
LibM19151001-V10-04-page37.txt: [('-', '')]
LibM19151001-V10-04-page42.txt: [('Panama-', 'Panama'), ('repre-', 'repre')]
LibM19151001-V10-04-page45.txt: [('-', '')]
LibM19151001-V10-04-page48.txt: [('Ama-', 'Ama'), ('Eng-', 'Eng'), ('Bloom-', 'Bloom'), ('-', ''), ('go-', 'go'), ('Mili-', 'Mili')]
LibM19151001-V10-04-page49.txt: [('effec-', 'effec'), ('per-', 'per')]
LibM19151001-V10-04-page51.txt: [('Tem-', 'Tem')]
LibM19151001-V10-04-page7.txt: [('-WARDE', 'WARDE')]
LibM19160101-V11-01-page11.txt: [('legisla-', 'legisla'), ('Peru-', 'Peru')]
LibM19160101-V11-01-page12.txt: [('bish-', 'bish')]
LibM19160101-V11-01-page13.txt: [('-', ''), ('-', '')]
LibM19160101-V11-01-page18.txt: [('institu-', 'institu')]
LibM19160101-V11-01-page21.txt: [('-', '')]
LibM19160101-V11-01-page23.txt: [('-', ''), ('-', ''), ('-', ''), ('lan-', 'lan')]
LibM19160101-V11-01-page25.txt: [('prob-', 'prob')]
LibM19160101-V11-01-page27.txt: [('be-', 'be')]
LibM19160101-V11-01-page28.txt: [('-', '')]
LibM19160101-V11-01-page30.txt: [('perni-', 'perni')]
LibM19160101-V11-01-page35.txt: [('Postmaster-', 'Postmaster')]
LibM19160101-V11-01-page36.txt: [('-I', 'I')]
LibM19160101-V11-01-page4.txt: [('-', '')]
LibM19160101-V11-01-page44.txt: [('-', ''), ('-', ''), ('-e....lft', 'e....lft'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('......-', '......'), ('...-', '...'), ('-', ''), ('-', ''), ('-', ''), ('....-', '....'), ('-', ''), ('--', '-'), ('-', ''), ('".-r-', '".-r'), ('-', ''), ('-', ''), ('-', ''), ('-..', '..'), ("-'t", "'t"), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('A-', 'A'), ('--', '-'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-f...-V', 'f...-V'), ('--', '-'), ('-', ''), ('-', ''), ('-', '')]
LibM19160101-V11-01-page45.txt: [('con-', 'con')]
LibM19160101-V11-01-page5.txt: [('--', '-')]
LibM19160101-V11-01-page6.txt: [('KEN-', 'KEN')]
LibM19160101-V11-01-page8.txt: [('-', '')]
LibM19160101-V11-01-page9.txt: [('--', '-')]
LibM19160101-V11-01e-page11.txt: [('drug-', 'drug')]
LibM19160101-V11-01e-page16.txt: [('-IN', 'IN'), ('repro-', 'repro'), ('en-', 'en'), ('mail-', 'mail'), ('-with', 'with'), ('pub-', 'pub'), ('assur-', 'assur'), ('mails-', 'mails')]
LibM19160101-V11-01e-page3.txt: [('-', '')]
LibM19160101-V11-01e-page4.txt: [('liberty-', 'liberty'), ('jury.--', 'jury.-'), ('-', '')]
LibM19160101-V11-01e-page5.txt: [('senti-', 'senti')]
LibM19160101-V11-01e-page9.txt: [('P-', 'P'), ('be-', 'be')]
LibM19160401-V11-02-page1.txt: [('-', '')]
LibM19160401-V11-02-page10.txt: [('OBSERV-', 'OBSERV'), ("'Na-", "'Na")]
LibM19160401-V11-02-page13.txt: [('be-', 'be'), ('au-', 'au')]
LibM19160401-V11-02-page14.txt: [('-', ''), ('persecution.--', 'persecution.-')]
LibM19160401-V11-02-page16.txt: [('mat-', 'mat')]
LibM19160401-V11-02-page17.txt: [('censor-', 'censor')]
LibM19160401-V11-02-page18.txt: [('Corn-', 'Corn'), ('Postmaster-', 'Postmaster')]
LibM19160401-V11-02-page2.txt: [('-', ''), ('"-', '"'), ('wor-', 'wor'), ('-', ''), ('-', ''), ('fore-', 'fore'), ('prop-', 'prop'), ('-', ''), ('scurril-', 'scurril'), ('mat-', 'mat'), ('decide.-', 'decide.')]
LibM19160401-V11-02-page20.txt: [('-', '')]
LibM19160401-V11-02-page22.txt: [('often-', 'often'), ('mat-', 'mat')]
LibM19160401-V11-02-page24.txt: [('omis-', 'omis')]
LibM19160401-V11-02-page26.txt: [('-', ''), ("'-.-", "'-."), ('-.--.-', '.--.-'), ('.-', '.'), ('.-', '.'), ('-f.', 'f.'), ('Or-', 'Or'), ('-', ''), ('-', ''), ('-.-', '.-'), ('-', ''), ("'-", "'"), ('-', ''), ('.....-', '.....'), ('r-', 'r'), ('-', ''), ('----', '---'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ("-'", "'"), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('"-', '"'), ('--', '-'), ('------.---', '-----.---'), ('...-', '...'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('..-.-.-', '..-.-.'), ('f\'"-----', 'f\'"----'), ('-...-.', '...-.'), ('"-..-..-', '"-..-..'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('--', '-'), ('-..-', '..-'), ('-.', '.'), ('-....', '....'), ("----'", "---'"), ('--..', '-..')]
LibM19160401-V11-02-page28.txt: [('there-', 'there')]
LibM19160401-V11-02-page29.txt: [('-', '')]
LibM19160401-V11-02-page30.txt: [('Mc-', 'Mc')]
LibM19160401-V11-02-page31.txt: [('.-', '.')]
LibM19160401-V11-02-page32.txt: [('pri-', 'pri')]
LibM19160401-V11-02-page33.txt: [('-', '')]
LibM19160401-V11-02-page36.txt: [('liv-', 'liv')]
LibM19160401-V11-02-page39.txt: [('recog-', 'recog')]
LibM19160401-V11-02-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('mem-', 'mem'), ('there-', 'there')]
LibM19160401-V11-02-page40.txt: [('en-', 'en'), ('-', '')]
LibM19160401-V11-02-page43.txt: [('Pa-', 'Pa')]
LibM19160401-V11-02-page45.txt: [('--', '-')]
LibM19160401-V11-02-page48.txt: [('the-', 'the')]
LibM19160401-V11-02-page49.txt: [('Mc-', 'Mc')]
LibM19160401-V11-02-page5.txt: [('meas-', 'meas')]
LibM19160401-V11-02-page51.txt: [('-', '')]
LibM19160401-V11-02-page6.txt: [('-being', 'being')]
LibM19160401-V11-02-page7.txt: [('-', '')]
LibM19160401-V11-02-page8.txt: [('Congress-', 'Congress')]
LibM19160401-V11-02-page9.txt: [('-', '')]
LibM19160401-V11-02e-page1.txt: [('-', ''), ('-', '')]
LibM19160401-V11-02e-page12.txt: [('Pot-', 'Pot')]
LibM19160401-V11-02e-page14.txt: [('---', '--')]
LibM19160401-V11-02e-page3.txt: [('-', '')]
LibM19160401-V11-02e-page5.txt: [('-be', 'be')]
LibM19160401-V11-02e-page9.txt: [('morals.--', 'morals.-')]
LibM19160701-V11-03-page12.txt: [('-', '')]
LibM19160701-V11-03-page14.txt: [('execu-', 'execu')]
LibM19160701-V11-03-page15.txt: [('legit-', 'legit')]
LibM19160701-V11-03-page16.txt: [('-Rest-in-Seven', 'Rest-in-Seven')]
LibM19160701-V11-03-page18.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sup-', 'sup')]
LibM19160701-V11-03-page21.txt: [('-', ''), ('-', ''), ('Sec-', 'Sec')]
LibM19160701-V11-03-page22.txt: [('-', '')]
LibM19160701-V11-03-page23.txt: [('-', ''), ('Vat-', 'Vat')]
LibM19160701-V11-03-page26.txt: [('-', ''), ('institu-', 'institu'), ('-', '')]
LibM19160701-V11-03-page28.txt: [('-', ''), ('tem-', 'tem')]
LibM19160701-V11-03-page29.txt: [('deci-', 'deci')]
LibM19160701-V11-03-page3.txt: [('-', '')]
LibM19160701-V11-03-page30.txt: [('remem-', 'remem')]
LibM19160701-V11-03-page31.txt: [('dis-', 'dis'), ('-United', 'United'), ('cur-', 'cur')]
LibM19160701-V11-03-page32.txt: [('-', '')]
LibM19160701-V11-03-page33.txt: [('-', '')]
LibM19160701-V11-03-page34.txt: [('-', '')]
LibM19160701-V11-03-page35.txt: [('-', '')]
LibM19160701-V11-03-page40.txt: [('-from', 'from')]
LibM19160701-V11-03-page41.txt: [('-', '')]
LibM19160701-V11-03-page43.txt: [('-', '')]
LibM19160701-V11-03-page49.txt: [('Postmaster-', 'Postmaster'), ('Postmaster-', 'Postmaster')]
LibM19160701-V11-03-page51.txt: [('-', '')]
LibM19160701-V11-03-page9.txt: [('-', '')]
LibM19161001-V11-04-page1.txt: [('-', '')]
LibM19161001-V11-04-page12.txt: [('superstitions."--', 'superstitions."-')]
LibM19161001-V11-04-page15.txt: [('CHAR-', 'CHAR')]
LibM19161001-V11-04-page16.txt: [('-', '')]
LibM19161001-V11-04-page17.txt: [('Watch-', 'Watch'), ('Postmaster-', 'Postmaster')]
LibM19161001-V11-04-page2.txt: [('inter-', 'inter')]
LibM19161001-V11-04-page20.txt: [('through-', 'through')]
LibM19161001-V11-04-page21.txt: [('-', '')]
LibM19161001-V11-04-page22.txt: [('as-', 'as'), ('-', '')]
LibM19161001-V11-04-page23.txt: [('-', '')]
LibM19161001-V11-04-page24.txt: [('unde-', 'unde'), ('observ-', 'observ')]
LibM19161001-V11-04-page25.txt: [('mil-', 'mil')]
LibM19161001-V11-04-page26.txt: [('-', ''), ('suf-', 'suf')]
LibM19161001-V11-04-page27.txt: [('right-', 'right')]
LibM19161001-V11-04-page33.txt: [('-', '')]
LibM19161001-V11-04-page36.txt: [('stir-', 'stir')]
LibM19161001-V11-04-page39.txt: [('-revived', 'revived')]
LibM19161001-V11-04-page40.txt: [('how-', 'how')]
LibM19161001-V11-04-page41.txt: [('denomi-', 'denomi'), ('-', ''), ('re-', 're')]
LibM19161001-V11-04-page44.txt: [('govern-', 'govern'), ('.-', '.'), ('-', '')]
LibM19161001-V11-04-page45.txt: [('ac-', 'ac')]
LibM19161001-V11-04-page47.txt: [('voy-', 'voy')]
LibM19161001-V11-04-page49.txt: [('Anti-', 'Anti')]
LibM19161001-V11-04-page50.txt: [('-', '')]
LibM19161001-V11-04-page52.txt: [('T-', 'T')]
LibM19161001-V11-04-page6.txt: [('ex-', 'ex')]
LibM19170101-V12-01-page1.txt: [('-', ''), ('-', '')]
LibM19170101-V12-01-page13.txt: [('-', '')]
LibM19170101-V12-01-page14.txt: [('dis-', 'dis')]
LibM19170101-V12-01-page16.txt: [('-', '')]
LibM19170101-V12-01-page19.txt: [('-', ''), ('-', ''), ('DE-', 'DE')]
LibM19170101-V12-01-page2.txt: [('inter-', 'inter'), ('af-', 'af'), ('Ten-', 'Ten'), ('Wat-', 'Wat')]
LibM19170101-V12-01-page23.txt: [('-legislation', 'legislation')]
LibM19170101-V12-01-page27.txt: [('-', '')]
LibM19170101-V12-01-page3.txt: [('Sab-', 'Sab')]
LibM19170101-V12-01-page34.txt: [('religious-', 'religious'), ('-', '')]
LibM19170101-V12-01-page6.txt: [('-as', 'as'), ('re-', 're')]
LibM19170101-V12-01-page7.txt: [('un-', 'un')]
LibM19170101-V12-01-page9.txt: [('-', '')]
LibM19170401-V12-02-page10.txt: [('work-', 'work')]
LibM19170401-V12-02-page11.txt: [('praise-', 'praise')]
LibM19170401-V12-02-page18.txt: [('non-', 'non')]
LibM19170401-V12-02-page21.txt: [('valid-', 'valid')]
LibM19170401-V12-02-page23.txt: [('-', '')]
LibM19170401-V12-02-page27.txt: [('founda-', 'founda')]
LibM19170401-V12-02-page29.txt: [('recog-', 'recog')]
LibM19170401-V12-02-page30.txt: [('Attorney-', 'Attorney')]
LibM19170401-V12-02-page33.txt: [('-observance', 'observance')]
LibM19170401-V12-02-page34.txt: [('-', ''), ('Multi-', 'Multi')]
LibM19170401-V12-02-page35.txt: [('-', ''), ('alfigent-', 'alfigent')]
LibM19170401-V12-02-page5.txt: [('-', ''), ('at-', 'at')]
LibM19170401-V12-02-page7.txt: [('-', '')]
LibM19170401-V12-02-page8.txt: [('in-', 'in')]
LibM19170701-V12-03-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-.....', '.....')]
LibM19170701-V12-03-page12.txt: [('govern-', 'govern')]
LibM19170701-V12-03-page13.txt: [('-', '')]
LibM19170701-V12-03-page14.txt: [('un-', 'un')]
LibM19170701-V12-03-page15.txt: [('.-', '.'), ('-', '')]
LibM19170701-V12-03-page19.txt: [('r.nr--', 'r.nr-')]
LibM19170701-V12-03-page2.txt: [('Mis-', 'Mis')]
LibM19170701-V12-03-page20.txt: [('III.-', 'III.'), ('CXXX.-', 'CXXX.'), ('gover-', 'gover')]
LibM19170701-V12-03-page23.txt: [('---', '--'), ('---', '--')]
LibM19170701-V12-03-page26.txt: [('-', '')]
LibM19170701-V12-03-page28.txt: [('-', '')]
LibM19170701-V12-03-page29.txt: [('-', '')]
LibM19170701-V12-03-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19170701-V12-03-page31.txt: [('-', '')]
LibM19170701-V12-03-page32.txt: [('-', '')]
LibM19170701-V12-03-page33.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19170701-V12-03-page36.txt: [('-', ''), ('-', ''), ('.-', '.')]
LibM19170701-V12-03-page8.txt: [('-', '')]
LibM19170701-V12-03-page9.txt: [('-', '')]
LibM19171001-V12-04-page10.txt: [('POR-', 'POR'), ('CON-', 'CON')]
LibM19171001-V12-04-page11.txt: [('suav-', 'suav'), ('-their', 'their'), ('unlim-', 'unlim')]
LibM19171001-V12-04-page12.txt: [('-', ''), ('Medo-', 'Medo'), ('-', '')]
LibM19171001-V12-04-page13.txt: [('-', '')]
LibM19171001-V12-04-page16.txt: [('-', '')]
LibM19171001-V12-04-page18.txt: [('P-', 'P'), ('Protestant-', 'Protestant'), ('-o', 'o')]
LibM19171001-V12-04-page19.txt: [('P-', 'P')]
LibM19171001-V12-04-page21.txt: [('effec-', 'effec')]
LibM19171001-V12-04-page23.txt: [('-', ''), ('lines.-', 'lines.')]
LibM19171001-V12-04-page27.txt: [('-', '')]
LibM19171001-V12-04-page28.txt: [('under-', 'under'), ('un-', 'un')]
LibM19171001-V12-04-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19171001-V12-04-page30.txt: [('-r', 'r')]
LibM19171001-V12-04-page34.txt: [('Alleghanies.-', 'Alleghanies.')]
LibM19171001-V12-04-page35.txt: [('Food-', 'Food')]
LibM19171001-V12-04-page7.txt: [('.-', '.')]
LibM19180101-V13-01-page1.txt: [('ress--', 'ress-'), ('er-', 'er')]
LibM19180101-V13-01-page11.txt: [('--', '-')]
LibM19180101-V13-01-page12.txt: [('intro-', 'intro'), ('con-', 'con')]
LibM19180101-V13-01-page17.txt: [('Postmaster-', 'Postmaster'), ('deter-', 'deter')]
LibM19180101-V13-01-page19.txt: [('ar-', 'ar'), ('-', '')]
LibM19180101-V13-01-page24.txt: [('power-', 'power'), ('Ars--', 'Ars-'), ('-', ''), ('enfranchise-', 'enfranchise')]
LibM19180101-V13-01-page28.txt: [('-', '')]
LibM19180101-V13-01-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19180101-V13-01-page31.txt: [('re-', 're')]
LibM19180101-V13-01-page33.txt: [('Anti-', 'Anti')]
LibM19180101-V13-01-page4.txt: [('-', ''), ('-', ''), ('-C', 'C'), ('-s', 's'), ('-', '')]
LibM19180101-V13-01-page6.txt: [('-this', 'this'), ('foun-', 'foun')]
LibM19180101-V13-01-page7.txt: [('can-', 'can')]
LibM19180101-V13-01-page8.txt: [('Rear-', 'Rear')]
LibM19180401-V13-02-page1.txt: [('-', '')]
LibM19180401-V13-02-page12.txt: [('Go-to-', 'Go-to')]
LibM19180401-V13-02-page13.txt: [('.---', '.--'), ('--', '-'), ('-s-', 's-'), ('con-', 'con'), ("-to'i.", "to'i."), ('.-', '.'), ('----', '---'), ('---', '--'), ('--', '-'), ('---', '--'), ('-', ''), ('.-.-', '.-.'), ('-', ''), ('-.', '.'), ('-', ''), ('---', '--'), ('Eng-', 'Eng'), ('-', ''), ('..--', '..-'), ('.f..--', '.f..-'), ('-', ''), ('......--', '......-'), ('---', '--'), ('-.', '.'), ('---', '--'), ('-', ''), ("-----'---..-", "----'---..-"), ('-...', '...'), ('.-.-.-.-', '.-.-.-.'), ('-', ''), ('.-', '.'), ('-.-."', '.-."')]
LibM19180401-V13-02-page16.txt: [('Je-', 'Je')]
LibM19180401-V13-02-page17.txt: [('-', '')]
LibM19180401-V13-02-page19.txt: [('free-', 'free')]
LibM19180401-V13-02-page20.txt: [('-', ''), ('-', '')]
LibM19180401-V13-02-page21.txt: [('-', ''), ('-', '')]
LibM19180401-V13-02-page22.txt: [('-', ''), ('-Palestine', 'Palestine'), ('-', '')]
LibM19180401-V13-02-page23.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19180401-V13-02-page24.txt: [('MASSA-', 'MASSA')]
LibM19180401-V13-02-page26.txt: [('Co-', 'Co')]
LibM19180401-V13-02-page28.txt: [('-', ''), ('-', '')]
LibM19180401-V13-02-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19180401-V13-02-page30.txt: [('-', '')]
LibM19180401-V13-02-page35.txt: [('-', ''), ('-', ''), ('-', ''), ('r-', 'r'), ('---', '--')]
LibM19180401-V13-02-page5.txt: [('thered.-', 'thered.')]
LibM19180701-V13-03-page1.txt: [('-', '')]
LibM19180701-V13-03-page14.txt: [('-', '')]
LibM19180701-V13-03-page16.txt: [('-', ''), ('-', '')]
LibM19180701-V13-03-page17.txt: [('-', '')]
LibM19180701-V13-03-page18.txt: [('af-', 'af')]
LibM19180701-V13-03-page25.txt: [('apes-', 'apes')]
LibM19180701-V13-03-page29.txt: [('Jean-', 'Jean')]
LibM19180701-V13-03-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19180701-V13-03-page30.txt: [('time..--', 'time..-')]
LibM19180701-V13-03-page34.txt: [('cog-', 'cog')]
LibM19180701-V13-03-page6.txt: [('fol-', 'fol')]
LibM19180701-V13-03-page8.txt: [('-', '')]
LibM19181001-V13-04-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19181001-V13-04-page11.txt: [('lit-', 'lit'), ('Sun-', 'Sun'), ('Ordi-', 'Ordi')]
LibM19181001-V13-04-page13.txt: [('democ-', 'democ'), ('it-', 'it')]
LibM19181001-V13-04-page14.txt: [('af-', 'af')]
LibM19181001-V13-04-page16.txt: [('ap-', 'ap')]
LibM19181001-V13-04-page18.txt: [('fore-', 'fore')]
LibM19181001-V13-04-page19.txt: [('auto-', 'auto')]
LibM19181001-V13-04-page2.txt: [('-.', '.'), ('.-', '.'), ('pre-', 'pre'), ('Ida-', 'Ida'), ('af-', 'af')]
LibM19181001-V13-04-page20.txt: [('peril-', 'peril'), ('be-', 'be')]
LibM19181001-V13-04-page23.txt: [('-', '')]
LibM19181001-V13-04-page24.txt: [('-', ''), ('-', '')]
LibM19181001-V13-04-page25.txt: [('-ruled', 'ruled')]
LibM19181001-V13-04-page28.txt: [('-', '')]
LibM19181001-V13-04-page29.txt: [('-', ''), ('Assoeia-', 'Assoeia')]
LibM19181001-V13-04-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19181001-V13-04-page30.txt: [('One-', 'One')]
LibM19181001-V13-04-page34.txt: [('reli-', 'reli')]
LibM19181001-V13-04-page5.txt: [('call-', 'call')]
LibM19181001-V13-04-page6.txt: [('-', ''), ('-', '')]
LibM19181001-V13-04-page7.txt: [('en-', 'en')]
LibM19181001-V13-04-page8.txt: [('Declara-', 'Declara')]
LibM19190101-V15-01-page1.txt: [('-', '')]
LibM19190101-V15-01-page12.txt: [('-A', 'A')]
LibM19190101-V15-01-page15.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19190101-V15-01-page17.txt: [('-', '')]
LibM19190101-V15-01-page18.txt: [('Wil-', 'Wil')]
LibM19190101-V15-01-page2.txt: [('Ida-', 'Ida'), ('T"-', 'T"')]
LibM19190101-V15-01-page21.txt: [('-religions', 'religions')]
LibM19190101-V15-01-page22.txt: [('-', '')]
LibM19190101-V15-01-page23.txt: [('-', '')]
LibM19190101-V15-01-page28.txt: [('-.', '.'), ('i"----', 'i"---'), ('-j', 'j'), ('-', ''), ('-', ''), ('-e-', 'e-'), ('-', ''), ('-of', 'of')]
LibM19190101-V15-01-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19190101-V15-01-page5.txt: [('thereof.-', 'thereof.'), ('COUN-', 'COUN'), ('-', '')]
LibM19190401-V15-02-page1.txt: [('-', '')]
LibM19190401-V15-02-page10.txt: [('..-', '..'), ('pro-', 'pro')]
LibM19190401-V15-02-page12.txt: [('i-', 'i'), ('....-', '....')]
LibM19190401-V15-02-page13.txt: [('neigh-', 'neigh')]
LibM19190401-V15-02-page15.txt: [('to-', 'to'), ('-ether.', 'ether.')]
LibM19190401-V15-02-page17.txt: [('-', ''), ('RE-', 'RE'), ('pro-', 'pro')]
LibM19190401-V15-02-page18.txt: [('Medo-', 'Medo'), ('constrain-', 'constrain')]
LibM19190401-V15-02-page19.txt: [('repub-', 'repub'), ('inter-', 'inter'), ('power-', 'power'), ('Dan-', 'Dan'), ('-', '')]
LibM19190401-V15-02-page2.txt: [('-cl', 'cl')]
LibM19190401-V15-02-page21.txt: [('jit-', 'jit')]
LibM19190401-V15-02-page22.txt: [('-', '')]
LibM19190401-V15-02-page23.txt: [('-', '')]
LibM19190401-V15-02-page28.txt: [('-', '')]
LibM19190401-V15-02-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19190401-V15-02-page7.txt: [('major-', 'major')]
LibM19190401-V15-02-page9.txt: [('-.', '.')]
LibM19190701-V15-03-page10.txt: [('-', '')]
LibM19190701-V15-03-page11.txt: [('Re-', 'Re')]
LibM19190701-V15-03-page12.txt: [('-', ''), ('pun-', 'pun')]
LibM19190701-V15-03-page13.txt: [('con-', 'con')]
LibM19190701-V15-03-page18.txt: [('com-', 'com')]
LibM19190701-V15-03-page2.txt: [('inter-', 'inter'), ('Of-', 'Of'), ('affil-', 'affil'), ('Co-', 'Co')]
LibM19190701-V15-03-page21.txt: [('-E', 'E'), ('Ite-', 'Ite'), ('-', ''), ('-', ''), ('-', ''), ('pa-', 'pa'), ('-', '')]
LibM19190701-V15-03-page22.txt: [('-', '')]
LibM19190701-V15-03-page25.txt: [('---', '--'), ('-r--', 'r--'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19190701-V15-03-page28.txt: [('President.\'"--', 'President.\'"-'), ('---', '--')]
LibM19190701-V15-03-page30.txt: [('believ-', 'believ')]
LibM19190701-V15-03-page31.txt: [('op-', 'op')]
LibM19190701-V15-03-page34.txt: [('-', '')]
LibM19190701-V15-03-page36.txt: [('f-', 'f'), ('-ewikik', 'ewikik')]
LibM19190701-V15-03-page5.txt: [('Kt-', 'Kt'), ('-.', '.'), ('--', '-'), ('--', '-'), ('-', '')]
LibM19190701-V15-03-page6.txt: [('--', '-')]
LibM19190701-V15-03-page7.txt: [('Jef-', 'Jef')]
LibM19190701-V15-03-page9.txt: [('con-', 'con')]
LibM19191001-V15-04-page11.txt: [('Con-', 'Con')]
LibM19191001-V15-04-page15.txt: [('-', ''), ('-B.', 'B.')]
LibM19191001-V15-04-page17.txt: [('-', ''), ('-', ''), ('non-', 'non'), ('en-', 'en')]
LibM19191001-V15-04-page21.txt: [('sur-', 'sur')]
LibM19191001-V15-04-page27.txt: [('-', ''), ('-', '')]
LibM19191001-V15-04-page28.txt: [('f-', 'f')]
LibM19191001-V15-04-page5.txt: [('-', '')]
LibM19191001-V15-04-page7.txt: [('pub-', 'pub')]
LibM19191001-V15-04-page8.txt: [('Massa-', 'Massa'), ('re-', 're')]
LibM19200101-V14-01-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19200101-V14-01-page12.txt: [('ESTAB-', 'ESTAB')]
LibM19200101-V14-01-page15.txt: [('-', '')]
LibM19200101-V14-01-page16.txt: [('-', ''), ('-', '')]
LibM19200101-V14-01-page17.txt: [('reli-', 'reli'), ('MUTCH-', 'MUTCH')]
LibM19200101-V14-01-page18.txt: [('cir-', 'cir')]
LibM19200101-V14-01-page20.txt: [('pa-', 'pa')]
LibM19200101-V14-01-page22.txt: [('-', ''), ('-', ''), ('-', ''), ('Na-', 'Na'), ('---', '--')]
LibM19200101-V14-01-page32.txt: [('-', '')]
LibM19200101-V14-01-page5.txt: [('fol-', 'fol')]
LibM19200101-V14-01-page6.txt: [('-', ''), ('espe-', 'espe'), ('At-', 'At')]
LibM19200101-V14-01-page7.txt: [('re-', 're'), ('-', '')]
LibM19200101-V14-01-page8.txt: [('-', '')]
LibM19200401-V14-02-page10.txt: [('ban-', 'ban')]
LibM19200401-V14-02-page11.txt: [('denorai-', 'denorai'), ('-', '')]
LibM19200401-V14-02-page13.txt: [('-', ''), ('-', ''), ('na-', 'na')]
LibM19200401-V14-02-page14.txt: [('Sun-', 'Sun'), ('-', '')]
LibM19200401-V14-02-page19.txt: [('Com-', 'Com'), ('-', '')]
LibM19200401-V14-02-page21.txt: [('com-', 'com')]
LibM19200401-V14-02-page22.txt: [('-', '')]
LibM19200401-V14-02-page29.txt: [('-at', 'at')]
LibM19200401-V14-02-page31.txt: [('---', '--'), ('a-', 'a')]
LibM19200401-V14-02-page35.txt: [('--', '-'), ('-', ''), ('-', ''), ('--', '-'), ('kt-', 'kt')]
LibM19200401-V14-02-page5.txt: [('thereof.-', 'thereof.'), ('-', ''), ('un-', 'un')]
LibM19200401-V14-02-page6.txt: [('amuse-', 'amuse'), ('re-', 're'), ('--', '-')]
LibM19200401-V14-02-page8.txt: [('Wheel-', 'Wheel'), ('advo-', 'advo')]
LibM19200401-V14-02-page9.txt: [('com-', 'com')]
LibM19200701-V14-03-page10.txt: [('an-', 'an')]
LibM19200701-V14-03-page12.txt: [('-', ''), ('-', ''), ('-', ''), ('-.', '.')]
LibM19200701-V14-03-page14.txt: [('unmistaka-', 'unmistaka'), ('ar-', 'ar'), ('-c', 'c')]
LibM19200701-V14-03-page15.txt: [('-and', 'and'), ('-..', '..'), ('Sunday-', 'Sunday'), ('-iii', 'iii')]
LibM19200701-V14-03-page16.txt: [('-', ''), ('Lib-', 'Lib')]
LibM19200701-V14-03-page17.txt: [('-', ''), ('-', ''), ('-', ''), ('iVi-', 'iVi')]
LibM19200701-V14-03-page2.txt: [('affil-', 'affil')]
LibM19200701-V14-03-page20.txt: [('-', ''), ('-', ''), ('rafarowi-erivirorre-', 'rafarowi-erivirorre'), ('-mititayerwiriiiinicrierier-rimorwai-weiverreitaararforreahaarivitoroyerriiivii', 'mititayerwiriiiinicrierier-rimorwai-weiverreitaararforreahaarivitoroyerriiivii')]
LibM19200701-V14-03-page24.txt: [('-', ''), ('--------', '-------')]
LibM19200701-V14-03-page26.txt: [('un-', 'un')]
LibM19200701-V14-03-page31.txt: [('-weesie', 'weesie'), ('Llimtstoo-', 'Llimtstoo'), ('plain-', 'plain')]
LibM19200701-V14-03-page32.txt: [('-', '')]
LibM19200701-V14-03-page33.txt: [('-', ''), ('Con-', 'Con')]
LibM19200701-V14-03-page36.txt: [('-', ''), ('-', '')]
LibM19200701-V14-03-page5.txt: [('-i-Ifidairicliiiriiirroi', 'i-Ifidairicliiiriiirroi')]
LibM19200701-V14-03-page7.txt: [('-', '')]
LibM19200701-V14-03-page8.txt: [('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-vol', 'vol'), ('-i-', 'i-'), ('wofriw-', 'wofriw'), ('-iv-.-ii.-', 'iv-.-ii.-')]
LibM19201001-V14-04-page14.txt: [('Amer-', 'Amer')]
LibM19201001-V14-04-page15.txt: [('un-', 'un')]
LibM19201001-V14-04-page16.txt: [('-', ''), ('por-', 'por')]
LibM19201001-V14-04-page18.txt: [('un-', 'un')]
LibM19201001-V14-04-page19.txt: [('un-', 'un'), ('Fugitive-', 'Fugitive')]
LibM19201001-V14-04-page21.txt: [('in-', 'in')]
LibM19201001-V14-04-page22.txt: [('Vice-', 'Vice'), ('-of', 'of')]
LibM19201001-V14-04-page23.txt: [('-', '')]
LibM19201001-V14-04-page26.txt: [('-', '')]
LibM19201001-V14-04-page27.txt: [('neg-', 'neg')]
LibM19201001-V14-04-page30.txt: [('-', '')]
LibM19201001-V14-04-page34.txt: [('en-', 'en')]
LibM19201001-V14-04-page4.txt: [('-MASS.', 'MASS.')]
LibM19201001-V14-04-page5.txt: [('-', '')]
LibM19201001-V14-04-page6.txt: [('-', '')]
LibM19201001-V14-04-page7.txt: [('Fed-', 'Fed'), ('n-', 'n'), ('---', '--'), ('"--r-f-', '"--r-f'), ('-', ''), ('eXti-', 'eXti')]
LibM19201001-V14-04-page9.txt: [('-', '')]
In [20]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction3

Average verified rate: 0.9808327456835285

Average of error rates: 0.03449303008070433

Total token count: 1452112

In [21]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[21]:
[("'", 1601),
 ('m', 1336),
 ('d', 1255),
 ('e', 1005),
 ('w', 956),
 ('t', 838),
 ('n', 784),
 ('r', 684),
 ('f', 634),
 ('g', 385),
 ('x', 271),
 ('u', 208),
 ('k', 192),
 ('tv', 150),
 ('th', 121),
 ('pa', 104),
 ('sunday-law', 92),
 ('re', 89),
 ('z', 82),
 ('ex', 77),
 ('co', 74),
 ('io', 72),
 ('id', 71),
 ('postmaster-general', 62),
 ('mo', 62),
 ('ga', 58),
 ('post-offices', 57),
 ('un', 57),
 ('un-american', 57),
 ('va', 56),
 ('statute-books', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('tion', 45),
 ('mm', 45),
 ('q', 44),
 ('li', 43),
 ('mt', 42),
 ('attorney-general', 41),
 ('sunday-rest', 39),
 ('wm', 38),
 ('pp', 38),
 ('mi', 37),
 ('charta', 37),
 ('ro', 37),
 ('mc', 33),
 ('ri', 31),
 ('neander', 31),
 ('al', 31),
 ('-', 30)]

Correction 4 -- Remove extra quotation marks

In [22]:
# %load shared_elements/replace_extra_quotation_marks.py
prev = cycle
cycle = "correction4"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    corrections = []
    for token in tokens:
        token_list = list(token)
        last_char = token_list[-1]

        if last_char is "'":
            if len(token) > 1:
                if token_list[-2] is 's' or 'S':
                    pass
                else:
                    corrections.append((token, re.sub(r"'", r"", token)))
            else:
                pass
        elif token[0] is "'":
            corrections.append((token, re.sub(r"'", r"", token)))   
        else:
            pass
    
    if len(corrections) > 0:
        print('{}: {}'.format(filename, corrections))

        for correction in corrections:
            content = clean.replace_pair(correction, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19060401-V01-01-page20.txt: [("'bound", 'bound')]
LibM19060401-V01-01-page22.txt: [("'co", 'co')]
LibM19060401-V01-01-page25.txt: [("'brought", 'brought')]
LibM19060401-V01-01-page29.txt: [("'Sunday", 'Sunday'), ("'hundred", 'hundred')]
LibM19060401-V01-01-page31.txt: [("'Concerning", 'Concerning'), ("'Connecticut", 'Connecticut'), ("'hearkened", 'hearkened'), ("'brother", 'brother'), ("'bow", 'bow')]
LibM19060401-V01-01-page33.txt: [("'CORTELYOU", 'CORTELYOU')]
LibM19060401-V01-01-page34.txt: [("'advocate", 'advocate'), ("'and", 'and'), ("'contrast", 'contrast')]
LibM19060401-V01-01-page35.txt: [("'e", 'e')]
LibM19060401-V01-01-page7.txt: [("'belief", 'belief')]
LibM19060401-V01-01-page8.txt: [("'Caesar", 'Caesar')]
LibM19060401-V01-01-page9.txt: [("'by", 'by')]
LibM19060701-V01-02-page21.txt: [("'twixt", 'twixt')]
LibM19061001-V01-03-page18.txt: [("'corresponding", 'corresponding')]
LibM19061001-V01-03-page22.txt: [('\'"', '"')]
LibM19061001-V01-03-page23.txt: [("'fragile", 'fragile'), ("'Where", 'Where')]
LibM19061001-V01-03-page24.txt: [("'ir", 'ir')]
LibM19061001-V01-03-page25.txt: [("'Brewer", 'Brewer'), ("'factories", 'factories'), ("'prohibited", 'prohibited')]
LibM19061001-V01-03-page29.txt: [("'of", 'of')]
LibM19061001-V01-03-page32.txt: [("'Judge", 'Judge'), ("''Sou.", 'Sou.')]
LibM19061001-V01-03-page8.txt: [("'to", 'to')]
LibM19070101-V02-01-page12.txt: [("'.", '.')]
LibM19070101-V02-01-page17.txt: [("'it", 'it')]
LibM19070101-V02-01-page20.txt: [("'work", 'work')]
LibM19070101-V02-01-page24.txt: [("'monstrous", 'monstrous')]
LibM19070101-V02-01-page32.txt: [("'Zealous", 'Zealous')]
LibM19070101-V02-01-page34.txt: [("'burning", 'burning')]
LibM19070401-V02-02-page12.txt: [("'on", 'on')]
LibM19070401-V02-02-page2.txt: [("'twas", 'twas')]
LibM19070401-V02-02-page25.txt: [("'they", 'they'), ("'the", 'the')]
LibM19070401-V02-02-page27.txt: [("'il", 'il')]
LibM19070401-V02-02-page28.txt: [("'said", 'said'), ("'They", 'They')]
LibM19070401-V02-02-page29.txt: [("'voluntarily", 'voluntarily')]
LibM19070401-V02-02-page3.txt: [("'Bless", 'Bless')]
LibM19070401-V02-02-page6.txt: [("'disobey", 'disobey')]
LibM19070701-V02-03-page1.txt: [("'a", 'a')]
LibM19070701-V02-03-page13.txt: [("'Demand", 'Demand')]
LibM19070701-V02-03-page25.txt: [("'to", 'to')]
LibM19070701-V02-03-page27.txt: [("'earnest", 'earnest')]
LibM19070701-V02-03-page31.txt: [("'uses", 'uses')]
LibM19070701-V02-03-page33.txt: [("'one", 'one')]
LibM19070701-V02-03-page6.txt: [("'time", 'time')]
LibM19071001-V02-04-page11.txt: [("'Bishop", 'Bishop'), ("'press", 'press')]
LibM19071001-V02-04-page31.txt: [("'I", 'I')]
LibM19071001-V02-04-page50.txt: [("'legislation", 'legislation')]
LibM19071001-V02-04-page51.txt: [("'Ier", 'Ier'), ("'pr..", 'pr..'), ("'Isom", 'Isom'), ("'rotor", 'rotor'), ("'.", '.'), ("'en", 'en'), ("'...", '...'), ("'rryn", 'rryn'), ("'hot", 'hot')]
LibM19071001-V02-04-page7.txt: [("'because", 'because')]
LibM19071001-V02-04-page8.txt: [("'In", 'In')]
LibM19080101-V03-01-page22.txt: [("'religionaboveall", 'religionaboveall')]
LibM19080101-V03-01-page23.txt: [("'of", 'of'), ("'tween", 'tween')]
LibM19080101-V03-01-page42.txt: [("'the", 'the')]
LibM19080401-V03-02-page1.txt: [('\'".', '".')]
LibM19080401-V03-02-page18.txt: [("'A", 'A')]
LibM19080401-V03-02-page31.txt: [("'or", 'or'), ("'from", 'from')]
LibM19080401-V03-02-page33.txt: [("'Tis", 'Tis')]
LibM19080701-V03-03-page11.txt: [("'ago", 'ago')]
LibM19080701-V03-03-page43.txt: [("'Sunday", 'Sunday')]
LibM19080701-V03-03-page52.txt: [("'um", 'um')]
LibM19081001-V03-04-page33.txt: [("'we", 'we')]
LibM19090101-V04-01-page15.txt: [("'honor", 'honor')]
LibM19090101-V04-01-page44.txt: [("'s", 's')]
LibM19090101-V04-01-page49.txt: [("'A", 'A')]
LibM19090401-V04-02-page1.txt: [("''.", '.')]
LibM19090401-V04-02-page2.txt: [("'.", '.'), ("'r", 'r')]
LibM19090401-V04-02-page20.txt: [("'religious", 'religious')]
LibM19090401-V04-02-page24.txt: [("'the", 'the')]
LibM19090401-V04-02-page32.txt: [("'a", 'a')]
LibM19090401-V04-02-page45.txt: [("'fio", 'fio'), ("'.", '.')]
LibM19090401-V04-02-page48.txt: [("'UNTIL", 'UNTIL')]
LibM19090401-V04-02-page49.txt: [("'A", 'A')]
LibM19090401-V04-02-page51.txt: [("'ARIZ", 'ARIZ')]
LibM19090701-V04-03-page1.txt: [("'''....", '....')]
LibM19090701-V04-03-page38.txt: [("'moment", 'moment')]
LibM19090701-V04-03-page43.txt: [("'for", 'for')]
LibM19090701-V04-03-page44.txt: [("'WET'andtRIY", 'WETandtRIY')]
LibM19090701-V04-03-page45.txt: [("'GIP", 'GIP')]
LibM19090701-V04-03-page49.txt: [("'iples", 'iples')]
LibM19091001-V04-04-page14.txt: [("'background.", 'background.')]
LibM19091001-V04-04-page39.txt: [("'instance", 'instance')]
LibM19091001-V04-04-page40.txt: [("'our", 'our')]
LibM19091001-V04-04-page46.txt: [("'much", 'much')]
LibM19091001-V04-04-page5.txt: [("'of", 'of')]
LibM19100101-V05-01-page10.txt: [("'its", 'its')]
LibM19100101-V05-01-page11.txt: [("'of", 'of'), ("'been", 'been')]
LibM19100101-V05-01-page12.txt: [("'now", 'now')]
LibM19100101-V05-01-page17.txt: [("'of", 'of')]
LibM19100101-V05-01-page19.txt: [("'siderable", 'siderable')]
LibM19100101-V05-01-page20.txt: [("'day", 'day')]
LibM19100101-V05-01-page25.txt: [("'why", 'why')]
LibM19100101-V05-01-page31.txt: [("'AMOR", 'AMOR')]
LibM19100101-V05-01-page34.txt: [("'Such", 'Such')]
LibM19100101-V05-01-page39.txt: [("'profound", 'profound')]
LibM19100401-V05-02-page14.txt: [("'for", 'for')]
LibM19100401-V05-02-page24.txt: [("'s", 's')]
LibM19100401-V05-02-page27.txt: [("'resident", 'resident')]
LibM19100401-V05-02-page28.txt: [("'from", 'from')]
LibM19100401-V05-02-page32.txt: [("'for", 'for')]
LibM19100401-V05-02-page7.txt: [("'doubt", 'doubt')]
LibM19100701-V05-03-page1.txt: [("'PIN", 'PIN')]
LibM19100701-V05-03-page44.txt: [("'together", 'together')]
LibM19100701-V05-03-page9.txt: [("'demanding", 'demanding')]
LibM19101001-V05-04-page10.txt: [("'direct", 'direct')]
LibM19101001-V05-04-page34.txt: [("'of", 'of'), ("'replies", 'replies')]
LibM19101001-V05-04-page36.txt: [("'Amore", 'Amore')]
LibM19101001-V05-04-page49.txt: [("'MAGAZINE", 'MAGAZINE')]
LibM19110101-V06-01-page10.txt: [("'wants", 'wants')]
LibM19110101-V06-01-page15.txt: [("'demanding", 'demanding')]
LibM19110101-V06-01-page17.txt: [("'shall", 'shall')]
LibM19110101-V06-01-page34.txt: [("'debates", 'debates')]
LibM19110101-V06-01-page35.txt: [("'with", 'with')]
LibM19110101-V06-01-page37.txt: [("'uniting", 'uniting')]
LibM19110101-V06-01-page39.txt: [("'as", 'as')]
LibM19110101-V06-01-page42.txt: [("'Adventists", 'Adventists')]
LibM19110101-V06-01-page45.txt: [("'neath", 'neath')]
LibM19110101-V06-01-page49.txt: [("'St", 'St')]
LibM19110101-V06-01-page5.txt: [("'enforce", 'enforce')]
LibM19110101-V06-01-page9.txt: [("'by", 'by')]
LibM19110401-V06-02-page1.txt: [("'apple", 'apple'), ("'ftIfl", 'ftIfl')]
LibM19110401-V06-02-page26.txt: [("'science", 'science'), ("'of", 'of')]
LibM19110701-V06-03-page2.txt: [("'liberty", 'liberty')]
LibM19110701-V06-03-page21.txt: [("'.", '.')]
LibM19110701-V06-03-page25.txt: [("'eagle", 'eagle')]
LibM19110701-V06-03-page29.txt: [("'of", 'of')]
LibM19110701-V06-03-page30.txt: [("'goo", 'goo')]
LibM19110701-V06-03-page4.txt: [("'painaas", 'painaas')]
LibM19110701-V06-03-page42.txt: [("'and", 'and')]
LibM19110701-V06-03-page46.txt: [("'the", 'the')]
LibM19111001-V06-04-page17.txt: [('\'"', '"')]
LibM19111001-V06-04-page19.txt: [("'positively", 'positively'), ("'V", 'V')]
LibM19111001-V06-04-page20.txt: [("'the", 'the')]
LibM19111001-V06-04-page30.txt: [("'liberties", 'liberties')]
LibM19111001-V06-04-page41.txt: [("'to", 'to')]
LibM19111001-V06-04-page52.txt: [("'Writings", 'Writings')]
LibM19120101-V07-01-page10.txt: [("'with", 'with'), ("'hardly", 'hardly')]
LibM19120101-V07-01-page22.txt: [("'and", 'and')]
LibM19120101-V07-01-page26.txt: [("'child", 'child')]
LibM19120101-V07-01-page27.txt: [("'RESIDENT", 'RESIDENT')]
LibM19120101-V07-01-page29.txt: [("'if", 'if')]
LibM19120101-V07-01-page30.txt: [("'s", 's')]
LibM19120101-V07-01-page31.txt: [("'be", 'be')]
LibM19120401-V07-02-page28.txt: [("'rections", 'rections'), ("'effect", 'effect')]
LibM19120401-V07-02-page32.txt: [("'Catholics", 'Catholics')]
LibM19120401-V07-02-page35.txt: [("'of", 'of'), ("'be", 'be')]
LibM19120701-V07-03-page4.txt: [("'.", '.'), ("'ma", 'ma'), ("'.Z....", '.Z....'), ("'..", '..'), ("'gut.", 'gut.'), ("'.", '.')]
LibM19120701-V07-03-page40.txt: [("'with", 'with')]
LibM19120701-V07-03-page42.txt: [("'of", 'of')]
LibM19120701-V07-03-page51.txt: [("'GAZINE", 'GAZINE')]
LibM19120701-V07-03-page52.txt: [("'Ne", 'Ne')]
LibM19120701-V07-03-page8.txt: [("'twixt", 'twixt')]
LibM19121001-V07-04-page26.txt: [("'tat", 'tat')]
LibM19121001-V07-04-page6.txt: [("'.", '.'), ("'aroe", 'aroe'), ("'ammo", 'ammo'), ("'Meow", 'Meow')]
LibM19130101-V08-01-page15.txt: [("'I-JAMES", 'I-JAMES')]
LibM19130101-V08-01-page2.txt: [("'Religious", 'Religious')]
LibM19130101-V08-01-page22.txt: [("'the", 'the')]
LibM19130101-V08-01-page23.txt: [("'religious", 'religious')]
LibM19130101-V08-01-page31.txt: [("'avoid", 'avoid')]
LibM19130101-V08-01-page42.txt: [("'en.", 'en.'), ("'ode", 'ode')]
LibM19130101-V08-01-page5.txt: [("'White", 'White')]
LibM19130401-V08-02-page13.txt: [("'provided", 'provided')]
LibM19130401-V08-02-page31.txt: [("'of", 'of')]
LibM19130401-V08-02-page32.txt: [("'to", 'to')]
LibM19130401-V08-02-page34.txt: [("'let", 'let')]
LibM19130401-V08-02-page38.txt: [("'Tis", 'Tis')]
LibM19130401-V08-02-page49.txt: [("'Society", 'Society')]
LibM19130701-V08-03-page27.txt: [("'C.'''.", 'C..'), ("'.....", '.....')]
LibM19130701-V08-03-page50.txt: [("'wishing", 'wishing')]
LibM19130701-V08-03-page51.txt: [("'WASH", 'WASH')]
LibM19131001-V08-04-page12.txt: [("'first", 'first')]
LibM19131001-V08-04-page13.txt: [("'of", 'of')]
LibM19131001-V08-04-page25.txt: [("'ay", 'ay')]
LibM19131001-V08-04-page3.txt: [("'This", 'This'), ("'.", '.')]
LibM19131001-V08-04-page4.txt: [("'OVID.", 'OVID.')]
LibM19131001-V08-04-page41.txt: [("'so", 'so')]
LibM19131001-V08-04-page5.txt: [("'on", 'on')]
LibM19131001-V08-04-page52.txt: [("'.", '.')]
LibM19140101-V09-01-page14.txt: [("'give", 'give')]
LibM19140101-V09-01-page15.txt: [("'just", 'just')]
LibM19140101-V09-01-page23.txt: [("'and", 'and')]
LibM19140101-V09-01-page31.txt: [("'i'i", 'ii'), ("'I.", 'I.'), ("'..i", '..i')]
LibM19140101-V09-01-page42.txt: [("'God", 'God')]
LibM19140101-V09-01-page43.txt: [("'once", 'once')]
LibM19140101-V09-01-page56.txt: [("'VA", 'VA'), ("'Nit", 'Nit')]
LibM19140401-V09-02-page15.txt: [("'contrary", 'contrary')]
LibM19140401-V09-02-page20.txt: [("'state", 'state')]
LibM19140401-V09-02-page23.txt: [("'shave", 'shave')]
LibM19140401-V09-02-page4.txt: [("''t", 't')]
LibM19140701-V09-03-page11.txt: [("'tis", 'tis')]
LibM19140701-V09-03-page18.txt: [("'prohibit", 'prohibit')]
LibM19140701-V09-03-page19.txt: [("'The", 'The')]
LibM19140701-V09-03-page26.txt: [("'orris", 'orris')]
LibM19140701-V09-03-page28.txt: [("'the", 'the')]
LibM19140701-V09-03-page29.txt: [("'as", 'as')]
LibM19140701-V09-03-page36.txt: [("'riot", 'riot')]
LibM19140701-V09-03-page37.txt: [("'The", 'The')]
LibM19140701-V09-03-page4.txt: [("'UT", 'UT'), ("'esired.", 'esired.')]
LibM19140701-V09-03-page40.txt: [("'elected", 'elected')]
LibM19140701-V09-03-page45.txt: [("'in", 'in')]
LibM19140701-V09-03-page49.txt: [("'t", 't'), ("'t", 't'), ('\'\'"', '"'), ("'TX", 'TX')]
LibM19140701-V09-03-page51.txt: [("'mon", 'mon')]
LibM19141001-V09-04-page11.txt: [("'a", 'a')]
LibM19141001-V09-04-page13.txt: [("'now", 'now')]
LibM19141001-V09-04-page24.txt: [("'IiE", 'IiE')]
LibM19141001-V09-04-page32.txt: [("'hung", 'hung')]
LibM19141001-V09-04-page35.txt: [("'IV", 'IV')]
LibM19141001-V09-04-page40.txt: [("'enjoy", 'enjoy')]
LibM19141001-V09-04-page45.txt: [("'the", 'the')]
LibM19141001-V09-04-page46.txt: [("'act", 'act')]
LibM19141001-V09-04-page50.txt: [("'M", 'M'), ("'N", 'N'), ("'C", 'C')]
LibM19141001-V09-04-page51.txt: [("'.", '.')]
LibM19141001-V09-04-page52.txt: [("'AK.", 'AK.'), ("'CY", 'CY')]
LibM19141001-V09-04-page9.txt: [("'at", 'at')]
LibM19150101-V10-01-page11.txt: [("'Liberty", 'Liberty')]
LibM19150101-V10-01-page14.txt: [("'a", 'a')]
LibM19150101-V10-01-page21.txt: [("'thus", 'thus')]
LibM19150101-V10-01-page24.txt: [("'the", 'the'), ("'to", 'to')]
LibM19150101-V10-01-page34.txt: [("'Tis", 'Tis')]
LibM19150101-V10-01-page38.txt: [("'fallacy", 'fallacy')]
LibM19150101-V10-01-page48.txt: [("'thereby", 'thereby')]
LibM19150101-V10-01-page52.txt: [("'M", 'M')]
LibM19150101-V10-01-page53.txt: [("'comet", 'comet'), ("'Protestant", 'Protestant')]
LibM19150401-V10-02-page19.txt: [("'directed", 'directed')]
LibM19150401-V10-02-page22.txt: [("'recourse", 'recourse')]
LibM19150401-V10-02-page36.txt: [("'s", 's')]
LibM19150401-V10-02-page39.txt: [("'Upon", 'Upon')]
LibM19150401-V10-02-page46.txt: [("'O", 'O')]
LibM19150401-V10-02-page6.txt: [("'IMN", 'IMN'), ('\'"Ar', '"Ar')]
LibM19150701-V10-03-page12.txt: [("'at", 'at')]
LibM19150701-V10-03-page15.txt: [("'twixti", 'twixti')]
LibM19150701-V10-03-page20.txt: [("'citizens", 'citizens')]
LibM19150701-V10-03-page26.txt: [("'Ipon", 'Ipon')]
LibM19150701-V10-03-page33.txt: [("'defend", 'defend')]
LibM19150701-V10-03-page42.txt: [("'a", 'a')]
LibM19150701-V10-03-page43.txt: [("'in", 'in')]
LibM19151001-V10-04-page11.txt: [("'publish", 'publish')]
LibM19151001-V10-04-page20.txt: [("'Part", 'Part')]
LibM19151001-V10-04-page21.txt: [("'personal", 'personal')]
LibM19151001-V10-04-page22.txt: [("'duty", 'duty')]
LibM19151001-V10-04-page25.txt: [("'buries", 'buries')]
LibM19151001-V10-04-page28.txt: [("'Twixt", 'Twixt')]
LibM19151001-V10-04-page47.txt: [("'immutable", 'immutable')]
LibM19151001-V10-04-page51.txt: [("'Vs", 'Vs')]
LibM19160101-V11-01-page11.txt: [("'union", 'union')]
LibM19160101-V11-01-page13.txt: [("'venerable", 'venerable')]
LibM19160101-V11-01-page44.txt: [("'....", '....'), ('\'\'.....".', '.....".'), ("'JAC'V", 'JACV'), ("'i", 'i'), ("'.", '.'), ("''.", '.'), ("'ti", 'ti'), ("'t", 't'), ("'sr", 'sr'), ("'Ae.", 'Ae.')]
LibM19160101-V11-01-page48.txt: [("'members", 'members')]
LibM19160101-V11-01e-page16.txt: [("'Washington", 'Washington')]
LibM19160101-V11-01e-page7.txt: [("'The", 'The')]
LibM19160401-V11-02-page10.txt: [("'a", 'a'), ("'Na", 'Na')]
LibM19160401-V11-02-page12.txt: [("'as", 'as')]
LibM19160401-V11-02-page16.txt: [("'if", 'if')]
LibM19160401-V11-02-page20.txt: [("'Company", 'Company')]
LibM19160401-V11-02-page26.txt: [("'It", 'It')]
LibM19160401-V11-02-page31.txt: [("'tis", 'tis'), ("'I", 'I')]
LibM19160401-V11-02-page46.txt: [("'Traitors", 'Traitors'), ('\'"', '"')]
LibM19160701-V11-03-page23.txt: [("'IM", 'IM')]
LibM19160701-V11-03-page27.txt: [("'An", 'An')]
LibM19160701-V11-03-page42.txt: [('\'"', '"')]
LibM19160701-V11-03-page6.txt: [("'neath", 'neath')]
LibM19161001-V11-04-page10.txt: [("'a", 'a')]
LibM19161001-V11-04-page19.txt: [("'rest", 'rest')]
LibM19161001-V11-04-page20.txt: [("'Illinois", 'Illinois')]
LibM19161001-V11-04-page35.txt: [("'regarding", 'regarding')]
LibM19161001-V11-04-page36.txt: [("'sent", 'sent')]
LibM19161001-V11-04-page37.txt: [("'the", 'the')]
LibM19161001-V11-04-page39.txt: [("'of", 'of')]
LibM19161001-V11-04-page41.txt: [("'court", 'court')]
LibM19170101-V12-01-page26.txt: [("'nternational", 'nternational')]
LibM19170101-V12-01-page27.txt: [("'Duprey's", 'Dupreys'), ("'Moore", 'Moore')]
LibM19170101-V12-01-page30.txt: [("'banishing", 'banishing')]
LibM19170101-V12-01-page35.txt: [("'ts", 'ts')]
LibM19170101-V12-01-page6.txt: [("'servile", 'servile')]
LibM19170401-V12-02-page16.txt: [("'no", 'no')]
LibM19170401-V12-02-page19.txt: [("'If", 'If')]
LibM19170401-V12-02-page20.txt: [("'Twas", 'Twas')]
LibM19170401-V12-02-page22.txt: [("'Tis", 'Tis')]
LibM19170401-V12-02-page25.txt: [("'Tis", 'Tis')]
LibM19170401-V12-02-page29.txt: [('\'"', '"')]
LibM19170401-V12-02-page5.txt: [("'o", 'o')]
LibM19170401-V12-02-page9.txt: [("'that", 'that')]
LibM19170701-V12-03-page1.txt: [("'al", 'al')]
LibM19170701-V12-03-page10.txt: [("'THE", 'THE')]
LibM19170701-V12-03-page12.txt: [('\'s"', 's"')]
LibM19170701-V12-03-page17.txt: [("'State", 'State')]
LibM19170701-V12-03-page29.txt: [("'the", 'the')]
LibM19171001-V12-04-page1.txt: [("'ublished", 'ublished')]
LibM19171001-V12-04-page16.txt: [("'mounted", 'mounted')]
LibM19171001-V12-04-page18.txt: [("'Luther", 'Luther'), ("'tboot", 'tboot')]
LibM19171001-V12-04-page27.txt: [("'us", 'us')]
LibM19171001-V12-04-page9.txt: [("'coordination", 'coordination'), ("'most", 'most')]
LibM19180101-V13-01-page4.txt: [('\'"E', '"E'), ("'attr", 'attr')]
LibM19180401-V13-02-page14.txt: [("'however", 'however')]
LibM19180401-V13-02-page22.txt: [("'of", 'of')]
LibM19180401-V13-02-page31.txt: [("'fields", 'fields')]
LibM19180401-V13-02-page36.txt: [("'THE", 'THE')]
LibM19180701-V13-03-page10.txt: [("'of", 'of')]
LibM19180701-V13-03-page21.txt: [("'no", 'no')]
LibM19180701-V13-03-page32.txt: [("'years", 'years')]
LibM19180701-V13-03-page34.txt: [("'what", 'what'), ("'Tis", 'Tis')]
LibM19181001-V13-04-page15.txt: [("'being", 'being')]
LibM19181001-V13-04-page19.txt: [("'EMOCRACY", 'EMOCRACY')]
LibM19181001-V13-04-page21.txt: [("'the", 'the')]
LibM19181001-V13-04-page25.txt: [("'virtually", 'virtually')]
LibM19190101-V15-01-page18.txt: [("'enforce", 'enforce')]
LibM19190101-V15-01-page19.txt: [("'remain", 'remain')]
LibM19190101-V15-01-page20.txt: [("'Oxtails", 'Oxtails')]
LibM19190101-V15-01-page22.txt: [("'Sunday", 'Sunday')]
LibM19190401-V15-02-page1.txt: [("'W", 'W')]
LibM19190401-V15-02-page14.txt: [("'the", 'the')]
LibM19190401-V15-02-page15.txt: [("'the", 'the'), ("'twixt", 'twixt')]
LibM19190401-V15-02-page16.txt: [("'the", 'the')]
LibM19190401-V15-02-page21.txt: [("'a", 'a')]
LibM19190401-V15-02-page22.txt: [("'million", 'million')]
LibM19190401-V15-02-page5.txt: [("'LE", 'LE'), ("'being", 'being')]
LibM19190401-V15-02-page6.txt: [("'According", 'According')]
LibM19190701-V15-03-page20.txt: [("'Presbyterian", 'Presbyterian')]
LibM19190701-V15-03-page21.txt: [("'or", 'or')]
LibM19190701-V15-03-page29.txt: [("'lewd", 'lewd')]
LibM19190701-V15-03-page30.txt: [("'v", 'v'), ("'he", 'he')]
LibM19190701-V15-03-page31.txt: [("'.", '.'), ("'the", 'the')]
LibM19190701-V15-03-page32.txt: [("'United", 'United')]
LibM19190701-V15-03-page33.txt: [("'and", 'and')]
LibM19191001-V15-04-page15.txt: [('\'"', '"')]
LibM19191001-V15-04-page18.txt: [("'five", 'five')]
LibM19191001-V15-04-page7.txt: [("'purity", 'purity'), ("'by", 'by')]
LibM19200101-V14-01-page1.txt: [("'IN", 'IN')]
LibM19200101-V14-01-page6.txt: [("'s", 's')]
LibM19200401-V14-02-page25.txt: [("'Volumes", 'Volumes')]
LibM19200701-V14-03-page15.txt: [("'racTIMIriiiiitiriiltililietcliteiViiVittiiiitiEVAlifittiA", 'racTIMIriiiiitiriiltililietcliteiViiVittiiiitiEVAlifittiA')]
LibM19200701-V14-03-page24.txt: [("'A", 'A')]
LibM19200701-V14-03-page27.txt: [("'concerned.", 'concerned.')]
LibM19200701-V14-03-page32.txt: [("'fourth", 'fourth')]
LibM19200701-V14-03-page33.txt: [("'the", 'the')]
LibM19200701-V14-03-page4.txt: [("'Twixt", 'Twixt')]
LibM19201001-V14-04-page15.txt: [("'m", 'm')]
LibM19201001-V14-04-page16.txt: [("'mannum", 'mannum')]
LibM19201001-V14-04-page23.txt: [("'Signs", 'Signs'), ("'Signs", 'Signs'), ("'Cut", 'Cut'), ("'Signs", 'Signs')]
LibM19201001-V14-04-page25.txt: [("'were", 'were')]
LibM19201001-V14-04-page29.txt: [("'praise", 'praise'), ("'for", 'for')]
LibM19201001-V14-04-page7.txt: [("'oppression.", 'oppression.')]
In [23]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction4

Average verified rate: 0.9811434974335735

Average of error rates: 0.03407373440939106

Total token count: 1452019

In [24]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[("'", 1499),
 ('m', 1341),
 ('d', 1257),
 ('e', 1013),
 ('w', 957),
 ('t', 847),
 ('n', 787),
 ('r', 687),
 ('f', 634),
 ('g', 386),
 ('x', 271),
 ('u', 209),
 ('k', 192),
 ('tv', 150),
 ('th', 122),
 ('pa', 104),
 ('sunday-law', 92),
 ('re', 89),
 ('z', 83),
 ('ex', 77),
 ('co', 75),
 ('io', 72),
 ('id', 71),
 ('mo', 63),
 ('postmaster-general', 62),
 ('ga', 58),
 ('post-offices', 57),
 ('un', 57),
 ('un-american', 57),
 ('va', 57),
 ('statute-books', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('tion', 45),
 ('mm', 45),
 ('q', 44),
 ('li', 43),
 ('mt', 42),
 ('attorney-general', 41),
 ('sunday-rest', 39),
 ('wm', 38),
 ('ro', 38),
 ('pp', 38),
 ('mi', 37),
 ('charta', 37),
 ('mc', 33),
 ('al', 32),
 ('ri', 31),
 ('neander', 31),
 ('-', 30)]

Correction 5 -- Rejoin Burst Words

In [25]:
# %load shared_elements/rejoin_burst_words.py
prev = cycle
cycle = "correction5"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    pattern = re.compile("(\s(\w{1,2}\s){5,})")
    
    replacements = []
    clean.check_splits(pattern, spelling_dictionary, content, replacements)
    
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19061001-V01-03-page17.txt: [(' r a t h e ', 'rathe')]
LibM19061001-V01-03-page24.txt: [('To', 'To')]
LibM19100101-V05-01-page22.txt: [('It', 'It')]
LibM19100401-V05-02-page52.txt: [('El', 'El')]
LibM19100701-V05-03-page19.txt: [(' f or w a r d\n', 'forward')]
LibM19120401-V07-02-page46.txt: [('It', 'It')]
LibM19121001-V07-04-page29.txt: [('As', 'As')]
LibM19121001-V07-04-page5.txt: [('El', 'El')]
LibM19150101-V10-01-page11.txt: [('To', 'To')]
LibM19150101-V10-01-page4.txt: [('Lo', 'Lo')]
LibM19150401-V10-02-page6.txt: [('\nU N U S U A L ', 'UNUSUAL')]
LibM19150701-V10-03-page27.txt: [('It', 'It')]
LibM19150701-V10-03-page47.txt: [(' m a n is a ', 'manisa')]
LibM19170401-V12-02-page5.txt: [(' p r es en t ', 'present')]
LibM19170701-V12-03-page16.txt: [('Is', 'Is')]
LibM19200101-V14-01-page6.txt: [(' c an n o t ', 'cannot')]
In [26]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction5

Average verified rate: 0.9811555435567139

Average of error rates: 0.0340564930300807

Total token count: 1451992

In [27]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[27]:
[("'", 1499),
 ('m', 1340),
 ('d', 1256),
 ('e', 1012),
 ('w', 956),
 ('t', 844),
 ('n', 784),
 ('r', 684),
 ('f', 633),
 ('g', 386),
 ('x', 271),
 ('u', 206),
 ('k', 192),
 ('tv', 150),
 ('th', 122),
 ('pa', 104),
 ('sunday-law', 92),
 ('re', 89),
 ('z', 83),
 ('ex', 77),
 ('co', 75),
 ('io', 72),
 ('id', 71),
 ('mo', 63),
 ('postmaster-general', 62),
 ('ga', 58),
 ('post-offices', 57),
 ('un', 57),
 ('un-american', 57),
 ('va', 57),
 ('statute-books', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('tion', 45),
 ('mm', 45),
 ('q', 44),
 ('li', 43),
 ('mt', 42),
 ('attorney-general', 41),
 ('sunday-rest', 39),
 ('wm', 38),
 ('ro', 38),
 ('pp', 38),
 ('mi', 37),
 ('charta', 37),
 ('mc', 33),
 ('al', 32),
 ('ri', 31),
 ('neander', 31),
 ('-', 30)]

Correction 6 -- Rejoin Split Words

In [28]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction6"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)
    
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19060401-V01-01-page11.txt: [('Mc', 'Alister')]
LibM19060401-V01-01-page35.txt: [('ri', 'e'), ('re', 'd'), ('ti', 'c')]
LibM19061001-V01-03-page19.txt: [('Sabb', 'at')]
LibM19061001-V01-03-page21.txt: [('destruc', 'tion')]
LibM19061001-V01-03-page4.txt: [('spir', 'itual')]
LibM19061001-V01-03-page6.txt: [('LIBERT', 'Y')]
LibM19070101-V02-01-page2.txt: [('ti', 'A')]
LibM19070101-V02-01-page23.txt: [('LIBE', 'RTY')]
LibM19070101-V02-01-page6.txt: [('impor', 'tance')]
LibM19070401-V02-02-page17.txt: [('LAN', 'CASTER')]
LibM19070701-V02-03-page14.txt: [('Demi', 'god')]
LibM19070701-V02-03-page18.txt: [('unfort', 'unately')]
LibM19070701-V02-03-page23.txt: [('Mc', 'Alister')]
LibM19071001-V02-04-page12.txt: [('approv', 'e')]
LibM19071001-V02-04-page14.txt: [('MC', 'KENNA')]
LibM19071001-V02-04-page20.txt: [('controv', 'ersy')]
LibM19071001-V02-04-page38.txt: [('un', 'Christian'), ('Fr', 'eedom')]
LibM19071001-V02-04-page46.txt: [('co', 'operation')]
LibM19071001-V02-04-page48.txt: [('th', 'e')]
LibM19071001-V02-04-page50.txt: [('co', 'respondents')]
LibM19071001-V02-04-page51.txt: [('ren', 'al'), ('re', 'hob')]
LibM19080101-V03-01-page20.txt: [('un', 'Christian')]
LibM19080101-V03-01-page32.txt: [('ob', 'serve')]
LibM19080101-V03-01-page41.txt: [('self-govern', 'ment')]
LibM19080401-V03-02-page1.txt: [('pa', 'I')]
LibM19080401-V03-02-page28.txt: [('Legis', 'lation')]
LibM19080401-V03-02-page30.txt: [('upo', 'n')]
LibM19080701-V03-03-page1.txt: [('ra', 'ff')]
LibM19080701-V03-03-page20.txt: [('re', 'A')]
LibM19080701-V03-03-page28.txt: [('Sund', 'a')]
LibM19080701-V03-03-page37.txt: [('religi', 'o')]
LibM19080701-V03-03-page46.txt: [('Northweste', 'r')]
LibM19081001-V03-04-page11.txt: [('PHILA', 'DELPHIA'), ('WILLI', 'AM')]
LibM19081001-V03-04-page15.txt: [('fi', 'e')]
LibM19081001-V03-04-page19.txt: [('TH', 'E'), ('religi', 'o'), ('Por', 'e'), ('su', 'preme')]
LibM19090101-V04-01-page28.txt: [('threate', 'n')]
LibM19090101-V04-01-page33.txt: [('estab', 'lish')]
LibM19090101-V04-01-page45.txt: [('Mc', "Clure's")]
LibM19090401-V04-02-page11.txt: [('Bonif', 'ace')]
LibM19090401-V04-02-page12.txt: [('Boni', 'face')]
LibM19090401-V04-02-page2.txt: [('po', 'i')]
LibM19090401-V04-02-page31.txt: [('MC', 'MILLAN')]
LibM19090401-V04-02-page45.txt: [('eyo', 't')]
LibM19090401-V04-02-page48.txt: [('fr', 'Ee')]
LibM19090401-V04-02-page49.txt: [('om', 'a')]
LibM19090401-V04-02-page50.txt: [('co', 'operate')]
LibM19090401-V04-02-page52.txt: [('ma', "n's")]
LibM19090401-V04-02-page8.txt: [('Mc', 'Dermott')]
LibM19090401-V04-02-page9.txt: [('co', 'operation')]
LibM19090701-V04-03-page10.txt: [('RECEP', 'TION')]
LibM19090701-V04-03-page29.txt: [('Speakin', 'g')]
LibM19090701-V04-03-page49.txt: [('Appe', 'als')]
LibM19090701-V04-03-page7.txt: [('Mc', 'Crory')]
LibM19091001-V04-04-page38.txt: [('si', 'n')]
LibM19091001-V04-04-page47.txt: [('Appe', 'als')]
LibM19091001-V04-04-page7.txt: [('Mc', 'Kinley')]
LibM19100101-V05-01-page17.txt: [('th', 'at'), ('Mc', 'Kenna')]
LibM19100101-V05-01-page19.txt: [('incon', 'siderable')]
LibM19100101-V05-01-page26.txt: [('gua', 'ranteed')]
LibM19100101-V05-01-page31.txt: [('SU', 'NDAY')]
LibM19100101-V05-01-page39.txt: [('CA', 'Y')]
LibM19100401-V05-02-page15.txt: [('uncon', 'fessed')]
LibM19100401-V05-02-page23.txt: [('secre', 'tary')]
LibM19100401-V05-02-page35.txt: [('PeRsECUTI', 'ON')]
LibM19100401-V05-02-page52.txt: [('legisla', 'tor')]
LibM19100401-V05-02-page6.txt: [('un', 'Christian')]
LibM19100401-V05-02-page8.txt: [('PRESI', 'DENT')]
LibM19100701-V05-03-page1.txt: [('wo', 'g'), ('UN', 'I')]
LibM19100701-V05-03-page29.txt: [('al', 'ways')]
LibM19100701-V05-03-page37.txt: [('HISTOR', 'ICAL'), ('ERRON', 'EOUS')]
LibM19100701-V05-03-page45.txt: [('ch', 'ose')]
LibM19100701-V05-03-page49.txt: [('PROTES', 'TANT'), ('Re', 'stated')]
LibM19100701-V05-03-page7.txt: [('Mc', 'Kinley')]
LibM19101001-V05-04-page15.txt: [('co', 'operate')]
LibM19101001-V05-04-page28.txt: [('PA', 'L')]
LibM19101001-V05-04-page39.txt: [('libert', 'y')]
LibM19101001-V05-04-page49.txt: [('PROTES', 'TANT')]
LibM19101001-V05-04-page50.txt: [('co', 'operation')]
LibM19110101-V06-01-page1.txt: [('nU', 'M')]
LibM19110101-V06-01-page12.txt: [('compuls', 'ion')]
LibM19110101-V06-01-page18.txt: [('Mc', 'Donald')]
LibM19110101-V06-01-page34.txt: [('consid', 'ered'), ('RE', 'LIGION')]
LibM19110101-V06-01-page35.txt: [('shep', "herd's")]
LibM19110101-V06-01-page48.txt: [('ta', 'king')]
LibM19110101-V06-01-page5.txt: [('TI', 'E')]
LibM19110401-V06-02-page1.txt: [('mo', 'Jo')]
LibM19110401-V06-02-page12.txt: [('ment', 'on')]
LibM19110701-V06-03-page14.txt: [('un', 'Christian')]
LibM19110701-V06-03-page20.txt: [('WA', 'RTBURG')]
LibM19110701-V06-03-page25.txt: [('republi', 'c')]
LibM19110701-V06-03-page32.txt: [('religi', 'o')]
LibM19110701-V06-03-page33.txt: [('Switzerlan', 'd')]
LibM19110701-V06-03-page37.txt: [('religi', 'o')]
LibM19110701-V06-03-page38.txt: [('reen', 'forced')]
LibM19110701-V06-03-page50.txt: [('expe', 'rience')]
LibM19111001-V06-04-page11.txt: [('religi', 'o')]
LibM19111001-V06-04-page18.txt: [('Pontif', 'ex')]
LibM19111001-V06-04-page38.txt: [('Co', 'n')]
LibM19111001-V06-04-page43.txt: [('ma', 'king')]
LibM19111001-V06-04-page52.txt: [('ec', 'clesiastical'), ('kl', 'EE'), ('ra', 'm'), ('MI', 'M'), ('LI', 'II'), ('Li', 'N'), ('RI', 'M')]
LibM19120101-V07-01-page12.txt: [('certif', 'ying'), ('ern', 'e')]
LibM19120101-V07-01-page33.txt: [('Notwithstand', 'ing')]
LibM19120101-V07-01-page43.txt: [('ESTAB', 'LISHMENT')]
LibM19120101-V07-01-page49.txt: [('FA', 'IN'), ('TA', 'is'), ('SI', 'TA'), ('ci', 'T'), ('devel', 'opment')]
LibM19120401-V07-02-page23.txt: [('misrepres', 'entation')]
LibM19120401-V07-02-page5.txt: [('M.', '')]
LibM19120701-V07-03-page13.txt: [('hol', 'iday')]
LibM19120701-V07-03-page2.txt: [('Co', 'ercion')]
LibM19120701-V07-03-page4.txt: [('CO', 'NG'), ('gi', 'e')]
LibM19120701-V07-03-page5.txt: [('M.', '')]
LibM19120701-V07-03-page52.txt: [('M.', '')]
LibM19121001-V07-04-page19.txt: [('mul', 'titude'), ('proclama', 'tion')]
LibM19121001-V07-04-page4.txt: [('gl', 'O'), ('ma', 'm'), ('MI', 'M')]
LibM19121001-V07-04-page49.txt: [('gOR', 'E'), ('M.', '')]
LibM19121001-V07-04-page6.txt: [('mo', 'i'), ('G.', '')]
LibM19130101-V08-01-page1.txt: [('WA', 'tTS')]
LibM19130101-V08-01-page2.txt: [('M.', ''), ('Ni', 'M')]
LibM19130101-V08-01-page6.txt: [('LI', 'BERTY')]
LibM19130401-V08-02-page1.txt: [('Lil', 'A')]
LibM19130401-V08-02-page15.txt: [('re', 'pealed')]
LibM19130401-V08-02-page24.txt: [('impor', 'tance')]
LibM19130401-V08-02-page3.txt: [('CHOOS', 'ING')]
LibM19130401-V08-02-page33.txt: [('STURDEVA', 'NT')]
LibM19130401-V08-02-page34.txt: [('cer', 'O')]
LibM19130401-V08-02-page4.txt: [('po', 'O')]
LibM19130401-V08-02-page51.txt: [('denounci', 'ng'), ('JUSTI', 'FIES'), ('re', 't')]
LibM19130401-V08-02-page7.txt: [('M.', '')]
LibM19130701-V08-03-page2.txt: [('ti', 'e')]
LibM19130701-V08-03-page3.txt: [('PRIN', 'CIPLES'), ('GREA', 'T'), ('MI', 'M'), ('MA', 'M')]
LibM19130701-V08-03-page4.txt: [('XL', 'v')]
LibM19130701-V08-03-page41.txt: [('re', 'I')]
LibM19130701-V08-03-page44.txt: [('ce', 'e')]
LibM19130701-V08-03-page48.txt: [('unlawf', 'ul')]
LibM19130701-V08-03-page49.txt: [('eX', 't'), ('ADVER', 'TISED')]
LibM19130701-V08-03-page51.txt: [('AL', 'MA')]
LibM19131001-V08-04-page12.txt: [('yo', 'ng')]
LibM19131001-V08-04-page27.txt: [('EXI', 'LE')]
LibM19131001-V08-04-page28.txt: [('troub', 'ler')]
LibM19131001-V08-04-page4.txt: [('ro', 'o')]
LibM19131001-V08-04-page41.txt: [('ecclesi', 'astical')]
LibM19131001-V08-04-page49.txt: [('ADVER', 'TISED')]
LibM19131001-V08-04-page50.txt: [('re', 'No')]
LibM19131001-V08-04-page52.txt: [('Ak', 'A')]
LibM19131001-V08-04-page7.txt: [('M.', '')]
LibM19140101-V09-01-page31.txt: [('mo', 'I')]
LibM19140101-V09-01-page33.txt: [('RE', 'A')]
LibM19140101-V09-01-page54.txt: [('ADVER', 'TISED')]
LibM19140101-V09-01-page55.txt: [('EA', 'T'), ('CO', 'PY')]
LibM19140101-V09-01-page56.txt: [('relig', 'ion'), ('Ti', 'e')]
LibM19140401-V09-02-page11.txt: [('corporatio', 'n'), ('re', 'formation'), ('Congregatio', 'n')]
LibM19140401-V09-02-page12.txt: [('un', 'der'), ('coun', 'try'), ('combina', 'tion'), ('al', 'I')]
LibM19140401-V09-02-page13.txt: [('ti', 'nes')]
LibM19140401-V09-02-page18.txt: [('ASSEM', 'BLY')]
LibM19140401-V09-02-page25.txt: [('citi', 'zens')]
LibM19140401-V09-02-page3.txt: [('CIRCULAT', 'ING')]
LibM19140401-V09-02-page35.txt: [('PROTES', 'TANT')]
LibM19140401-V09-02-page4.txt: [('Ki', 'Ng')]
LibM19140401-V09-02-page41.txt: [('MAGA', 'ZINE')]
LibM19140401-V09-02-page50.txt: [('M.', ''), ('ADVER', 'TISED')]
LibM19140401-V09-02-page52.txt: [('re', 'ligious')]
LibM19140401-V09-02-page7.txt: [('M.', '')]
LibM19140701-V09-03-page2.txt: [('M.', '')]
LibM19140701-V09-03-page20.txt: [('MC', 'ADOO'), ('FR', 'T')]
LibM19140701-V09-03-page21.txt: [('MC', 'ADOO')]
LibM19140701-V09-03-page34.txt: [('ambi', 'tion')]
LibM19140701-V09-03-page4.txt: [('indi', 'tes')]
LibM19140701-V09-03-page51.txt: [('ti', 'The')]
LibM19140701-V09-03-page9.txt: [('sp', 'oken')]
LibM19141001-V09-04-page1.txt: [('Sp', 'A')]
LibM19141001-V09-04-page26.txt: [('TI', 'The')]
LibM19141001-V09-04-page27.txt: [('Al', 'ES')]
LibM19141001-V09-04-page3.txt: [('nI', 'M')]
LibM19141001-V09-04-page31.txt: [('un', 'fearing'), ('AMERI', 'CANS')]
LibM19141001-V09-04-page38.txt: [('es', 'tablish')]
LibM19141001-V09-04-page4.txt: [('ro', 'O'), ('Ki', 'M')]
LibM19141001-V09-04-page48.txt: [('Magaz', 'ine')]
LibM19141001-V09-04-page49.txt: [('MI', 'r')]
LibM19141001-V09-04-page50.txt: [('Mit', 'T'), ('li', 'M'), ('tE', 'E')]
LibM19141001-V09-04-page51.txt: [('monarchi', 'cal')]
LibM19141001-V09-04-page52.txt: [('Al', 'I')]
LibM19141001-V09-04-page7.txt: [('M.', '')]
LibM19150101-V10-01-page2.txt: [('pre', 'vent')]
LibM19150101-V10-01-page3.txt: [('SUBSCRIP', 'TIONS')]
LibM19150101-V10-01-page4.txt: [('Ki', 'M')]
LibM19150101-V10-01-page51.txt: [('Ti', 'E')]
LibM19150101-V10-01-page52.txt: [('MO', 'M'), ('Mi', 'M')]
LibM19150101-V10-01-page53.txt: [('STIN', 'G')]
LibM19150401-V10-02-page21.txt: [('RE', 'LIGIOUS')]
LibM19150401-V10-02-page28.txt: [('impor', 'tant')]
LibM19150401-V10-02-page3.txt: [('MI', 'M'), ('YA', 'M')]
LibM19150401-V10-02-page40.txt: [('op', 'ening')]
LibM19150401-V10-02-page42.txt: [('underg', 'o')]
LibM19150401-V10-02-page46.txt: [('Re', 'formation')]
LibM19150401-V10-02-page48.txt: [('sa', 'o')]
LibM19150401-V10-02-page50.txt: [('M.', '')]
LibM19150401-V10-02-page6.txt: [('ASSEMB', 'LE')]
LibM19150701-V10-03-page11.txt: [('expec', 'tation')]
LibM19150701-V10-03-page2.txt: [('Connecti', 'cut'), ('M.', '')]
LibM19150701-V10-03-page28.txt: [('violenc', 'e')]
LibM19150701-V10-03-page3.txt: [('Ki', 'M')]
LibM19150701-V10-03-page35.txt: [('lif', 'e')]
LibM19150701-V10-03-page38.txt: [('withou', 't')]
LibM19150701-V10-03-page42.txt: [('M.', '')]
LibM19150701-V10-03-page47.txt: [('po', 'se')]
LibM19150701-V10-03-page48.txt: [('twenty-f', 'our')]
LibM19151001-V10-04-page13.txt: [('politi', 'c')]
LibM19151001-V10-04-page2.txt: [('af', 'filiated')]
LibM19151001-V10-04-page22.txt: [('peo', 'ple')]
LibM19151001-V10-04-page23.txt: [('destruc', 'tion')]
LibM19151001-V10-04-page31.txt: [('Un', 'ion')]
LibM19151001-V10-04-page48.txt: [('rO', 'O'), ('RE', 'C'), ('Ama', 'rillo')]
LibM19151001-V10-04-page49.txt: [('RI', 'M')]
LibM19151001-V10-04-page50.txt: [('Ri', 'M'), ('EM', 'F')]
LibM19160101-V11-01-page12.txt: [('re', 'fused')]
LibM19160101-V11-01-page26.txt: [('se', 'an')]
LibM19160101-V11-01-page4.txt: [('M.', '')]
LibM19160101-V11-01e-page1.txt: [('mi', 'A')]
LibM19160101-V11-01e-page16.txt: [('ss', 'H')]
LibM19160401-V11-02-page10.txt: [('OBSERV', 'ANCE')]
LibM19160401-V11-02-page22.txt: [('re', 'ligious')]
LibM19160401-V11-02-page26.txt: [('Mi', 'n')]
LibM19160401-V11-02-page3.txt: [('MO', 'no')]
LibM19160401-V11-02-page38.txt: [('Pontif', 'ex')]
LibM19160401-V11-02-page4.txt: [('vis', 'ion'), ('teac', 'her')]
LibM19160701-V11-03-page21.txt: [('diplom', 'a')]
LibM19161001-V11-04-page29.txt: [('LI', 'BER')]
LibM19161001-V11-04-page40.txt: [('LIBERT', 'Y')]
LibM19170101-V12-01-page16.txt: [('pa', 'tient')]
LibM19170101-V12-01-page2.txt: [('af', 'filiated')]
LibM19170101-V12-01-page35.txt: [('AMMUN', 'ITION')]
LibM19170101-V12-01-page5.txt: [('re', 'garded')]
LibM19170701-V12-03-page1.txt: [('Lil', 'A')]
LibM19170701-V12-03-page14.txt: [('un', 'Christian')]
LibM19171001-V12-04-page10.txt: [('POR', 'TION')]
LibM19171001-V12-04-page11.txt: [('suav', 'ity')]
LibM19171001-V12-04-page23.txt: [('re', 'forming')]
LibM19180101-V13-01-page24.txt: [('temperanc', 'e')]
LibM19180101-V13-01-page35.txt: [('Th', 'e')]
LibM19180401-V13-02-page31.txt: [('se', 'a')]
LibM19180701-V13-03-page13.txt: [('FR', 'A')]
LibM19181001-V13-04-page14.txt: [('LI', 'BER')]
LibM19181001-V13-04-page32.txt: [('LIBERT', 'Y')]
LibM19190101-V15-01-page14.txt: [('CONFESSIO', 'N')]
LibM19190101-V15-01-page20.txt: [('pa', 'per')]
LibM19190101-V15-01-page21.txt: [('prin', 'ciple')]
LibM19190101-V15-01-page22.txt: [('un', 'Christian')]
LibM19190101-V15-01-page5.txt: [('COUN', 'TRY')]
LibM19190401-V15-02-page17.txt: [('RE', 'LIGIOUS')]
LibM19190401-V15-02-page22.txt: [('Ca', 'sar'), ('sar', 'the')]
LibM19190401-V15-02-page5.txt: [('vA', 'LE')]
LibM19190701-V15-03-page2.txt: [('affil', 'iated')]
LibM19190701-V15-03-page28.txt: [('peo', 'ple')]
LibM19190701-V15-03-page32.txt: [('reli', 'gion')]
LibM19190701-V15-03-page9.txt: [('religi', 'ous')]
LibM19191001-V15-04-page1.txt: [('Lil', 'A')]
LibM19191001-V15-04-page22.txt: [('Ma', 'Ma')]
LibM19191001-V15-04-page23.txt: [('MI', 'NI')]
LibM19191001-V15-04-page25.txt: [('Ma', 'Ms')]
LibM19200101-V14-01-page2.txt: [('M.', ''), ('enfor', 'ce')]
LibM19200401-V14-02-page13.txt: [('co', 'operation')]
LibM19200401-V14-02-page14.txt: [('Sund', 'a'), ('co', 'operation')]
LibM19200401-V14-02-page23.txt: [('se', 'a')]
LibM19200401-V14-02-page35.txt: [('TI', 'THE')]
LibM19200401-V14-02-page6.txt: [('LIBERT', 'Y')]
LibM19200701-V14-03-page14.txt: [('AL', 'L')]
LibM19200701-V14-03-page15.txt: [('M.', '')]
LibM19200701-V14-03-page3.txt: [('Tir', 'A')]
LibM19200701-V14-03-page8.txt: [('re', 'enacted')]
LibM19201001-V14-04-page23.txt: [('UN', 'Christian'), ('Un', 'Scriptural')]
LibM19201001-V14-04-page31.txt: [('wa', 'n')]
LibM19201001-V14-04-page32.txt: [('SY', 'St')]
LibM19201001-V14-04-page7.txt: [('gl', 'o'), ('Mayflo', 'wer')]
In [29]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction6

Average verified rate: 0.9814180834781028

Average of error rates: 0.03366287600880411

Total token count: 1451734

In [30]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[30]:
[("'", 1499),
 ('m', 1326),
 ('d', 1254),
 ('e', 998),
 ('w', 956),
 ('t', 837),
 ('n', 776),
 ('r', 682),
 ('f', 633),
 ('g', 384),
 ('x', 271),
 ('u', 206),
 ('k', 192),
 ('tv', 150),
 ('th', 119),
 ('pa', 100),
 ('sunday-law', 92),
 ('z', 83),
 ('ex', 75),
 ('io', 72),
 ('re', 72),
 ('id', 71),
 ('co', 64),
 ('postmaster-general', 62),
 ('mo', 58),
 ('ga', 58),
 ('post-offices', 57),
 ('un-american', 57),
 ('statute-books', 56),
 ('va', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('un', 46),
 ('mm', 46),
 ('q', 44),
 ('mt', 42),
 ('attorney-general', 41),
 ('tion', 40),
 ('sunday-rest', 39),
 ('wm', 38),
 ('pp', 38),
 ('charta', 37),
 ('ro', 36),
 ('li', 36),
 ('neander', 31),
 ('-', 30),
 ('seventhday', 30),
 ('mi', 28),
 ('es', 28),
 ('ft', 28)]

Correction 7 -- Rejoin Split Words II

In [31]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction7"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)
    
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19060401-V01-01-page11.txt: [('de', 'calogue')]
LibM19060401-V01-01-page35.txt: [('en', 'th'), ('d', 'ak'), ('r', 'te')]
LibM19060701-V01-02-page12.txt: [('LIB', 'ERTY')]
LibM19061001-V01-03-page19.txt: [('m', 'es')]
LibM19061001-V01-03-page22.txt: [('bane', 'ful')]
LibM19070101-V02-01-page18.txt: [('IN', 'gress')]
LibM19070101-V02-01-page21.txt: [('W', 'ILLIMANTIC')]
LibM19070101-V02-01-page25.txt: [('r', 'esided')]
LibM19070401-V02-02-page31.txt: [('wine', 'bibbers')]
LibM19070401-V02-02-page36.txt: [('A', 'STI')]
LibM19070701-V02-03-page18.txt: [('WILL', 'IAMS')]
LibM19070701-V02-03-page8.txt: [('LIB', 'ERTY')]
LibM19071001-V02-04-page15.txt: [('rem', 'arkable'), ('n', 'ation')]
LibM19071001-V02-04-page32.txt: [('C', 'hr')]
LibM19071001-V02-04-page51.txt: [('for', 'th'), ('r', 'ow')]
LibM19080101-V03-01-page26.txt: [('PRES', 'IDENT')]
LibM19080101-V03-01-page31.txt: [('Cab', 'inet')]
LibM19080401-V03-02-page28.txt: [('Legis', 'lation')]
LibM19080401-V03-02-page30.txt: [('p', 'rinciple')]
LibM19080701-V03-03-page1.txt: [('ra', 'ff')]
LibM19080701-V03-03-page39.txt: [('move', 'ment')]
LibM19080701-V03-03-page43.txt: [('uni', 'versal')]
LibM19080701-V03-03-page52.txt: [('THE', 'RE'), ('L', 'um')]
LibM19081001-V03-04-page1.txt: [('V', 'oiD')]
LibM19081001-V03-04-page15.txt: [('con', 'demned')]
LibM19081001-V03-04-page38.txt: [('obj', 'ect')]
LibM19090101-V04-01-page33.txt: [('estab', 'lish')]
LibM19090101-V04-01-page52.txt: [('i', 'll')]
LibM19090401-V04-02-page2.txt: [('f', 'ri')]
LibM19090401-V04-02-page48.txt: [('fr', 'Ee')]
LibM19090701-V04-03-page34.txt: [('CHRIS', 'TIAN')]
LibM19090701-V04-03-page49.txt: [('APPEAR', 'ANCE')]
LibM19091001-V04-04-page41.txt: [('kin', 'gdom')]
LibM19091001-V04-04-page47.txt: [('APPEAR', 'ANCE')]
LibM19100101-V05-01-page14.txt: [('WASH', 'INGTON')]
LibM19100101-V05-01-page24.txt: [('per', 'se')]
LibM19100101-V05-01-page31.txt: [('L', 'OTS')]
LibM19100101-V05-01-page39.txt: [('L', 'OS')]
LibM19100101-V05-01-page49.txt: [('W', 'ASHINGTON')]
LibM19100401-V05-02-page23.txt: [('secre', 'tary')]
LibM19100401-V05-02-page5.txt: [('LIB', 'ERTY')]
LibM19100401-V05-02-page52.txt: [('C', 'HRISTIANITY'), ('Jan', 'uary')]
LibM19100701-V05-03-page32.txt: [('the', 'reof')]
LibM19100701-V05-03-page45.txt: [('ch', 'ose')]
LibM19100701-V05-03-page49.txt: [('W', 'ASHINGTON')]
LibM19101001-V05-04-page24.txt: [('MON', 'TREAL')]
LibM19101001-V05-04-page49.txt: [('Romani', 'sm')]
LibM19110401-V06-02-page18.txt: [('UNI', 'VERSITY')]
LibM19110701-V06-03-page32.txt: [('sent', 'iments'), ('to', 're')]
LibM19110701-V06-03-page37.txt: [('Chur', 'ch')]
LibM19110701-V06-03-page45.txt: [('the', 're')]
LibM19110701-V06-03-page50.txt: [('expe', 'rience')]
LibM19111001-V06-04-page16.txt: [('AMER', 'ICA')]
LibM19111001-V06-04-page52.txt: [('ec', 'clesiastical')]
LibM19120101-V07-01-page49.txt: [('FA', 'ro'), ('devel', 'opment')]
LibM19120101-V07-01-page50.txt: [('W', 'ASHINGTON')]
LibM19120401-V07-02-page48.txt: [('LIB', 'ERTY')]
LibM19120701-V07-03-page13.txt: [('hol', 'iday')]
LibM19120701-V07-03-page15.txt: [('St', 'ates')]
LibM19120701-V07-03-page2.txt: [('Co', 'ercion')]
LibM19120701-V07-03-page26.txt: [('gov', 'ernment')]
LibM19120701-V07-03-page38.txt: [('AMEND', 'MENTS')]
LibM19120701-V07-03-page4.txt: [('e', 'riK')]
LibM19120701-V07-03-page52.txt: [('A', 'VE'), ('t', 'ok'), ('N', 'Os')]
LibM19121001-V07-04-page11.txt: [('c', 'ognition')]
LibM19121001-V07-04-page29.txt: [('cit', 'ations')]
LibM19121001-V07-04-page44.txt: [('Hank', 'ow')]
LibM19121001-V07-04-page5.txt: [('R', 'EC')]
LibM19121001-V07-04-page6.txt: [('a', 'Yr'), ('a', 'dm'), ('he', 'ft'), ('I', 'lai')]
LibM19121001-V07-04-page8.txt: [('prop', 'osition')]
LibM19130101-V08-01-page24.txt: [('LIB', 'ERTY')]
LibM19130101-V08-01-page40.txt: [('state', 'ments')]
LibM19130101-V08-01-page42.txt: [('a', 'nd')]
LibM19130101-V08-01-page43.txt: [('im', 'prisonment')]
LibM19130101-V08-01-page49.txt: [('T', 'ennessee')]
LibM19130101-V08-01-page50.txt: [('Rev', 'ised'), ('and', 'Re')]
LibM19130101-V08-01-page6.txt: [('I', 'NG')]
LibM19130401-V08-02-page2.txt: [('Association', 'al')]
LibM19130401-V08-02-page25.txt: [('des', 'ecration')]
LibM19130401-V08-02-page30.txt: [('the', 're')]
LibM19130401-V08-02-page50.txt: [('and', 'Re')]
LibM19130701-V08-03-page2.txt: [('e', 'ta')]
LibM19130701-V08-03-page42.txt: [('GOV', 'ERNMENT')]
LibM19130701-V08-03-page51.txt: [('AL', 'MA')]
LibM19131001-V08-04-page12.txt: [('yo', 'ng')]
LibM19131001-V08-04-page41.txt: [('establish', 'ment')]
LibM19140101-V09-01-page19.txt: [('IN', 'TERIOR')]
LibM19140101-V09-01-page23.txt: [('govern', 'ment')]
LibM19140101-V09-01-page31.txt: [('s', 'AO')]
LibM19140101-V09-01-page38.txt: [('com', 'memoration')]
LibM19140101-V09-01-page53.txt: [('Ar', 'ticles')]
LibM19140101-V09-01-page56.txt: [('e', 're')]
LibM19140401-V09-02-page11.txt: [('i', 'ons')]
LibM19140401-V09-02-page12.txt: [('combina', 'tion')]
LibM19140401-V09-02-page13.txt: [('ti', 'nes')]
LibM19140401-V09-02-page25.txt: [('per', 'se')]
LibM19140401-V09-02-page4.txt: [('M', 'UN'), ('g', 'EE'), ('to', 'RE'), ('M', 'EH')]
LibM19140401-V09-02-page49.txt: [('e', 'th')]
LibM19140401-V09-02-page52.txt: [('re', 'ligious')]
LibM19140701-V09-03-page17.txt: [('and', 're')]
LibM19140701-V09-03-page20.txt: [('I', 'ts')]
LibM19140701-V09-03-page3.txt: [('or', 'zo')]
LibM19140701-V09-03-page30.txt: [('A', 'pologete')]
LibM19140701-V09-03-page34.txt: [('con', 'trary'), ('ambi', 'tion')]
LibM19140701-V09-03-page4.txt: [('M', 'io'), ('indi', 'tes'), ('M', 'UT')]
LibM19140701-V09-03-page49.txt: [('i', 'ke')]
LibM19141001-V09-04-page13.txt: [('by', 're')]
LibM19141001-V09-04-page29.txt: [('can', 'es')]
LibM19141001-V09-04-page3.txt: [('of', 'tenest'), ('m', 'om'), ('or', 'zo')]
LibM19141001-V09-04-page38.txt: [('es', 'tablish')]
LibM19141001-V09-04-page4.txt: [('M', 'Eg')]
LibM19141001-V09-04-page49.txt: [('i', 'nn'), ('I', 'ntr')]
LibM19141001-V09-04-page50.txt: [('L', 'os')]
LibM19150101-V10-01-page15.txt: [('con', 'sistency')]
LibM19150101-V10-01-page21.txt: [('per', 'se')]
LibM19150101-V10-01-page22.txt: [('per', 'se')]
LibM19150101-V10-01-page3.txt: [('Y', 'ou'), ('or', 'zo')]
LibM19150101-V10-01-page35.txt: [('Y', 'ork'), ('CRU', 'ISER')]
LibM19150101-V10-01-page4.txt: [('M', 'Eg')]
LibM19150101-V10-01-page53.txt: [('t', 'iro')]
LibM19150401-V10-02-page14.txt: [('pam', 'phlets')]
LibM19150401-V10-02-page23.txt: [('WASH', 'INGTON')]
LibM19150401-V10-02-page25.txt: [('WASH', 'INGTON')]
LibM19150401-V10-02-page28.txt: [('CAP', 'TIVE')]
LibM19150401-V10-02-page3.txt: [('M', 'UN'), ('or', 'zo')]
LibM19150401-V10-02-page46.txt: [('CALI', 'FORNIA')]
LibM19150401-V10-02-page49.txt: [('HUN', 'DRED')]
LibM19150701-V10-03-page19.txt: [('C', 'opyright')]
LibM19150701-V10-03-page2.txt: [('Massa', 'chusetts'), ('Col', 'lege')]
LibM19150701-V10-03-page3.txt: [('M', 'Eg'), ('illus', 'trated')]
LibM19150701-V10-03-page4.txt: [('I', 'Ng')]
LibM19151001-V10-04-page2.txt: [('af', 'filiated'), ('Col', 'lege')]
LibM19151001-V10-04-page48.txt: [('C', 'UE'), ('O', 'RE'), ('Ama', 'rillo')]
LibM19151001-V10-04-page49.txt: [('m', 'Es')]
LibM19151001-V10-04-page51.txt: [('E', 'Li')]
LibM19160101-V11-01-page12.txt: [('who', 're')]
LibM19160101-V11-01-page26.txt: [('per', 'se')]
LibM19160101-V11-01-page27.txt: [('per', 'se')]
LibM19160101-V11-01-page28.txt: [('R', 'ighteousness')]
LibM19160101-V11-01-page44.txt: [('r', 'ef')]
LibM19160101-V11-01-page6.txt: [('KEN', 'TUCKY')]
LibM19160101-V11-01-page7.txt: [('Calif', 'ornia')]
LibM19160101-V11-01e-page11.txt: [('per', 'se')]
LibM19160401-V11-02-page22.txt: [('mat', 'ters')]
LibM19160401-V11-02-page4.txt: [('C', 'hr')]
LibM19160401-V11-02-page48.txt: [('Mar', 'shal')]
LibM19160701-V11-03-page14.txt: [('A', 'fter')]
LibM19160701-V11-03-page15.txt: [('de', 'partment')]
LibM19160701-V11-03-page34.txt: [('r', 'ea')]
LibM19160701-V11-03-page39.txt: [('C', "esar's")]
LibM19161001-V11-04-page15.txt: [('CHAR', 'ACTERISTIC')]
LibM19161001-V11-04-page2.txt: [('inter', 'ests')]
LibM19170101-V12-01-page2.txt: [('Ten', 'nessee'), ('af', 'filiated')]
LibM19170101-V12-01-page21.txt: [('per', 'se')]
LibM19170101-V12-01-page3.txt: [('Sab', 'batarians')]
LibM19170101-V12-01-page30.txt: [('pro', 'hibit')]
LibM19170101-V12-01-page6.txt: [('and', 're')]
LibM19170701-V12-03-page4.txt: [('I', 'RE')]
LibM19170701-V12-03-page9.txt: [('f', 'undamentals')]
LibM19171001-V12-04-page10.txt: [('CON', 'SUMED')]
LibM19171001-V12-04-page11.txt: [('suav', 'ity')]
LibM19171001-V12-04-page19.txt: [('ha', 're')]
LibM19171001-V12-04-page30.txt: [('gen', 'eral'), ('S', 'HUTE')]
LibM19180101-V13-01-page11.txt: [('intro', 'duced')]
LibM19180101-V13-01-page12.txt: [('C', "esar's")]
LibM19180101-V13-01-page17.txt: [('deter', 'Mination')]
LibM19180101-V13-01-page24.txt: [('a', 'nd')]
LibM19180101-V13-01-page31.txt: [('a', 're')]
LibM19180401-V13-02-page20.txt: [('or', 'dained')]
LibM19180401-V13-02-page24.txt: [('MASSA', 'CHUSETTS')]
LibM19180701-V13-03-page16.txt: [('THE', 'TA')]
LibM19180701-V13-03-page19.txt: [('to', 'ut')]
LibM19181001-V13-04-page11.txt: [('per', 'se')]
LibM19181001-V13-04-page20.txt: [('peril', 'ous')]
LibM19190101-V15-01-page21.txt: [('per', 'se')]
LibM19190401-V15-02-page22.txt: [('to', 'Ca')]
LibM19190401-V15-02-page23.txt: [('le', 'Fevre')]
LibM19190701-V15-03-page18.txt: [('minor', 'ity')]
LibM19190701-V15-03-page2.txt: [('Of', 'fice'), ('affil', 'iated')]
LibM19190701-V15-03-page31.txt: [('per', 'se')]
LibM19190701-V15-03-page34.txt: [('in', 'struction')]
LibM19190701-V15-03-page6.txt: [('As', 'sn')]
LibM19191001-V15-04-page17.txt: [('bap', 'tism')]
LibM19191001-V15-04-page21.txt: [('O', 'NE')]
LibM19191001-V15-04-page24.txt: [('gov', 'ernment')]
LibM19200101-V14-01-page5.txt: [('a', 'nd')]
LibM19200101-V14-01-page6.txt: [('con', 'cerning')]
LibM19200401-V14-02-page11.txt: [('W', 'ashington')]
LibM19200401-V14-02-page22.txt: [('per', 'se')]
LibM19200401-V14-02-page23.txt: [('per', 'se')]
LibM19200401-V14-02-page27.txt: [('pro', 'tection')]
LibM19200401-V14-02-page36.txt: [('z', 'oo')]
LibM19200701-V14-03-page19.txt: [('ques', 'tions')]
LibM19200701-V14-03-page25.txt: [('per', 'se')]
LibM19200701-V14-03-page36.txt: [('t', 'itI')]
In [32]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction7

Average verified rate: 0.9815507052480597

Average of error rates: 0.033392883345561265

Total token count: 1451546

In [33]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[33]:
[("'", 1499),
 ('m', 1314),
 ('d', 1253),
 ('e', 997),
 ('w', 951),
 ('t', 835),
 ('n', 774),
 ('r', 677),
 ('f', 631),
 ('g', 383),
 ('x', 271),
 ('u', 206),
 ('k', 192),
 ('tv', 150),
 ('th', 117),
 ('pa', 100),
 ('sunday-law', 92),
 ('z', 82),
 ('ex', 75),
 ('io', 71),
 ('id', 71),
 ('co', 64),
 ('postmaster-general', 62),
 ('re', 59),
 ('mo', 58),
 ('ga', 58),
 ('post-offices', 57),
 ('un-american', 57),
 ('statute-books', 56),
 ('va', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('mm', 46),
 ('q', 44),
 ('un', 43),
 ('mt', 42),
 ('attorney-general', 41),
 ('tion', 40),
 ('sunday-rest', 39),
 ('wm', 38),
 ('pp', 38),
 ('charta', 37),
 ('ro', 35),
 ('li', 35),
 ('neander', 31),
 ('-', 30),
 ('seventhday', 30),
 ('mi', 28),
 ('ky', 28),
 ('religio-political', 27)]

Correction 8 -- Remove Long Tokens with Repeating "m"s

In [34]:
reports.long_errors(errors_summary, min_length=17)[:50]
Out[34]:
(['countermemorialists',
  'immumnitommuummunitimmtwuntnimmummiona',
  'antiprohibitionists',
  'mmierriotitimmiiembitimiimerimiim',
  'vuaziffiemunimeluitennotinutnnifin',
  'nrmomoommomomrsoommommokmagmkwon',
  'iiiwtierttititiiiit',
  'iiimiumiummummimmominimmimmimmimmihmimmiimminimummummimmumummemimmimmimm',
  'church-and-stateunion',
  'iiiirreriiitlhinifid',
  'simmmismwklaiigitil',
  'mimmimmummiiimmimii',
  'iiiiiiiiiiiliiiiiiii',
  'rwiumwimmiiiiimimmumnii',
  'mmmmmmmmmmmmmmmmmns',
  'pilurprmarasigimmt',
  'preventivejurisdiction',
  'miilmilliiimilliifilmidid',
  'enosnantiemotainotientetiemtio',
  'mmozmrommomommonorummanoz',
  'humilffilitiffinummiffiiimminlimmummiiiiiiiiimm',
  'xramoxmozramommommocmommmommx',
  'seventh-day-observing',
  'yffinsmemmmmmmmwmswmmmmmmmnim',
  'ititeiltintonecfctration',
  'migininaugimmikimmu',
  'latitteilommtwtfifolror',
  'mvstimpsmgrecuttliv',
  'iullnnunulnmmuumnuluunnunuumlt',
  'mgraotrtraccommozraglgraccommicami',
  'aommommemsatammogarmaxsorarmwelimmelinuilmenompommixliniewtlominermiimmurpimumnuommurm',
  'iiiiiiiiiiimmiumulinuilmilne',
  'better-established',
  'rnomommonoszuzummummanmmollommom',
  'nemmiwiiiimortrinl',
  'ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims',
  'counter-petitioners',
  'imummintommumnimminumummmummlimmunumummummmunumumminutimmummmitimumnimmm',
  'inimlfilninninilli',
  'mmmmmmmmermmmmmmmmmmmmmmm',
  'gawavaiaaamminonwirit',
  'xxxxxxxxxxmocxxxxxxxx',
  'miiiiiniiiiiiimilimiiiiiiiiiiimiliiiiiimmionimmiumingiiiiiiiiiiiimmiliiiimmomiiiiminwiliiiiiiiiiiiiiiiiiinminsummuilimiliiinimonnimmiiiiiiiiiiiiiiiiiiimiimiq',
  'wimiimilliiiiiiiiiiiiimmimmithiiiiimmumminunifiniiiii',
  'unimilismimitimittnismitimmimittlimummumitemitimmummmintimmimiumiumnitimllminiummuntiummilmi',
  'anumilimminiumminnimminumminnummiliniummiliml',
  'affindlitilffilillikvillehd',
  'mmmmmmmmmuimmmmmmrimmmmmmmmmmmmmmm',
  'muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini',
  'rsomravramcmotrammragmonommxmommansom',
  'mmmmmmffirimmmmmmmmmmmmemmmmmmm',
  'antiecclesiastical',
  'penmenisrisdinaorabsesiceewer',
  'ractimiriiiiitiriiltililietcliteiviivittiiiitievalifittia',
  'politico-religious',
  'niviitiesialiffiliifiiilrimlnii',
  'unemeeeeeeneeleeneeetelli',
  'vaaffisl-co-pacific',
  'lllllllllllllllllllllllllllllllllllllllllllllllllllllllllll',
  "mmmosmermsimmmemmnm'iligh",
  'mmmmmnemmommmnmmmme',
  'religion-and-state',
  'ifaimitialiumuumnimimmtmimummuimmunimiummitinimminimmumminummumunnommumminumninummunim',
  'iiiiiiiiiiiiiiiiiiiiiii',
  'mgimmmmmmmmmmmmmmmmmmmmmmmmmmmm',
  'msossgmaiaassmgeamakawmalnarlaa',
  'lecosniiionpainoticsovicesfirde',
  "linunimmimrs'inumumu",
  'rrrprrrrrrrritrrrf',
  'lamjukgmdavagixiatm',
  'toforeigncountries',
  'wmiwimiiiiiiiiiiiiiiimmiumm',
  'tixtreciremyemiresnirtiortiorrioritortiorrii',
  'democraticrepublican',
  'hihinhiniiiiiiirin',
  'two-and-a-halfmile',
  'commander-in-chief',
  'self-determination',
  'mmimumumwwwiiiiiiiiiiiiiiiiiiiiiiiiiiiiilleeleteeemememme',
  'nmmmommrsonomrznemonmonomnrmotruomonom',
  'emmonmenommomumommommommonotrnommirnmn',
  'iiiiiiilliniemniiiiii',
  'tsereanctosrothciertny',
  'snlrnuurinunuununa',
  'rimareinsmiummisimememesiermem',
  'mcommommommuommommonomm',
  'tiarezemieeleismikiimeeemiewew',
  'emerhilsamalsinalso',
  'pimumwmummuniumummtimmtunit',
  'burckhardt-schatzmann',
  'constitution-makers',
  'limmiiiiimiiiiiiiiiiiiiiiiiiiiiiiiiiiiie',
  'hummmtimmmummummummore',
  'iiiiiiiiiiiiiiiiiii',
  'feemowiwiedimeiersig',
  'one-day-rest-inseven',
  'maimimiummaimmismilinuminutimmuminiumilmmitimmummumwmoimminummiumnimmititilowinitimiiiti',
  'moerlrlreemoinmemmommmommikumoe',
  'mmmmmmmmmmmmmmmmmmmmmm',
  'postmaster-general',
  'establishingreligious',
  'one-day-rest-in-seven',
  'emelieniwionsavibannotisloneemite',
  'vice-president-elect',
  'faipmkrivmriiyamkrkilsriiirrrriiiirrrrisikv',
  'rilifininniimummaimumeiminiiiiiiiniiiiiiiiiiiiiiiiiimmpumummhimumwommiimmiimmiiiiimmimmimmimiumnimimmim',
  'e-illmllommimilimmilummumenimilmnimuningumminumiiiiimilmimmunimifinnilionontimmigimiliiimiffiliffilimiliiiiiiiiiiiiimm',
  'unummonummtummunialliimumiir',
  'hiiiiiiiiiiiiiiiii',
  'wimmummtmmuntifiummiummiummmommumwffimmiummummummulmtmminammmmunnummmumummummumummr',
  'smmusissommummusismussmimussissusissimmimmiiiiiiium',
  'hiimiiiiiiiiiiiiiiiiniiumiuminimui',
  'iiiimmummiatumbiiiiimbimummiiiiiiimmimm',
  'xxxxxxxxxxxxxxxxxx',
  'ramtersimrammemarkirracarmermartm',
  'mmipoinnonfoemnnioannim',
  'nomenegvoicedienast',
  'inoomalloisossimis',
  'nmommumammammunnumumuum',
  'mozmnmwommolzemrammonommommommommn',
  'iillrieeiaiiirriardi',
  'agaomoorwairalioigtiargial',
  'lmiiiiiiiiiiiiiiiiiii',
  'ihilibillilltreterita',
  'secretary-ofthe-interior',
  'conscience-fettered',
  'muummmunnummonmummumuummmunimmupm',
  'campbell-bannerman',
  'impreeloreesocoeselaal',
  'ffassininsonsiwoloolgasers',
  'unnnnnnunnmamnumununnmmunmniiimm',
  'non-sunday-observing',
  'piihnummuumbhimurunimenhomuummununimminhhohuminumuunummunnuhhhimminbui',
  'mheminuffinfillffilimis',
  'alliallallialliallaillassiiiiiiiiiiimiiiiiiiiiiiiim',
  'mmmmeimmimmmmmmmmmmmmmmttmmmmmtim',
  'inforfaisiomomincomocadoviemmigoimiwa',
  'smossmunssunommummusnmussmssmissussmsmussmmssmissmossmussussummmmusstmosssmsmssmnnsmimmumsmimmwsrmossumms',
  'sssssssssssssssssssss',
  'mmiiiimumhimimmiiiiimm',
  'statesman-preacher',
  'inter-denominational',
  'nosonmomorwemcwaint',
  'reconstructionists',
  'mmommmommommommmom',
  'mnrummommommoncommommmown',
  'nfiemmeemmeemmmeeeeeeeeeeeemeeem',
  'ipuitnilinimilliiiinulillluunii',
  'ehmmmmmmmmmmmmmmmmmmmmmmmmmmmmm',
  'mmmmommmmmmmwmnirimemmmmmm',
  'wamegkimnmrummmmesemvmmmrmk',
  'nininimummujimininlini',
  'lieutenantgovernor',
  'self-glorification',
  'jiuwuuwnnwumllonllllllllhihiiuiiuihul',
  'ommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt',
  'self-aggrandizement',
  'mummmummimmimmimmimmiiiimmiiiimmiummimmiimimmimmimmiihimmiimmi',
  'religious-sabbatic',
  'iitoitllislossoliiosill',
  'intheszealwarfejrrnicenathemoatiry',
  'religio-constitutional',
  'iiiiiiiiiiiiiiiiiiii',
  'personal-heart-conversion',
  'much-to-be-desired',
  'netlftrrmmidhimizmmommommilvmm',
  'curiosity-gratifying',
  'ummuummutmummuummmiummummummummumumminummonummunummmummuummuuttimmumut',
  'immuunumummummmtuummummiumunumtumffimmmutummunmuu',
  'mconslfaitmeegtifo',
  'monmmaimmenimmmmmmmemm',
  'itmlinillitiniiimmullimitilittiminunitiffitiminimmituniumnitmitilistimmilimutiiiiiimitimitintiumnimmummitm',
  'emsmwmmmwmmmnmhoneni',
  'eimmiumiiiiiiimmumummiiimumillimimminimumaniumiffiffimmiummuumniimmommumiummlinmmiumullimmi',
  'selfaggrandizement',
  'flummimmumommifiumwmffimmumnimmimummlimmumimmmunlimmmummmumuummumummlimuummumumumung',
  'faimmeigegrommegfa',
  'satisfactostruction',
  'miommooomoomsoicimuchmusuoihiuoimisiummicosississinasseeememeescs',
  'monommomozragrammxragnm',
  'pecsetemmeltigazolom',
  'ssumsffismssumusummummtmussessumnsumussunstsmossmossmwsussmumunnmunsummossumsnwssumminimmsnintminimmusmussinissunues',
  'trgatimedimegoovemotwo',
  'iiimillilintirnimmimmiiiiiiiiiiiiiiitiiiiiiiiiiiiiffilii',
  'animmiimmimmiimmiimimmiwimmumwhimommommiimmihmummimiummimmemiimmummummenumiiiimmuumminummis',
  "attorney-general's",
  'rimmineiiiiiiiiimirre',
  'sunday-enforcement',
  'momeoecimmoimommomommoiximm',
  'nitroenrtenaddlimeg',
  'mititayerwiriiiinicrierier',
  'twenty-four-hour-day',
  'atssussusumoususissonclaciiimmiimmisiscommissi',
  'ragmmmmmmmmmmmmmmmmmmmmimmm',
  'mmmmmmmmmmmmmmmmme',
  'xectimmecemommommiimommommomme',
  'mmohcomemmaragraanilmmmohm',
  'iiiiiiiinillitiiii',
  'criiitriatoyearetriarmireirntrecltwieviretriarctieanyaremiractmiteetreowehatio',
  'lllllllllllllllllllllllllll',
  'state-and-religion',
  'compulsory-sunday-law',
  'iiiiiiiiiiiiiiiiiiiiiiiiiii',
  'hmhimimmiiiiiimmihmiimumm',
  'iiiiiiiiiiiiiiiiiiiii',
  'mouaamaaammmaaaaaaaaamamanmmammmammimaaaaaamaaaaaammiaaaaaa',
  'illlllulllllllllllllllllllllllllllll',
  'iwiiiiiiiiiiiiiiiiii',
  'associate-justices',
  'mmumiimmumiummimmiimmiiimimmiwimmumbiumummuimmiimmwimummummirmiumie',
  'consaalermtooldlny',
  'counterdenunciations',
  'wralrammimmrzrznomnommgmmonom',
  'iiiiiiiimmiiiiiiiiiiiiimmium',
  'iimmiimmummuimmimummwimummmimmimimmiummmummuminwimbhmmimmiliniffillinnuffiffill',
  'obviouslyagreement',
  'one-dayof-rest-in-seven',
  'mezmommommonommomommommommommmom',
  'rrigtreatiariiirriiriiiiriiiitrivittioriiilrrictiiilriiiitii',
  'mmommommimommotimmotmm',
  'history-confirming',
  'semi-ecclesiastical',
  'lffiffimffithimmouninoffimmommummuommunimmonwiniiiiminnumumminriumminlimminiiiiiiiiiiimmonimum',
  'tomplonsesolomerol',
  'mmmmmmnswmmesimmornmmmmmm',
  'intelligent-looking',
  'ivosengtoexirmemed',
  'suspension-bridges',
  'self-righteousness',
  'miummuiiiiiiiiiimmummmiiiiihimmmummimmummimmimi',
  'mmemmmmmmmmmmeemmmemmmmmmmmmm',
  'mmmnwnsommmmmmmmrmmmolm',
  'iumitimumummumunnumaintimmummumumiumummumummtunmitimuumminnimuummuummumminumismiumnimmuntimmmuthw',
  'five-million-dollar',
  'esnmemmmmmmrimmmmeenmmemmm',
  'politico-ecclesiastical',
  'counterallegations',
  'meenmenmmmmmmmmmawknmmmmmeg',
  'mmmmmmmmmmmmmmmmmmmx',
  'alaska-yukon-pacific',
  'ecemoictiemememoodemeeeme',
  'commandment-keepers',
  'trothofabusesandegurpatent',
  'trading-with-the-enemy',
  'go-to-churchor-stay-indoors',
  'bureau-of-military-intelligence',
  'mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum',
  'seventeenth-century',
  'iitiiiitiwiiiititivaititignifittaiiiitqawilitilitit',
  'mmniummiunrimiiiiiiiilfmfiinotoiiimun',
  'ussosiiimiwohmiiiiiiiismisoisisiiiiminallioisoisoisoisosiososi',
  'mrnmimmmm-rimmmmmmoamrimmmmm',
  'prescott-wilson-tumulty',
  'nifilnimifintilllillflillifilnifilmiummiiffillfill',
  'inimiiiiiminunimmilimumusinominimuninimmilmr',
  'emirmeilsaarsinemiliehmee',
  'rograssmargmeermirl',
  'ttimilimmumulminnittinitintinninitutimmi',
  'frankfort-on-the-main',
  'glilihiliiiiiiraiii',
  'religious-legislation',
  'intfilnilhimirimihimmihimiirminlnimimiriminiimium',
  'minimmiiiiiiiiiimihwohimulla',
  'one-day-of-rest-in-seven',
  'uvrapsimisulswipampiampv',
  'lieutenant-colonel',
  'iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii',
  'yhmommomownwmmmmmmm',
  'half-pintof-claret',
  'mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx',
  'immegmmmmmmmmmmmmmmmmmr',
  'mmmeammmmmmmmmmmmmmmmmmmemmmmm',
  'parochial-school-system',
  'ztkirmintzflrmerifranc',
  'mramiluesimairrimamesiemiamemilie',
  'emimmiummehmemeimmimminimmummminimmeminimmumummemminiummunieli',
  'agretiitilitltitstriffigtisifitiveram',
  'wommiumniffunivirlsoir',
  'maher-shalal-hash-baz',
  'xxxxxxxxxxxxxxxxxxxxxx',
  'xxxxxxxxxxxxxxxxxxxxx',
  'ossionosollsomasismisiiiiiisiiimissimisomallaallallaillaffluss',
  'mommummmufflummunmuummmutommummmuummmumumummummumunummummuunmuumwo',
  'governmentsupported',
  'immmotzmotatmtmommzum',
  'mmommomeommmmozmommotrmmgramopagr',
  'heaven-enlightened',
  "postmaster-general's",
  'mmgrommmmommgrmommmoromrmonorz',
  'succeedinggenerations',
  'imememememeinimeimii',
  'siiiiiiilitaiiiiiiiiiaisill',
  'self-contradictory',
  'ostammosanosonsorr',
  'rnitivittiltifirmi',
  'muniummmitimlinini',
  'ormucesemmommannumorammosimemaamoutammovomnumeammnommukumumonmustormmummunno',
  'counterdemonstrations',
  'i-ifidairicliiiriiirroi',
  'thefactthattheyinvolvethevitalprinciple',
  'inmpaiavimmipamipammmiximp',
  'demonstrainfluence',
  'nunnnunnuuuuuuuuuuunnuuuuuuuuuunnnuuunmuuuumusuuuuunuuuuuuuuuuuuuuuununnnnnunuuuuunuuuuuuuuunuwuum',
  'immumimilimitmliminiiimiiiiiiiiiiiiiiiiiiiimmiiiiiiiiiiimmintmill',
  'milliummiumunmionwimmimmiumr',
  'weiverreitaararforreahaarivitoroyerriiivii',
  'vriliriiifiertailitarectrinfeltriatiatictitlifie',
  'iiimuumimiiiiiimhomidfinnlinlinnflunnhohhohimimhhommilinlinflo',
  'maher-shalalhash-baz',
  'jerusalem-to-jericho',
  'church-and-state-union',
  'mimmomiosomosoissoisioissossosivissossicsiiiss',
  'hriimmiiiiimeimiiimihni'],
 17)

Remove long tokens with long strings of "m", "i", "l", "x"

In [35]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = "correction7"
cycle = "correction8"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    replacements.append(clean.check_for_repeating_characters(tokens, "m"))
    replacements.append(clean.check_for_repeating_characters(tokens, "M"))
    replacements.append(clean.check_for_repeating_characters(tokens, "i"))
    replacements.append(clean.check_for_repeating_characters(tokens, "I"))
    replacements.append(clean.check_for_repeating_characters(tokens, "l"))
    replacements.append(clean.check_for_repeating_characters(tokens, "x"))
    replacements.append(clean.check_for_repeating_characters(tokens, "X"))
    replacements.append(clean.check_for_repeating_characters(tokens, "u"))
    replacements.append(clean.check_for_repeating_characters(tokens, "n"))
    
    replacements = [item for sublist in replacements for item in sublist]
            
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19080101-V03-01-page1.txt: [('wimmummtmmuntifiummiummiummmommumwffimmiummummummulmtmminammmmunnummmumummummumummr', ' '), ('JiuWuuWnnWumllonllllllllhIHIIUIIUIHUL', ' ')]
LibM19080401-V03-02-page1.txt: [('ifaimitialiumuumnimimmtmimummuimmunimiummitinimminimmumminummumunnommumminumninummunim', ' '), ('mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx', ' '), ('hummmtimmmummummummore', ' '), ('lllllllllllllllllllllllllll', ' '), ('lllllllllllllllll', ' ')]
LibM19080701-V03-03-page1.txt: [('muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini', ' '), ('Eimmiumiiiiiiimmumummiiimumillimimminimumaniumiffiffimmiummuumniimmommumiummlinmmiumullimmi', ' '), ('MMIMMIMMIMUMMIP', ' '), ('Eimmiumiiiiiiimmumummiiimumillimimminimumaniumiffiffimmiummuumniimmommumiummlinmmiumullimmi', ' ')]
LibM19081001-V03-04-page1.txt: [('flummimmumommifiumwmffimmumnimmimummlimmumimmmunlimmmummmumuummumummlimuummumumumung', ' '), ('MMUMIIMMUMIUMMIMMIIMMIIIMIMMIWIMMUMBIUMUMMUIMMIIMMWIMUMMUMMIRMIUMIE', ' '), ('HumilffilitiffinumMIffiiimminlIMMUMMIIIIIIIIIMM', ' '), ('MMUMIIMMUMIUMMIMMIIMMIIIMIMMIWIMMUMBIUMUMMUIMMIIMMWIMUMMUMMIRMIUMIE', ' '), ('UMMIIIIIIMIIIIIM', ' ')]
LibM19090101-V04-01-page1.txt: [('iumitimumummumunnumaintimmummumumiumummumummtunmitimuumminnimuummuummumminumismiumnimmuntimmmuthw.', ' '), ('iumitimumummumunnumaintimmummumumiumummumummtunmitimuumminnimuummuummumminumismiumnimmuntimmmuthw.', ' ')]
LibM19090401-V04-02-page1.txt: [('HMHIMIMMIIIIIIMMIHMIIMUMM', ' '), ('PIIHNUMMUUMBHIMURUNIMENHOMUUMMUNUNIMMINHHOHUMINUMUUNUMMUNNUHHHIMMINBUI', ' '), ('HMHIMIMMIIIIIIMMIHMIIMUMM', ' '), ('unnnnnnunnmamnumununnmMunmNIIIMM', ' ')]
LibM19090701-V04-03-page1.txt: [('ummuummutmummuummmiummummummummumumminummonummunummmummuummuuttimmumut', ' '), ('MIUMMUIIIIIIIIIIMMUMMMIIIIIHIMMMUMMIMMUMMIMMIMI', ' '), ('MIUMMUIIIIIIIIIIMMUMMMIIIIIHIMMMUMMIMMUMMIMMIMI', ' '), ('lllllllllllllllllllllllllllllllllllllllllllllllllllllllllll', ' '), ('ummuummutmummuummmiummummummummumumminummonummunummmummuummuuttimmumut', ' ')]
LibM19100101-V05-01-page1.txt: [('nmommumammammunnumumuuM', ' '), ('MUMMMUMMIMMIMMIMMIMMIIIIMMIIIIMMIUMMIMMIIMIMMIMMIMMIIHIMMIIMMI', ' '), ('rilifininniiMUMMAIMUMEIMiniiiiiiiniiiiiiiiiiiiiiiiiiMMPUMUMMHIMUMWOMMIIMMIIMMIIIIIMMIMMIMMIMIUMNIMIMMIM', ' '), ('rilifininniiMUMMAIMUMEIMiniiiiiiiniiiiiiiiiiiiiiiiiiMMPUMUMMHIMUMWOMMIIMMIIMMIIIIIMMIMMIMMIMIUMNIMIMMIM', ' '), ('MUMMMUMMIMMIMMIMMIMMIIIIMMIIIIMMIUMMIMMIIMIMMIMMIMMIIHIMMIIMMI', ' '), ('rilifininniiMUMMAIMUMEIMiniiiiiiiniiiiiiiiiiiiiiiiiiMMPUMUMMHIMUMWOMMIIMMIIMMIIIIIMMIMMIMMIMIUMNIMIMMIM', ' '), ('lliilligijnirMli"Illj', ' ')]
LibM19100401-V05-02-page1.txt: [('unummonummtummunialliimumiir', ' '), ('ommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt', ' '), ('WIMIIMilliiiiiiiiiiiiimmimmithiiiiimmumminunifiniiiii', ' '), ('mommummmufflummunmuummmutommummmuummmumumummummumunummummuunmuumwo', ' '), ('MMIIIIMUMHIMIMMIIIIIMM', ' '), ('iiiimmummiatUMBIIIIIMBIMUMMIIIIIIIMMIMM', ' '), ('WIMIIMilliiiiiiiiiiiiimmimmithiiiiimmumminunifiniiiii', ' '), ('iiiimmummiatUMBIIIIIMBIMUMMIIIIIIIMMIMM', ' '), ('mommummmufflummunmuummmutommummmuummmumumummummumunummummuunmuumwo', ' ')]
LibM19100701-V05-03-page1.txt: [('imummintommumnimminumummmummlimmunumummummmunumumminutimmummmitimumnimmm', ' '), ('immuunumummummmtuummummiumunumtumffimmmutummunmuu', ' '), ('immuunumummummmtuummummiumunumtumffimmmutummunmuu', ' ')]
LibM19101001-V05-04-page1.txt: [('MIMMIMMUMMIIIMMIMII', ' '), ('ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS', ' '), ('EMIMMIUMMEHMEMEIMMIMMINIMMUMMMINIMMEMINIMMUMUMMEMMINIUMMUNIEli', ' '), ('IIIMIUMIUMMUMMIMMOMINIMMIMMIMMIMMIHMIMMIIMMINIMUMMUMMIMMUMUMMEMIMMIMMIMM', ' '), ('ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS', ' '), ('MINIMMIIIIIIIIIIMIHWOHIMUlla', ' '), ('liMMIIIIIMIIIIIIIIIIIIIIIIIIIIIIIIIIIIIE', ' ')]
LibM19110101-V06-01-page1.txt: [('...ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims', ' '), ('muummmunnummonmummumuummmunimmuPm.n', ' '), ('IIMMIIMMUMMUIMMIMUMMWIMUMMMIMMIMIMMIUMMMUMMUMINWIMBHMMIMMIliniffillinnUffiffill', ' '), ('mmniummiunrimiiiiiiiilfmfiinotoiiimun', ' '), ('WMIWIMIIIIIIIIIIIIIIIMMIUMM', ' '), ('iullnnunulnmmuumnuluunnunuumlt', ' ')]
LibM19110401-V06-02-page48.txt: [('MMMMMIMIMMMMMMMMM', ' '), ('MMMMMMMMMMMMMMMMME', ' '), ('IMMEgMMMMMMMMMMMMMMMMMR', ' '), ('MMMMMMMMMIMMM', ' ')]
LibM19110401-V06-02-page49.txt: [('xxxxxxxxxxxxxxxxxxxxxx', ' '), ('XXXXXXXXXXXXXXXXXXXXX', ' ')]
LibM19110701-V06-03-page1.txt: [('pimumwmummuniumummtimmtunit', ' '), ('Milliummiumunmionwimmimmiumr', ' '), ('iiimillilintirniMMIMMIIIIIiiiiiiiiiitiiiiiiiiiiiiiffilii', ' '), ('IIIIIIIIMMIIIIIIIIIIIIIMMIUM', ' ')]
LibM19110701-V06-03-page48.txt: [('FAIPMKRIVMRIIYAMKRKILSRIIIRRRRIIIIRRRRISIKV', ' ')]
LibM19110701-V06-03-page49.txt: [('xxxxxxxxxxmocxxxxxxxx', ' '), ('XXXXXXXXXXXXXXXXX', ' '), ('XXXXXXXXXXXXXX', ' ')]
LibM19111001-V06-04-page49.txt: [('XXXXXXXXXXXXXXXX', ' '), ('XXXXXXXXXXXXXXXXXXXXXX', ' '), ('XXXXXXXXXXXXXXXXXX', ' ')]
LibM19120401-V07-02-page49.txt: [('mezmommommonommomommommommommmom', ' ')]
LibM19120401-V07-02-page50.txt: [('mnrummommommoncommommmown.mo', ' '), ('mmgrommmmommgrmommmoromrmonorz.', ' ')]
LibM19120401-V07-02-page51.txt: [('nrmomoommomomrsoommommokmagmkwon.', ' ')]
LibM19120701-V07-03-page49.txt: [('Emmonmenommomumommommommonotrnommirnmn', ' ')]
LibM19120701-V07-03-page50.txt: [('XraMOXMOZraMOMMOMMOCMOMMMOMMX', ' '), ('MMOMMMOMMOMMOMMMOM', ' ')]
LibM19120701-V07-03-page51.txt: [('rsomravramcmotrammragmonommxmommansom', ' '), ('mozmnmwommolzemrammonommommommommn', ' '), ('MOMa.netlftrrMMIDHIMIZMMOMMOMMILVMM', ' ')]
LibM19121001-V07-04-page3.txt: [('MMOMMOMMIMOMMOTIMMOTMM', ' ')]
LibM19130101-V08-01-page3.txt: [('MMOMMOMEOMMMMOZMOMMOtrMMgraMOPagr', ' ')]
LibM19130101-V08-01-page4.txt: [('ragMMMMMMMMMMMMMMMMMMMMIMMM', ' ')]
LibM19130401-V08-02-page3.txt: [('MOMEOECIMMOIMOMMOMOMMOIXIMM', ' ')]
LibM19130401-V08-02-page4.txt: [('wralrammimmrzrznomnommgmmonom', ' ')]
LibM19130401-V08-02-page52.txt: [('mmozmrommomommonorummanoz', ' ')]
LibM19130701-V08-03-page3.txt: [('MMOHCOMEMMAragraanilMMMOHM', ' ')]
LibM19140101-V09-01-page3.txt: [('XECTIMMECEMOMMOMMIIMOMMOMMOMME', ' ')]
LibM19140101-V09-01-page4.txt: [('IMMEMMOMMOMMEMN', ' ')]
LibM19140401-V09-02-page3.txt: [('MMMEAMMMMMMMMMMMMMMMMMMMEMMMMM', ' '), ('MMMMMMMMMMMMMMMMMnS', ' ')]
LibM19140401-V09-02-page4.txt: [('MEENMENMMMMMMMMMaWKNMMMMMEg', ' '), ('MRNMIMMMM-riMMMMMMOAMRIMMMMM', ' ')]
LibM19140701-V09-03-page4.txt: [('mmmmmmnswmmEsImmornmmmmmm', ' '), ('MMMMMMMMMMMMMMMMMMMMMM', ' '), ('EHMMMMMMMMMMMMMMMMMMMMMMMMMMMMM', ' ')]
LibM19140701-V09-03-page52.txt: [('.ormucesemmommannumorammosimemaamoutammovomnumEammnommuKumumonmustormmummunno', ' '), ('aommommemsatammogarmaxsorarmwelimMelinuilmenomPommixliniewtlominermiimmurpimumnuommurM', ' ')]
LibM19141001-V09-04-page4.txt: [('MMEMMMMMMMMMMEEMMMEMMMMMMMMMM', ' ')]
LibM19141001-V09-04-page49.txt: [('LMIIIIIIIIIIIIIIIIIII', ' ')]
LibM19141001-V09-04-page50.txt: [('MMMMMMMMMMMMM', ' ')]
LibM19150101-V10-01-page3.txt: [('mmmmommmmmmmwmnirimEmmmmmm', ' '), ('EMMMMMMMNiMMMMMM', ' ')]
LibM19150101-V10-01-page4.txt: [('Mmmnwnsommmmmmmmrmmmolm', ' '), ('MMMMMMMMMMMMMMMMMMMX', ' ')]
LibM19150101-V10-01-page52.txt: [('MMMMMMMMMUIMMMMMMRIMMMMMMMMMMMMMMM', ' ')]
LibM19150401-V10-02-page3.txt: [('MMMMMMMMERMMMMMMMMMMMMMMM', ' ')]
LibM19150401-V10-02-page4.txt: [('MMMMMNEMMOMMMNMMMME', ' '), ('ESNMEMMMMMMRiMMMMEENMMEMMM', ' ')]
LibM19150401-V10-02-page49.txt: [('Illlllulllllllllllllllllllllllllllll', ' ')]
LibM19150701-V10-03-page3.txt: [('MMMMMMffiRiMMMMMMMMMMMMEMMMMMMM', ' ')]
LibM19150701-V10-03-page4.txt: [('moNmmaimmEnimmmmmmmEmm', ' '), ('NFIEMMEEMMEEMMMEEEEEEEEEEEEMEEEM', ' ')]
LibM19151001-V10-04-page48.txt: [('MgiMMMMMMMMMMMMMMMMMMMMMMMMMMMM', ' '), ("MMMOSMERMSIMMMEMMNM'iligH", ' ')]
LibM19151001-V10-04-page49.txt: [('yffinsmEmmmmmmmwmswmmmmmmmnim', ' '), ('MMMMEiMMIMMMMMMMMMMMMMMTTMMMMMTIM', ' ')]
LibM19151001-V10-04-page50.txt: [('Yhmommomownwmmmmmmm', ' ')]
LibM19160101-V11-01-page4.txt: [('MMI.I.I.IIWIFINMOMM...MM.M', ' ')]
LibM19160101-V11-01-page51.txt: [('rnomommonoszuzummummanmmollommom', ' ')]
LibM19160101-V11-01e-page1.txt: [('immumnitommuummunitimmtwuntnimmummiona', ' ')]
LibM19160401-V11-02-page26.txt: [('t..glilihiliiiiiiraiii', ' ')]
LibM19160401-V11-02e-page1.txt: [('maimimiummaimmismilinuminutimmuminiumilmmitimmummumwmoimminummiumnimmititilowinitimiiiti', ' '), ('lllllllllllllll', ' ')]
LibM19170701-V12-03-page3.txt: [('IIIIIIIIIIIII', ' ')]
LibM19171001-V12-04-page19.txt: [('YIIIIIIIIIIIIIII', ' ')]
LibM19171001-V12-04-page3.txt: [('iiiiiiiiiii.c.ii', ' ')]
LibM19180101-V13-01-page18.txt: [('Lffiffimffithimmouninoffimmommummuommunimmonwiniiiiminnumumminriumminlimminiiiiiiiiiiimmonimum', ' '), ('Lffiffimffithimmouninoffimmommummuommunimmonwiniiiiminnumumminriumminlimminiiiiiiiiiiimmonimum', ' ')]
LibM19180101-V13-01-page19.txt: [('inimiiiiiminunimmilimumusinominimuninimmilmr.anumilimminiumminnimminumminnummiliniummiliml', ' ')]
LibM19180401-V13-02-page3.txt: [('ossionosollSoMaSISMISIIIIIISIIIMISSIMISOMallaallallaillafflUSS', ' '), ('USSOSIIIMIWOHMIIIIIIIISMISOISISIIIIMinallIOISOISOISoisosiososi.', ' '), ('ossionosollSoMaSISMISIIIIIISIIIMISSIMISOMallaallallaillafflUSS', ' ')]
LibM19180701-V13-03-page3.txt: [('smossmunssunommummusnmussmssmissussmsmussmmssmissmossmussussummmmusstmosssmsmssmnnsmimmumsmimmwsrmossumms', ' '), ('ssumsffismssumusummummtmussessumnsumussunstsmossmossmwsussmumunnmunsummossumsnwssumminimmsnintminimmusmussinissunues', ' ')]
LibM19181001-V13-04-page16.txt: [('iiiiiiiiiiiiiiiiiiiiiii', ' ')]
LibM19181001-V13-04-page3.txt: [('nifilnIMIfintilllillflillifilnifilmiummiiffillfill', ' ')]
LibM19190101-V15-01-page3.txt: [('siiiiiiilitaiiiiiiiiiaisill', ' '), ('alliallallialliallaillaSSIIIIIIIIIIIMIIIIIIIIIIiiim', ' '), ('alliallallialliallaillaSSIIIIIIIIIIIMIIIIIIIIIIiiim', ' ')]
LibM19190401-V15-02-page14.txt: [('IIIIIIIIIIIIIIII', ' '), ('HIIMIIIIIIIIIIIIIIIINIIUMIUMINIMUI', ' ')]
LibM19190401-V15-02-page15.txt: [('IWIIIIIIIIIIIIIIIIII', ' ')]
LibM19190401-V15-02-page18.txt: [('iiiiiiiinillitiiii', ' ')]
LibM19190401-V15-02-page3.txt: [('immumimilimitmliminiiimiiiiiiiiiiiiiiiiiiiimmiiiiiiiiiiimmintmill', ' '), ('atssussusumoususissonclaCIIIMMIIMMISISCOMMISSI', ' '), ('immumimilimitmliminiiimiiiiiiiiiiiiiiiiiiiimmiiiiiiiiiiimmintmill', ' '), ('.MMIMUMUMWWWIIIIIIIIIIIIIIIIIIIIIIIIIIIIIlleeleteeemememme', ' ')]
LibM19190701-V15-03-page3.txt: [('IIIIIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIIIIIIIII', ' ')]
LibM19190701-V15-03-page4.txt: [('IIIIIIIIIIIIIII', ' ')]
LibM19191001-V15-04-page14.txt: [('miiiiiniiiiiiimilimiiiiiiiiiiimiliiiiiimmionimmiumingiiiiiiiiiiiimmiliiiimmomiiiiminwiliiiiiiiiiiiiiiiiiinminsummuilimiliiinimonnimmiiiiiiiiiiiiiiiiiiimiimiq', ' '), ('miiiiiniiiiiiimilimiiiiiiiiiiimiliiiiiimmionimmiumingiiiiiiiiiiiimmiliiiimmomiiiiminwiliiiiiiiiiiiiiiiiiinminsummuilimiliiinimonnimmiiiiiiiiiiiiiiiiiiimiimiq', ' ')]
LibM19191001-V15-04-page17.txt: [('mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', ' '), ('.itmlinillitiniiimmullimitilittiminunitiffitiminimmituniumnitmitilistimmilimutiiiiiimitimitintiumnimmummitm', ' '), ('unimilismimitimittnismitimmimittlimummumitemitimmummmintimmimiumiumnitimllminiummuntiummilmi', ' '), ('mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', ' '), ('.itmlinillitiniiimmullimitilittiminunitiffitiminimmituniumnitmitilistimmilimutiiiiiimitimitintiumnimmummitm', ' '), ('mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', ' ')]
LibM19191001-V15-04-page27.txt: [('IIIMUUMIMIIIIIIMHOMIDfinnlinlinnflUnNHOHHOHIMIMHHOMMIlinlinflO', ' ')]
LibM19191001-V15-04-page28.txt: [('HIIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIlIIIIIIII', ' '), ('IIIIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIIIIIIIII', ' ')]
LibM19200401-V14-02-page16.txt: [('MMIERRIOTITIMMIIEMBITIMIIMERIMIIM', ' '), ('HRIIMMIIIIIMEIMIIIMIHNI', ' ')]
LibM19200401-V14-02-page28.txt: [('IIIIIIIIIIIIIIIIIIIIIIIIIII', ' ')]
LibM19200401-V14-02-page3.txt: [('iiimmiiiiii..', ' ')]
LibM19200401-V14-02-page31.txt: [('.e-illmllommimilimmilummumenimilmnimuningumminumiiiiimilmimmunimifinnilionontimmigimiliiimiffiliffilimiliiiiiiiiiiiiimm', ' '), ('.e-illmllommimilimmilummumenimilmnimuningumminumiiiiimilmimmunimifinnilionontimmigimiliiimiffiliffilimiliiiiiiiiiiiiimm', ' ')]
LibM19200401-V14-02-page32.txt: [('smmusissommummusismussmimussissusissimmimmiiiiiiium', ' ')]
LibM19200401-V14-02-page36.txt: [('mcommommommuommommonomm', ' '), ('nunnnunnuuuuuuuuuuunnuuuuuuuuuunnnuuunmuuuumusuuuuunuuuuuuuuuuuuuuuununnnnnunuuuuunuuuuuuuuunuwuum', ' '), ('nunnnunnuuuuuuuuuuunnuuuuuuuuuunnnuuunmuuuumusuuuuunuuuuuuuuuuuuuuuununnnnnunuuuuunuuuuuuuuunuwuum', ' ')]
LibM19200401-V14-02-page5.txt: [('ENEIIIIIIIESEIIIE', ' ')]
LibM19200701-V14-03-page12.txt: [('rrigtreatiariiirriiriiiiriiiitriVittiOriiilrricTiiilriiiitii', ' '), ('NiViitiesiAliffiliifiiilrimlnii', ' ')]
LibM19200701-V14-03-page15.txt: [('racTIMIriiiiitiriiltililietcliteiViiVittiiiitiEVAlifittiA', ' ')]
LibM19200701-V14-03-page8.txt: [('iitiiiitiWiiiititiVaititigniFittaiiiitQaWilitilitit', ' ')]
LibM19201001-V14-04-page3.txt: [('iiiiiiilliniemniiiiii', ' ')]
LibM19201001-V14-04-page35.txt: [('mouaamaaammmaaaaaaaaamamanmmammmammimaaaaaamaaaaaammIaaaaaa', ' '), ('MOErlrlrEEMOINMEMMOMMMOMMIKUMOE', ' '), ('IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII', ' ')]
In [36]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction8

Average verified rate: 0.981664985503412

Average of error rates: 0.03307226705796038

Total token count: 1451376

In [37]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[37]:
[("'", 1499),
 ('m', 1313),
 ('d', 1253),
 ('e', 997),
 ('w', 951),
 ('t', 834),
 ('n', 773),
 ('r', 677),
 ('f', 631),
 ('g', 383),
 ('x', 271),
 ('u', 206),
 ('k', 192),
 ('tv', 150),
 ('th', 117),
 ('pa', 100),
 ('sunday-law', 92),
 ('z', 82),
 ('ex', 75),
 ('io', 71),
 ('id', 71),
 ('co', 64),
 ('postmaster-general', 62),
 ('re', 59),
 ('ga', 58),
 ('post-offices', 57),
 ('mo', 57),
 ('un-american', 57),
 ('statute-books', 56),
 ('va', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('mm', 45),
 ('q', 44),
 ('un', 43),
 ('mt', 42),
 ('attorney-general', 41),
 ('tion', 40),
 ('sunday-rest', 39),
 ('wm', 38),
 ('pp', 38),
 ('charta', 37),
 ('ro', 35),
 ('li', 35),
 ('neander', 31),
 ('-', 30),
 ('seventhday', 30),
 ('mi', 28),
 ('ky', 28),
 ('religio-political', 27)]
In [38]:
reports.long_errors(errors_summary, min_length=15)
Out[38]:
(['countermemorialists',
  'theconstitutionof',
  'well-intentioned',
  'antiprohibitionists',
  'immmotzmotatmtmommzum',
  'sundayobservance',
  'disease-resisting',
  'less-enlightened',
  'vuaziffiemunimeluitennotinutnnifin',
  'cavendish-benand',
  'comizairadtgicao',
  'church-and-stateunion',
  'constitutionalty',
  'california-nevada',
  'winnington-ingram',
  'iiiirreriiitlhinifid',
  'boarding-schools',
  'simmmismwklaiigitil',
  're-establishment',
  'publishing-houses',
  'rwiumwimmiiiiimimmumnii',
  'pilurprmarasigimmt',
  'preventivejurisdiction',
  "religio'political",
  'miilmilliiimilliifilmidid',
  'enosnantiemotainotientetiemtio',
  'one-day-in-seven',
  'elanornelkiiisre',
  'relies-political',
  'narrow-mindedness',
  'harmless-looking',
  'seventh-day-observing',
  'jeradycerelsolid',
  'ititeiltintonecfctration',
  'non-commissioned',
  'migininaugimmikimmu',
  'latitteilommtwtfifolror',
  'mvstimpsmgrecuttliv',
  'mgraotrtraccommozraglgraccommicami',
  'ex-vice-president',
  'prcestantissinium',
  'iiiiiiiiiiimmiumulinuilmilne',
  'above-referred-to',
  'fourteen-per-cent',
  'better-established',
  'nemmiwiiiimortrinl',
  'counter-petitioners',
  'inimlfilninninilli',
  'non-intoxicating',
  'gawavaiaaamminonwirit',
  'blood-guiltiness',
  'mememeeememememe',
  'whowrotethefamous',
  'penmenisrisdinaorabsesiceewer',
  'affindlitilffilillikvillehd',
  'life-disagreeable',
  'twice-interrupted',
  'antiecclesiastical',
  'tinitoriinlintol',
  'politico-religious',
  'much-appealed-to',
  'religiopolitical',
  'glillilisibffille',
  'unemeeeeeeneeleeneeetelli',
  'omortioionososom',
  'vaaffisl-co-pacific',
  'statesprinciples',
  'eimf-immmmnmnmlne',
  'religion-and-state',
  'rwraeadttchehman',
  'long-established',
  'msossgmaiaassmgeamakawmalnarlaa',
  'lecosniiionpainoticsovicesfirde',
  "linunimmimrs'inumumu",
  'litico-religious',
  'rrrprrrrrrrritrrrf',
  'lamjukgmdavagixiatm',
  'toforeigncountries',
  'tixtreciremyemiresnirtiortiorrioritortiorrii',
  'democraticrepublican',
  'medico-actuarial',
  'controversialist',
  'hihinhiniiiiiiirin',
  'wind-instruments',
  'twenty-four-hour',
  'two-and-a-halfmile',
  'exemption-clause',
  'estateifpuprenle',
  'commander-in-chief',
  'self-determination',
  'nmmmommrsonomrznemonmonomnrmotruomonom',
  'feeble-mindedness',
  'tsereanctosrothciertny',
  'church-membership',
  'snlrnuurinunuununa',
  'rimareinsmiummisimememesiermem',
  'tiarezemieeleismikiimeeemiewew',
  'emerhilsamalsinalso',
  'non-interference',
  'burckhardt-schatzmann',
  'constitution-makers',
  "the'constitution",
  'feemowiwiedimeiersig',
  'one-day-rest-inseven',
  'postmaster-general',
  'establishingreligious',
  'one-day-rest-in-seven',
  'shriveled-souled',
  'emelieniwionsavibannotisloneemite',
  'vice-president-elect',
  'self-renunciation',
  'self-disciplined',
  'innocent-looking',
  'fourteen-year-old',
  'state-established',
  'civilinstitution',
  'quasi-conquerors',
  'inter-brotherhood',
  'anti-evangelical',
  'ramtersimrammemarkirracarmermartm',
  'mmipoinnonfoemnnioannim',
  'nomenegvoicedienast',
  'actof-parliament',
  'inoomalloisossimis',
  'religion-andstate',
  'sixteenth-century',
  'erimmuralcotemurc',
  'uncommercialized',
  'iillrieeiaiiirriardi',
  'agaomoorwairalioigtiargial',
  'ihilibillilltreterita',
  'publishing-house',
  'vagtookagtookaog',
  'selfpreservation',
  'secretary-ofthe-interior',
  'conscience-fettered',
  'maramommraosommu',
  'campbell-bannerman',
  'character-making',
  'religio-political',
  'impreeloreesocoeselaal',
  'dyed-in-the-wool',
  'attorney-general',
  'cannikin-clinking',
  'ffassininsonsiwoloolgasers',
  'lieutenantgovernor',
  'jtuemmmmmwinimnir',
  'non-sunday-observing',
  'mheminuffinfillffilimis',
  'sabbath-breaking',
  'inforfaisiomomincomocadoviemmigoimiwa',
  'sssssssssssssssssssss',
  'statesman-preacher',
  'prcestantissimum',
  'tully-wainwright',
  'inter-denominational',
  'assumedjimperial',
  'statute-preserved',
  'nosonmomorwemcwaint',
  'reconstructionists',
  "representatives'",
  'iiiwtierttititiiiit',
  'no-religious-test',
  'ipuitnilinimilliiiinulillluunii',
  'friemoossmormior',
  'wamegkimnmrummmmesemvmmmrmk',
  'miraglia-gullotti',
  'nininimummujimininlini',
  'self-glorification',
  'heaven-appointed',
  'sunday-amusement',
  'self-aggrandizement',
  'avinavvswoirliag',
  'word-controversy',
  'religious-sabbatic',
  'iitoitllislossoliiosill',
  'money-worshiping',
  'intheszealwarfejrrnicenathemoatiry',
  'palace-befitting',
  'religio-constitutional',
  'personal-heart-conversion',
  'american-mexican',
  'state-controlled',
  'personal-liberty',
  'much-to-be-desired',
  'curiosity-gratifying',
  'jskadmemmomendim',
  'mconslfaitmeegtifo',
  'relpresentatives',
  'non-sectarianism',
  'emsmwmmmwmmmnmhoneni',
  'governor-general',
  'selfaggrandizement',
  'religionaboveall',
  'faimmeigegrommegfa',
  'satisfactostruction',
  'sunday-amendment',
  'miommooomoomsoicimuchmusuoihiuoimisiummicosississinasseeememeescs',
  'monommomozragrammxragnm',
  'statuteintrenched',
  'teiiiiriafinemie',
  'pecsetemmeltigazolom',
  'trgatimedimegoovemotwo',
  'scandalousassault',
  "attorney-general's",
  'rimmineiiiiiiiiimirre',
  'ex-congregational',
  'sunday-enforcement',
  'beverage-factory',
  'selfgratification',
  'nitroenrtenaddlimeg',
  'mititayerwiriiiinicrierier',
  'sundaymailreports',
  'liberty-imparting',
  'twenty-four-hour-day',
  'liberty-bestowing',
  'special-delivery',
  'misunderstanaing',
  'mimmuiummommosowl',
  'criiitriatoyearetriarmireirntrecltwieviretriarctieanyaremiractmiteetreowehatio',
  'state-and-religion',
  'compulsory-sunday-law',
  'unconstitunation',
  'gishrimmmmomnmon',
  'associate-justices',
  'secretary-of-war',
  'consaalermtooldlny',
  'unanswerableness',
  'infludemonstrated',
  'self-destructive',
  'counterdenunciations',
  'church-dominated',
  'religio-politico',
  'thanksgiving-day',
  'countermemorials',
  'windsor-on-hudson',
  'obviouslyagreement',
  'busideteriorating',
  'one-dayof-rest-in-seven',
  'state-intrenched',
  'attendstipulating',
  'entlimimmimiemil',
  'warm-heartedness',
  'illrilohlietflir',
  'history-confirming',
  'semi-ecclesiastical',
  'secular-rest-day',
  'tomplonsesolomerol',
  'intelligent-looking',
  'ivosengtoexirmemed',
  'suspension-bridges',
  'self-righteousness',
  'near-prohibition',
  'day-rest-in-seven',
  'statute-enforced',
  'weiverreitaararforreahaarivitoroyerriiivii',
  'five-million-dollar',
  'twentieth-century',
  'politico-ecclesiastical',
  'counterallegations',
  'alaska-yukon-pacific',
  'ecemoictiemememoodemeeeme',
  'sabbath-breakers',
  'commandment-keepers',
  'trothofabusesandegurpatent',
  'trading-with-the-enemy',
  'go-to-churchor-stay-indoors',
  'bureau-of-military-intelligence',
  'seventeenth-century',
  'self-destruction',
  'kiderlen-waechter',
  'betterthan-thous',
  'antitrinitarians',
  'act-of-parliament',
  'self-preservation',
  'prescott-wilson-tumulty',
  'post-reformation',
  'brigadier-general',
  'government-makers',
  'emirmeilsaarsinemiliehmee',
  'rograssmargmeermirl',
  'ttimilimmumulminnittinitintinninitutimmi',
  'frankfort-on-the-main',
  'consumption-cure',
  'disestablishtvent',
  'church-collective',
  'religious-legislation',
  'intfilnilhimirimihimmihimiirminlnimimiriminiimium',
  'one-day-of-rest-in-seven',
  'fillikifineffilia',
  'parochial-school',
  'over-encouraging',
  'uvrapsimisulswipampiampv',
  'eadergettlevaaled',
  'lieutenant-colonel',
  'all-comprehensive',
  'double-mindedness',
  'man-administered',
  'counter-movement',
  'half-pintof-claret',
  'counter-argument',
  'world-conscience',
  'parochial-school-system',
  'ztkirmintzflrmerifranc',
  'mramiluesimairrimamesiemiamemilie',
  'agretiitilitltitstriffigtisifitiveram',
  'microbedestroying',
  'wommiumniffunivirlsoir',
  'maher-shalal-hash-baz',
  'governmentsupported',
  'misrepreapproved',
  'heaven-enlightened',
  'five-hundred-word',
  'incomparabiabove',
  "postmaster-general's",
  'succeedinggenerations',
  'imememememeinimeimii',
  'sunday-observance',
  'restaurant-keeper',
  'generous-hearted',
  'self-contradictory',
  'seven-daysa-week',
  'ostammosanosonsorr',
  'rnitivittiltifirmi',
  'muniummmitimlinini',
  "will-o'-the-wisp",
  'counterdemonstrations',
  'i-ifidairicliiiriiirroi',
  'church-and-state',
  'thirty-five-gram',
  'thefactthattheyinvolvethevitalprinciple',
  'imeemiumeemeemene',
  'inmpaiavimmipamipammmiximp',
  'demonstrainfluence',
  'all-day-everyday',
  'self-constituted',
  'notpersonalities',
  'antichristianism',
  'self-condemnatory',
  'trust-and-combine',
  'separationwhichis',
  'director-general',
  'vriliriiifiertailitarectrinfeltriatiatictitlifie',
  'super-government',
  'maher-shalalhash-baz',
  'jerusalem-to-jericho',
  'nolimmowiftwommr',
  'copy-thirty-five',
  "school-teachers'",
  'church-and-state-union',
  'mimmomiosomosoissoisioissossosivissossicsiiiss'],
 15)

Correction 9 -- Split Squashed Words

In [39]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log

prev = cycle
cycle = "correction9"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

verified_tokens = []

for filename in corpus:  
    content = utilities.readfile(directories['prev'], filename)
    clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)

tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]

sorted_list_of_words = list(words_sorted_short['token'])

wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
maxword = max(len(x) for x in sorted_list_of_words)

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = utilities.strip_punct(content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    
    for token in tokens:
        if not token.lower() in spelling_dictionary:
            if len(token) > 17:
                if re.search(r"[\-\-\'\"]", token):
                    pass
                else:
                    split_string = clean.infer_spaces(token, wordcost, maxword)
                    list_split_string = split_string.split()
                    
                    if clean.verify_split_string(list_split_string, spelling_dictionary):
                        replacements.append((token, split_string))
                    else:
                        pass
            else:
                pass
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
LibM19080101-V03-01-page22.txt: [('thefactthattheyinvolvethevitalprinciple', 'the fact that they involve the vital principle')]
LibM19080401-V03-02-page22.txt: [('countermemorialists', 'counter memorialists')]
LibM19090101-V04-01-page10.txt: [('satisfactostruction', 'sat is fact o st r u c t i o n')]
LibM19090701-V04-03-page34.txt: [('ffaSSININSONSIWOloolgasers', 'f f a S S I N I N S O N S I W O l o o l g a s e r s')]
LibM19101001-V05-04-page1.txt: [('AffindlitilffilillikVillehd', 'A f f i n d l i t i l f f i l i l l i k V i l l e h d')]
LibM19110401-V06-02-page49.txt: [('msossgmAIAASSMgEAMAKAWMALNARLAA', 'ms o s s g m A I A A S S M g E A M A K A W M A L N A R L A A')]
LibM19111001-V06-04-page18.txt: [('obviouslyagreement', 'obviously agreement')]
LibM19120101-V07-01-page50.txt: [('Toforeigncountries', 'To foreign countries')]
LibM19121001-V07-04-page6.txt: [('tomplonsesolomerol', 'tom p l on s e s o l o m e r o l'), ('IMpreeloreesocoeselaal', 'IM p reel ore e s o c o e s e l a a l'), ('emerhilsamalsinalso', 'e m e r h i l s a m a l s i n a l s o'), ('consaalermtooldlny', 'con s a a l er m t o o l d l n y')]
LibM19130101-V08-01-page27.txt: [('counterdemonstrations', 'counter demonstrations')]
LibM19130101-V08-01-page49.txt: [('agaomooRWairaliOigTiargial', 'a g a o m o o R W a i r a l i O i g T i a r g i a l')]
LibM19130401-V08-02-page52.txt: [('Ostammosanosonsorr', 'O st am m o s a n o s o n s o r r')]
LibM19130701-V08-03-page10.txt: [('lieutenantgovernor', 'lieutenant governor')]
LibM19140101-V09-01-page27.txt: [('establishingreligious', 'establishing religious')]
LibM19140101-V09-01-page4.txt: [('infOrfaiSIOMOMINCOMOCADOVIEMMIGOIMIWA', 'in f O r f a i S I O M O M I N C O M O C A D O V I E M M I G O I M I W A')]
LibM19140701-V09-03-page49.txt: [('SIMMMISMWKlaiigitil', 'S IM M M I S M W K l a i i g i t i l')]
LibM19141001-V09-04-page10.txt: [('counterdenunciations', 'counter denunciations')]
LibM19150101-V10-01-page53.txt: [('nosonmomorwemcwaint', 'no son mom or we m c w a i n t')]
LibM19150701-V10-03-page18.txt: [('governmentsupported', 'government supported')]
LibM19160101-V11-01e-page1.txt: [('sssssssssssssssssssss', 's s s s s s s s s s s s s s s s s s s s s')]
LibM19161001-V11-04-page22.txt: [('counterallegations', 'counter allegations')]
LibM19170101-V12-01-page13.txt: [('democraticrepublican', 'democratic republican')]
LibM19180101-V13-01-page5.txt: [('reconstructionists', 'reconstruction i sts')]
LibM19180101-V13-01-page9.txt: [('antiprohibitionists', 'anti prohibitionists')]
LibM19180401-V13-02-page3.txt: [('inoomalloISOSSIMIS', 'in o o m a l l o I S O S S I M I S'), ('MIMMOMIOSOMOSOISSOISIOISSOSSOSIVISSOSSICSIIISS', 'MIM MOM I O SO M O S O I S S O I S I O I S S O S S O S I V I S S O S S I C S I I I S S')]
LibM19180701-V13-03-page12.txt: [('selfaggrandizement', 'self aggrandizement')]
LibM19180701-V13-03-page27.txt: [('antiecclesiastical', 'anti ecclesiastical')]
LibM19180701-V13-03-page3.txt: [('inimlfilninninilli', 'in im l f i l n i n n i n i l l i')]
LibM19181001-V13-04-page13.txt: [('HIHINHINIIIIIIIRIN', 'HI H IN H IN III III IR IN')]
LibM19190101-V15-01-page3.txt: [('iitoitllislossoliiosill', 'ii to it l l is loss o l ii o s ill')]
LibM19190401-V15-02-page3.txt: [('MIOMMOOOMOOMSOICIMUCHMUSUOIHIUOIMISIUMMICOSISSISSInasseeememeescs', 'M I O M M O O O M O O M S O I C I M U C H M U S U O I H I U O I M I S I U M M I C O S I S S I S S I n a s s e e e m e m e e s c s')]
LibM19190701-V15-03-page36.txt: [('lecosniiionpainOticsovicesfirde', 'le c o s n i i i o n p a i n O t i c s o v i c e s f i r d e')]
LibM19191001-V15-04-page27.txt: [('MIilmilliiimilliifilMIDID', 'M I i l m i l l i i i m i l l i i f i l M I D I D')]
LibM19200401-V14-02-page13.txt: [('IMEMEMEMEMEINIMEIMII', 'I ME ME ME ME ME IN I ME IM II')]
LibM19200401-V14-02-page31.txt: [('RIMMINEIIIIIIIIIMIRRE', 'RIM MIN E I I I I I I I I I M I R R E')]
LibM19200701-V14-03-page12.txt: [('INMPAIAVIMMIPAMIPAMMMIXIMP', 'IN M P A I A V I M M I P A M I P A M M M I X I M P'), ('iillrieeiaiiirriardi', 'i ill r i e e i a i i i r r i a r d i')]
LibM19200701-V14-03-page20.txt: [('UVRAPSIMISULSWIPAMPIAMPV', 'U V R A P S I M I S U L S W I P A M P I A M P V'), ('weiverreitaararforreahaarivitoroyerriiivii', 'we iv err e i t a a r a r f o r r e a h a a r i v i t o r o y e r r i i i v i i')]
LibM19200701-V14-03-page3.txt: [('ipuitnIlinimilliiiinulillluunii', 'i p u i t n I l i n i m i l l i i i i n u l i l l l u u n i i')]
In [40]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction9

Average verified rate: 0.9815728089947997

Average of error rates: 0.0331019809244314

Total token count: 1452039

In [41]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[41]:
[("'", 1499),
 ('m', 1365),
 ('d', 1261),
 ('e', 1023),
 ('w', 958),
 ('t', 844),
 ('n', 797),
 ('r', 704),
 ('f', 643),
 ('g', 391),
 ('x', 272),
 ('u', 218),
 ('k', 195),
 ('tv', 150),
 ('th', 117),
 ('pa', 100),
 ('sunday-law', 92),
 ('z', 82),
 ('ex', 75),
 ('io', 71),
 ('id', 71),
 ('co', 64),
 ('postmaster-general', 62),
 ('re', 59),
 ('ga', 58),
 ('post-offices', 57),
 ('mo', 57),
 ('un-american', 57),
 ('statute-books', 56),
 ('va', 56),
 ('sunday-closing', 54),
 ('church-and-state', 49),
 ('mm', 45),
 ('q', 44),
 ('un', 43),
 ('mt', 42),
 ('attorney-general', 41),
 ('tion', 40),
 ('sunday-rest', 39),
 ('wm', 38),
 ('pp', 38),
 ('charta', 37),
 ('ro', 35),
 ('li', 35),
 ('neander', 31),
 ('-', 30),
 ('seventhday', 30),
 ('mi', 28),
 ('ky', 28),
 ('religio-political', 27)]
In [42]:
reports.docs_with_high_error_rate(summary)
Out[42]:
[('LibM19200401-V14-02-page4.txt', 1.0),
 ('LibM19060401-V01-01-page2.txt', 1.0),
 ('LibM19140701-V09-03-page52.txt', 0.857),
 ('LibM19110701-V06-03-page1.txt', 0.824),
 ('LibM19080101-V03-01-page1.txt', 0.812),
 ('LibM19090401-V04-02-page33.txt', 0.778),
 ('LibM19110101-V06-01-page1.txt', 0.769),
 ('LibM19191001-V15-04-page28.txt', 0.75),
 ('LibM19110701-V06-03-page4.txt', 0.729),
 ('LibM19080401-V03-02-page19.txt', 0.714),
 ('LibM19080701-V03-03-page1.txt', 0.687),
 ('LibM19100101-V05-01-page1.txt', 0.676),
 ('LibM19111001-V06-04-page1.txt', 0.667),
 ('LibM19090401-V04-02-page1.txt', 0.667),
 ('LibM19110401-V06-02-page1.txt', 0.662),
 ('LibM19081001-V03-04-page1.txt', 0.66),
 ('LibM19080401-V03-02-page1.txt', 0.659),
 ('LibM19130701-V08-03-page2.txt', 0.657),
 ('LibM19100401-V05-02-page1.txt', 0.645),
 ('LibM19080701-V03-03-page41.txt', 0.619),
 ('LibM19100701-V05-03-page1.txt', 0.615),
 ('LibM19140401-V09-02-page1.txt', 0.611),
 ('LibM19120701-V07-03-page4.txt', 0.605),
 ('LibM19090401-V04-02-page2.txt', 0.6),
 ('LibM19170401-V12-02-page1.txt', 0.583),
 ('LibM19150401-V10-02-page1.txt', 0.577),
 ('LibM19170701-V12-03-page1.txt', 0.566),
 ('LibM19180101-V13-01-page4.txt', 0.558),
 ('LibM19090101-V04-01-page1.txt', 0.543),
 ('LibM19060401-V01-01-page35.txt', 0.529),
 ('LibM19191001-V15-04-page1.txt', 0.524),
 ('LibM19170701-V12-03-page4.txt', 0.5),
 ('LibM19120401-V07-02-page4.txt', 0.5),
 ('LibM19101001-V05-04-page1.txt', 0.478),
 ('LibM19180101-V13-01-page1.txt', 0.471),
 ('LibM19121001-V07-04-page1.txt', 0.471),
 ('LibM19190701-V15-03-page36.txt', 0.456),
 ('LibM19200101-V14-01-page1.txt', 0.455),
 ('LibM19121001-V07-04-page6.txt', 0.45),
 ('LibM19070401-V02-02-page36.txt', 0.444),
 ('LibM19090701-V04-03-page1.txt', 0.419),
 ('LibM19111001-V06-04-page52.txt', 0.407),
 ('LibM19151001-V10-04-page7.txt', 0.406),
 ('LibM19190401-V15-02-page1.txt', 0.4),
 ('LibM19200401-V14-02-page35.txt', 0.393),
 ('LibM19071001-V02-04-page18.txt', 0.389),
 ('LibM19130701-V08-03-page26.txt', 0.387),
 ('LibM19140101-V09-01-page56.txt', 0.383),
 ('LibM19141001-V09-04-page52.txt', 0.363),
 ('LibM19071001-V02-04-page51.txt', 0.36),
 ('LibM19140701-V09-03-page1.txt', 0.333),
 ('LibM19200701-V14-03-page1.txt', 0.333),
 ('LibM19080101-V03-01-page2.txt', 0.333),
 ('LibM19201001-V14-04-page1.txt', 0.333),
 ('LibM19120401-V07-02-page40.txt', 0.32),
 ('LibM19090401-V04-02-page51.txt', 0.312),
 ('LibM19160701-V11-03-page1.txt', 0.312),
 ('LibM19140701-V09-03-page4.txt', 0.307),
 ('LibM19090701-V04-03-page51.txt', 0.294),
 ('LibM19180701-V13-03-page4.txt', 0.294),
 ('LibM19140701-V09-03-page49.txt', 0.288),
 ('LibM19150101-V10-01-page1.txt', 0.286),
 ('LibM19190101-V15-01-page4.txt', 0.273),
 ('LibM19130701-V08-03-page27.txt', 0.273),
 ('LibM19090101-V04-01-page21.txt', 0.262),
 ('LibM19060401-V01-01-page36.txt', 0.25),
 ('LibM19200401-V14-02-page1.txt', 0.25),
 ('LibM19120401-V07-02-page38.txt', 0.245),
 ('LibM19151001-V10-04-page1.txt', 0.24),
 ('LibM19090101-V04-01-page52.txt', 0.239),
 ('LibM19130401-V08-02-page1.txt', 0.235),
 ('LibM19121001-V07-04-page4.txt', 0.233),
 ('LibM19120701-V07-03-page1.txt', 0.231),
 ('LibM19141001-V09-04-page1.txt', 0.222),
 ('LibM19070401-V02-02-page35.txt', 0.222),
 ('LibM19130101-V08-01-page1.txt', 0.214),
 ('LibM19170401-V12-02-page4.txt', 0.214),
 ('LibM19131001-V08-04-page1.txt', 0.211),
 ('LibM19080701-V03-03-page52.txt', 0.21),
 ('LibM19080101-V03-01-page51.txt', 0.208),
 ('LibM19130701-V08-03-page1.txt', 0.208),
 ('LibM19100101-V05-01-page31.txt', 0.206),
 ('LibM19090701-V04-03-page42.txt', 0.206),
 ('LibM19150101-V10-01-page12.txt', 0.202)]
In [43]:
# %load shared_elements/high_error_rates.py
doc_keys = [x[0] for x in reports.docs_with_high_error_rate(summary) if x[1] > 0.5]

# utilities.open_original_docs(doc_keys, directories['cycle'])
In [ ]: