LibM-OCR-Evaluation-and-Correction
In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt",
"2016-12-07-SDA-place-names.txt",
"2016-12-08-SDA-Vocabulary.txt",
"2017-01-03-place-names.txt",
"2017-02-14-Base-Word-List-SCOWL&KJV.txt",
"2017-02-14-Roman-Numerals.txt",
"2017-03-01-Additional-Approved-Words.txt"
]
In [6]:
spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)
In [7]:
title = "LibM"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)
Baseline¶
In [9]:
cycle = 'baseline'
In [10]:
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/baseline Average verified rate: 0.9276951364862356 Average of error rates: 0.08840278796771826 Total token count: 1502679
In [11]:
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 500 )
Out[11]:
[('ñ', 5427),
('-', 1800),
('re-', 1618),
('con-', 1590),
("'", 1508),
('tion', 1443),
('m', 1251),
('d', 1242),
('¥', 1236),
('in-', 1093),
('w', 937),
('e', 925),
(')', 906),
('ment', 864),
('t', 803),
('n', 748),
('de-', 745),
('be-', 695),
('+', 648),
('com-', 643),
('r', 626),
('f', 594),
('pro-', 571),
('sun-', 502)]
Check Special Character Use¶
In [12]:
reports.tokens_with_special_characters(errors_summary)[:200]
Out[12]:
[('ñ', 5427),
('¥', 1236),
(')', 906),
('+', 648),
('(', 478),
('/', 388),
('=', 193),
('(affiliated', 173),
('*', 169),
('•', 137),
('ã', 134),
('(affil-', 132),
('(af-', 119),
('_', 99),
('(see', 94),
('¥¥', 76),
('%', 73),
('[the', 72),
('(or', 65),
(']', 61),
('¥¥¥', 56),
('(the', 55),
('ó', 52),
('year)', 49),
('[', 49),
('\\', 43),
('newfoundland)', 43),
('carolina)', 43),
('alaska)', 42),
('wyoming)', 42),
('mexico)', 42),
('wisconsin)', 42),
('mississippi)', 42),
('maryland)', 42),
('dakota)', 42),
('island)', 42),
('arizona)', 42),
('(secretary', 40),
('(western)', 39),
('(affili-', 38),
('++', 37),
('o)', 36),
('(southern)', 35),
('(payable', 33),
('ô', 32),
('(continued', 31),
('(west)', 31),
('>', 30),
('¥¥¥¥', 30),
('(s', 30),
('(h', 29),
('`', 29),
('office)', 25),
('longacre)', 25),
('(a', 24),
('england)', 21),
('¥=', 20),
('(and', 19),
('[of', 19),
('[sunday]', 19),
('chesapeake)', 18),
('(which', 18),
('(eastern)', 18),
('southern)', 18),
('m¥', 18),
('(concluded', 18),
('(south)', 17),
('(north)', 17),
('tennessee)', 17),
('¡', 17),
('(east)', 17),
('(northern)', 16),
('„', 16),
('catholic)', 15),
('(i', 14),
('sunday]', 14),
('(washington', 14),
('(tennessee)', 14),
('=¥', 14),
('ñthe', 14),
('(over)', 13),
('(page', 13),
('#', 13),
('(australia)', 13),
('(england)', 13),
('¤', 13),
('\ufeff', 13),
('(roman', 13),
('(france)', 12),
('(italics', 12),
('(nebraska)', 12),
('i%', 12),
('¥¥¥¥¥¥', 12),
('**', 12),
('(massachusetts)', 12),
('(exchange', 11),
('sunday)', 11),
('(if', 11),
('¥-', 11),
('(peru)', 11),
('saskatchewan)', 11),
('(chile)', 11),
('(cross', 11),
('—', 10),
('i¥', 10),
('-¥', 10),
('the¥', 10),
('(western', 10),
('[in', 10),
('¥the', 10),
('(a)', 9),
('(california)', 9),
('(e', 9),
('(subscriptions', 9),
('(may', 9),
('(to', 9),
('i)', 9),
('(i)', 9),
("'ñ", 9),
('accepted)', 9),
('<', 9),
('(your', 9),
('(greater)', 9),
('[a', 9),
('desired)', 9),
('c)', 8),
('(b)', 8),
('th¥', 8),
('¤¤', 8),
('post-office)', 8),
('ña', 8),
('(frontispiece)', 8),
('ãã', 8),
('[not', 8),
('(baptist)', 8),
('excepted)', 8),
('(central)', 8),
("'¥", 7),
('//', 7),
('¥and', 7),
('law]', 7),
('york)', 7),
('¥m', 7),
('day]', 7),
('=m', 7),
('(catholic)', 7),
('church]', 7),
('(signed)', 7),
('©', 7),
('(as', 7),
('ñid', 7),
('[mr', 6),
('(two', 6),
('(pa', 6),
('(minnesota)', 6),
('••', 6),
('/m', 6),
('m=', 6),
('(mr', 6),
('~~', 6),
('=¥¥', 6),
('(n', 6),
('(for', 6),
('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++', 6),
('[for', 6),
('(by', 6),
('(canadian', 6),
('(works', 6),
('(sunday)', 6),
('(rev', 6),
('(new', 6),
('day)', 6),
('(in', 6),
('(lay', 6),
('(of', 6),
('¥¥¥¥¥', 5),
('numbers)', 5),
('ñibid', 5),
('(poetry)', 5),
('<>', 5),
('[civil', 5),
('(civil', 5),
('labor)', 5),
('attach\x8e', 5),
('i/', 5),
('(first', 5),
('}', 5),
('(dec', 5),
('(saturday', 5),
('prescott)', 5),
('[sunday', 5),
('♦', 5),
('(r', 5),
('(john', 5),
('[christ]', 5),
('~', 5),
('ñhon', 5),
('(d', 5),
('laws]', 5),
('++++++++++++++++++++++++++++++++++++++++++++++++++++++++', 5)]
Correction 1 -- Normalize Characters¶
In [13]:
# %load shared_elements/normalize_characters.py
prev = cycle
cycle = "correction1"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
# Substitute for all other dashes
content = re.sub(r"—-—–‑", r"-", content)
# Substitute formatted apostrophe
content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
# Replace all special characters with a space (as these tend to occur at the end of lines)
content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
In [14]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction1 Average verified rate: 0.9373310897970699 Average of error rates: 0.0763136463683052 Total token count: 1492287
In [15]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[15]:
[('-', 1932),
('re-', 1620),
('con-', 1590),
("'", 1578),
('tion', 1446),
('m', 1334),
('d', 1256),
('in-', 1094),
('e', 985),
('w', 952),
('ment', 864),
('t', 837),
('n', 774),
('de-', 745),
('be-', 696),
('r', 663),
('com-', 645),
('f', 624),
('pro-', 572),
('sun-', 505),
('ex-', 450),
('en-', 446),
('tions', 404),
('g', 384),
('ligious', 367),
('per-', 361),
('dis-', 360),
('un-', 357),
('relig-', 351),
('na-', 328),
('gov-', 324),
('ob-', 323),
('chris-', 298),
('govern-', 292),
('x', 265),
('ernment', 260),
('ious', 258),
('ac-', 250),
('erty', 237),
('ance', 236),
('lib-', 236),
('pre-', 235),
('sab-', 234),
('ments', 233),
('ad-', 230),
('reli-', 219),
('tional', 211),
('ligion', 209),
('u', 206),
('im-', 206)]
Correction 2 -- Fix Line Endings¶
In [16]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
In [17]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction2 Average verified rate: 0.9784327331755492 Average of error rates: 0.037992296404988996 Total token count: 1452618
In [18]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[18]:
[('-', 1918),
("'", 1578),
('m', 1330),
('d', 1250),
('e', 976),
('w', 951),
('t', 819),
('n', 772),
('r', 662),
('f', 620),
('g', 381),
('x', 265),
('u', 205),
('k', 188),
('tv', 150),
('th', 118),
('sunday-law', 112),
('--', 111),
('postmaster-general', 106),
('pa', 101),
('sunday-closing', 73),
('z', 72),
('un-american', 72),
('id', 71),
('io', 70),
('statute-books', 66),
('post-offices', 66),
('co', 61),
('church-and-state', 60),
('mo', 60),
('ga', 58),
('va', 56),
('attorney-general', 56),
('ex', 51),
('re', 48),
('sunday-rest', 48),
('tion', 45),
('mm', 42),
('q', 41),
('mt', 41),
('wm', 38),
('pp', 38),
('re-', 38),
('ro', 37),
('charta', 37),
('mi', 36),
('li', 36),
('---', 36),
('present-day', 35),
('religio-political', 33)]
Correction 3 -- Remove extra dashes¶
In [19]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
replacements = []
for token in tokens:
if token[0] is "-":
replacements.append((token, token[1:]))
elif token[-1] is "-":
replacements.append((token, token[:-1]))
else:
pass
if len(replacements) > 0:
print("{}: {}".format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19060401-V01-01-page1.txt: [('support.-', 'support.')]
LibM19060401-V01-01-page10.txt: [('sanc-', 'sanc')]
LibM19060401-V01-01-page11.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc')]
LibM19060401-V01-01-page12.txt: [('organiza-', 'organiza')]
LibM19060401-V01-01-page14.txt: [('altogether."-', 'altogether."')]
LibM19060401-V01-01-page15.txt: [('-the', 'the')]
LibM19060401-V01-01-page17.txt: [('-II.', 'II.'), ('de-', 'de')]
LibM19060401-V01-01-page19.txt: [('-haracterized', 'haracterized')]
LibM19060401-V01-01-page21.txt: [('pe-', 'pe')]
LibM19060401-V01-01-page27.txt: [('-', '')]
LibM19060401-V01-01-page31.txt: [('-', '')]
LibM19060401-V01-01-page32.txt: [('-', '')]
LibM19060401-V01-01-page34.txt: [('-never', 'never'), ('-', ''), ('-', ''), ('-', ''), ('prin-', 'prin')]
LibM19060401-V01-01-page35.txt: [('-', ''), ('-', ''), ('-', ''), ('-gh-l-', 'gh-l-'), ('ner-', 'ner'), ('-', ''), ('-', ''), ('--afilhir', '-afilhir'), ('--', '-'), ('-', ''), ('-', ''), ('-iSW', 'iSW'), ('-', ''), ('-cirm', 'cirm'), ('-', ''), ('--ant', '-ant'), ('-', ''), ('-', ''), ("'e-Ihttnii-ti--", "'e-Ihttnii-ti-"), ('-owl-', 'owl-'), ('-', ''), ('it-', 'it'), ('--', '-'), ('-', ''), ('-ao-te', 'ao-te'), ('-viez', 'viez'), ('-', ''), ('derwee.--', 'derwee.-'), ('..pieLese--', '..pieLese-'), ('o-', 'o'), ('.-', '.'), ('-or--.', 'or--.'), ('tr.-', 'tr.'), ('-', ''), ('-ezel', 'ezel')]
LibM19060401-V01-01-page36.txt: [('SURMOUNT-', 'SURMOUNT')]
LibM19060401-V01-01-page4.txt: [('opin-', 'opin'), ('-', '')]
LibM19060401-V01-01-page6.txt: [('-', '')]
LibM19060401-V01-01-page8.txt: [('en-', 'en')]
LibM19060701-V01-02-page1.txt: [('support.-', 'support.')]
LibM19060701-V01-02-page12.txt: [('LIB-', 'LIB')]
LibM19060701-V01-02-page13.txt: [('-', '')]
LibM19060701-V01-02-page15.txt: [('com-', 'com')]
LibM19060701-V01-02-page18.txt: [('exer-', 'exer')]
LibM19060701-V01-02-page27.txt: [('mis-', 'mis')]
LibM19060701-V01-02-page3.txt: [('C--', 'C-'), ('----', '---'), ('--', '-'), ('-', ''), ('-----------.', '----------.')]
LibM19060701-V01-02-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19060701-V01-02-page5.txt: [('coun-', 'coun')]
LibM19060701-V01-02-page7.txt: [('-', '')]
LibM19061001-V01-03-page11.txt: [('per-', 'per')]
LibM19061001-V01-03-page15.txt: [('inn-', 'inn')]
LibM19061001-V01-03-page17.txt: [('legal-', 'legal'), ('-', '')]
LibM19061001-V01-03-page18.txt: [('-', ''), ('coun-', 'coun')]
LibM19061001-V01-03-page19.txt: [('secu-', 'secu')]
LibM19061001-V01-03-page20.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19061001-V01-03-page24.txt: [('profana-', 'profana')]
LibM19061001-V01-03-page26.txt: [('-governor', 'governor')]
LibM19061001-V01-03-page27.txt: [('-of', 'of'), ('modifi-', 'modifi')]
LibM19061001-V01-03-page28.txt: [('s--', 's-')]
LibM19061001-V01-03-page3.txt: [('-earing', 'earing')]
LibM19061001-V01-03-page30.txt: [('op-', 'op')]
LibM19061001-V01-03-page31.txt: [('Paid-', 'Paid'), ('free-', 'free')]
LibM19061001-V01-03-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19061001-V01-03-page4.txt: [('rhoreh-and-', 'rhoreh-and')]
LibM19061001-V01-03-page8.txt: [('-TOUSES', 'TOUSES')]
LibM19061001-V01-03-page9.txt: [('-', '')]
LibM19070101-V02-01-page12.txt: [('them-', 'them'), ('.-', '.'), ('-', ''), ('A-', 'A')]
LibM19070101-V02-01-page14.txt: [('-conscience', 'conscience')]
LibM19070101-V02-01-page17.txt: [('accord-', 'accord')]
LibM19070101-V02-01-page18.txt: [('Con-', 'Con')]
LibM19070101-V02-01-page2.txt: [('--', '-'), ('-', ''), ('-----', '----'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('---', '--'), ('-', '')]
LibM19070101-V02-01-page21.txt: [('en-', 'en')]
LibM19070101-V02-01-page22.txt: [('-', '')]
LibM19070101-V02-01-page23.txt: [('Mc-', 'Mc')]
LibM19070101-V02-01-page25.txt: [('Postmaster-', 'Postmaster'), ('in-', 'in'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19070101-V02-01-page3.txt: [('gov-', 'gov')]
LibM19070101-V02-01-page30.txt: [('-', ''), ('-', '')]
LibM19070101-V02-01-page31.txt: [('-', '')]
LibM19070101-V02-01-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19070101-V02-01-page35.txt: [('-', '')]
LibM19070101-V02-01-page7.txt: [('-', '')]
LibM19070101-V02-01-page8.txt: [('un-', 'un')]
LibM19070401-V02-02-page12.txt: [('hun-', 'hun')]
LibM19070401-V02-02-page13.txt: [('Sunday-', 'Sunday')]
LibM19070401-V02-02-page14.txt: [('Sun-', 'Sun')]
LibM19070401-V02-02-page16.txt: [('-', ''), ('enfor-', 'enfor')]
LibM19070401-V02-02-page17.txt: [('LAN-', 'LAN')]
LibM19070401-V02-02-page18.txt: [('men.-', 'men.')]
LibM19070401-V02-02-page19.txt: [('-', '')]
LibM19070401-V02-02-page22.txt: [('Medo-', 'Medo')]
LibM19070401-V02-02-page26.txt: [('consulted.-', 'consulted.')]
LibM19070401-V02-02-page27.txt: [('--', '-'), ('---', '--'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.j.i.i.', '.j.i.i.'), ('-f', 'f'), ('-', ''), ('-', ''), ('-I', 'I'), ('I-', 'I'), ("-'il", "'il")]
LibM19070401-V02-02-page3.txt: [('Chi-', 'Chi'), ('-', '')]
LibM19070401-V02-02-page32.txt: [('-', '')]
LibM19070401-V02-02-page34.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19070401-V02-02-page4.txt: [('-', ''), ('Lewis-', 'Lewis'), ('-', '')]
LibM19070401-V02-02-page6.txt: [('an-', 'an')]
LibM19070701-V02-03-page14.txt: [('-', ''), ('rec-', 'rec')]
LibM19070701-V02-03-page15.txt: [('-', ''), ('-', ''), ('at-', 'at')]
LibM19070701-V02-03-page17.txt: [('Indepen-', 'Indepen')]
LibM19070701-V02-03-page18.txt: [('agi-', 'agi')]
LibM19070701-V02-03-page19.txt: [('James-', 'James')]
LibM19070701-V02-03-page20.txt: [('Chris-', 'Chris')]
LibM19070701-V02-03-page21.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19070701-V02-03-page22.txt: [('-', ''), ('FOUNDATIONS.-', 'FOUNDATIONS.')]
LibM19070701-V02-03-page23.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc')]
LibM19070701-V02-03-page24.txt: [('-', '')]
LibM19070701-V02-03-page25.txt: [('-', '')]
LibM19070701-V02-03-page27.txt: [('-', '')]
LibM19070701-V02-03-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19070701-V02-03-page3.txt: [('Protes-', 'Protes')]
LibM19070701-V02-03-page34.txt: [('-', ''), ('-', ''), ('-', ''), ('Act-of-', 'Act-of')]
LibM19070701-V02-03-page8.txt: [('LIB-', 'LIB'), ('-', '')]
LibM19070701-V02-03-page9.txt: [('As-', 'As'), ('-', '')]
LibM19071001-V02-04-page10.txt: [('cor-', 'cor')]
LibM19071001-V02-04-page12.txt: [('-that', 'that')]
LibM19071001-V02-04-page14.txt: [('--', '-'), ('Vice-', 'Vice')]
LibM19071001-V02-04-page15.txt: [('con-', 'con')]
LibM19071001-V02-04-page17.txt: [('in-', 'in'), ('-', ''), ('-', '')]
LibM19071001-V02-04-page18.txt: [('-', '')]
LibM19071001-V02-04-page19.txt: [('-', '')]
LibM19071001-V02-04-page20.txt: [('-', '')]
LibM19071001-V02-04-page21.txt: [('doc-', 'doc')]
LibM19071001-V02-04-page22.txt: [('-', '')]
LibM19071001-V02-04-page23.txt: [('-', '')]
LibM19071001-V02-04-page24.txt: [('rneas-', 'rneas')]
LibM19071001-V02-04-page26.txt: [('----', '---'), ('-', '')]
LibM19071001-V02-04-page27.txt: [('-', '')]
LibM19071001-V02-04-page28.txt: [('-', '')]
LibM19071001-V02-04-page29.txt: [('Record-', 'Record')]
LibM19071001-V02-04-page30.txt: [('decep-', 'decep'), ('-', ''), ('-', '')]
LibM19071001-V02-04-page31.txt: [('-', ''), ('-', '')]
LibM19071001-V02-04-page32.txt: [('law-', 'law'), ('impor-', 'impor'), ('Sab-', 'Sab'), ('-orb', 'orb'), ('re-', 're'), ('-', '')]
LibM19071001-V02-04-page38.txt: [('-', ''), ('essen-', 'essen')]
LibM19071001-V02-04-page40.txt: [('-', '')]
LibM19071001-V02-04-page44.txt: [('--', '-')]
LibM19071001-V02-04-page46.txt: [('-', '')]
LibM19071001-V02-04-page48.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Sov-', 'Sov'), ('-e', 'e'), ('-', '')]
LibM19071001-V02-04-page49.txt: [('-page', 'page'), ('-page', 'page')]
LibM19071001-V02-04-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19071001-V02-04-page51.txt: [('-THAT', 'THAT'), ('-', ''), ('lande.-', 'lande.'), ('temert.-', 'temert.'), ('-', ''), ('Colooiso.-', 'Colooiso.'), ('-', ''), ('velour...ref.-', 'velour...ref.'), ('hands.-', 'hands.'), ('people.-', 'people.'), ('-They', 'They'), ('-W', 'W'), ('-TTE', 'TTE')]
LibM19071001-V02-04-page8.txt: [('hier-', 'hier')]
LibM19080101-V03-01-page1.txt: [('-', ''), ('-.', '.')]
LibM19080101-V03-01-page14.txt: [('-', '')]
LibM19080101-V03-01-page15.txt: [('-', '')]
LibM19080101-V03-01-page20.txt: [('un-', 'un'), ('un-', 'un'), ('presi-', 'presi')]
LibM19080101-V03-01-page22.txt: [('set-', 'set')]
LibM19080101-V03-01-page23.txt: [('-in', 'in')]
LibM19080101-V03-01-page24.txt: [('-', '')]
LibM19080101-V03-01-page26.txt: [('PRES-', 'PRES')]
LibM19080101-V03-01-page32.txt: [('in-', 'in')]
LibM19080101-V03-01-page34.txt: [('Ren-', 'Ren')]
LibM19080101-V03-01-page36.txt: [('haz-', 'haz')]
LibM19080101-V03-01-page39.txt: [('de-', 'de')]
LibM19080101-V03-01-page40.txt: [('docu-', 'docu')]
LibM19080101-V03-01-page41.txt: [('self-govern-', 'self-govern')]
LibM19080101-V03-01-page47.txt: [('As--', 'As-')]
LibM19080101-V03-01-page48.txt: [('-', ''), ('Under-', 'Under'), ('-', ''), ('-', ''), ('Sab-', 'Sab'), ('-', ''), ('Sov-', 'Sov'), ('-', ''), ('-', '')]
LibM19080101-V03-01-page49.txt: [('-page', 'page')]
LibM19080101-V03-01-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('LIB-', 'LIB')]
LibM19080101-V03-01-page51.txt: [('-..ter', '..ter'), ('-', '')]
LibM19080101-V03-01-page6.txt: [('gen-', 'gen')]
LibM19080101-V03-01-page8.txt: [('-', '')]
LibM19080401-V03-02-page1.txt: [('.-q"P--', '.-q"P-'), ('mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx--.-', 'mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx--.'), ('-', ''), ('-.-', '.-'), ('-', ''), ('-TuaDCII', 'TuaDCII')]
LibM19080401-V03-02-page11.txt: [('gov-', 'gov')]
LibM19080401-V03-02-page12.txt: [('-', '')]
LibM19080401-V03-02-page14.txt: [('-', '')]
LibM19080401-V03-02-page16.txt: [('-', ''), ('sab-', 'sab')]
LibM19080401-V03-02-page18.txt: [('-day', 'day'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Georgia-', 'Georgia'), ('-', ''), ('Illinois-', 'Illinois'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19080401-V03-02-page21.txt: [('con-', 'con')]
LibM19080401-V03-02-page24.txt: [('-', ''), ('suc-', 'suc')]
LibM19080401-V03-02-page3.txt: [('pos-', 'pos')]
LibM19080401-V03-02-page30.txt: [('Postmaster-', 'Postmaster'), ('la-', 'la')]
LibM19080401-V03-02-page35.txt: [('-', '')]
LibM19080401-V03-02-page36.txt: [('re-', 're')]
LibM19080401-V03-02-page41.txt: [('com-', 'com')]
LibM19080401-V03-02-page43.txt: [('-as', 'as'), ('-', ''), ('tol-', 'tol')]
LibM19080401-V03-02-page44.txt: [('every-', 'every')]
LibM19080401-V03-02-page48.txt: [('Under-', 'Under'), ('Sab-', 'Sab'), ('Sov-', 'Sov')]
LibM19080401-V03-02-page50.txt: [('-', '')]
LibM19080401-V03-02-page51.txt: [('ntitzu-', 'ntitzu'), ('-', '')]
LibM19080401-V03-02-page9.txt: [('-', ''), ('gov-', 'gov')]
LibM19080701-V03-03-page1.txt: [('--ff', '-ff'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini-', 'muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini'), ('-Z', 'Z'), ('-', ''), ('---v', '--v')]
LibM19080701-V03-03-page11.txt: [('-', '')]
LibM19080701-V03-03-page14.txt: [('free-', 'free')]
LibM19080701-V03-03-page20.txt: [('-', ''), ('-', ''), ('-', ''), ('re-', 're')]
LibM19080701-V03-03-page21.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19080701-V03-03-page22.txt: [('-', ''), ('I-', 'I')]
LibM19080701-V03-03-page26.txt: [('-', '')]
LibM19080701-V03-03-page28.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19080701-V03-03-page29.txt: [('-', '')]
LibM19080701-V03-03-page30.txt: [('-', '')]
LibM19080701-V03-03-page31.txt: [('-o', 'o'), ('-', '')]
LibM19080701-V03-03-page32.txt: [('na-', 'na')]
LibM19080701-V03-03-page39.txt: [('opin-', 'opin')]
LibM19080701-V03-03-page40.txt: [('Con-', 'Con'), ('-President', 'President')]
LibM19080701-V03-03-page43.txt: [('-finest', 'finest'), ('uni-', 'uni'), ('-versal', 'versal')]
LibM19080701-V03-03-page45.txt: [('prohib-', 'prohib')]
LibM19080701-V03-03-page47.txt: [('-', '')]
LibM19080701-V03-03-page48.txt: [('Revelation.-', 'Revelation.'), ('-', ''), ('-', ''), ('Under-', 'Under'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Sov-', 'Sov'), ('Christ-', 'Christ')]
LibM19080701-V03-03-page49.txt: [('-', '')]
LibM19080701-V03-03-page5.txt: [('con-', 'con')]
LibM19080701-V03-03-page50.txt: [('-', '')]
LibM19080701-V03-03-page52.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19080701-V03-03-page6.txt: [('pas-', 'pas')]
LibM19080701-V03-03-page7.txt: [('-', ''), ('-the', 'the')]
LibM19080701-V03-03-page9.txt: [('Post-', 'Post')]
LibM19081001-V03-04-page1.txt: [('E-', 'E'), ('-', ''), ('-rI', 'rI'), ('-', ''), ('-', '')]
LibM19081001-V03-04-page13.txt: [('ef-', 'ef')]
LibM19081001-V03-04-page14.txt: [('-', ''), ('state-', 'state')]
LibM19081001-V03-04-page15.txt: [('-', '')]
LibM19081001-V03-04-page18.txt: [('In-', 'In')]
LibM19081001-V03-04-page19.txt: [('-', '')]
LibM19081001-V03-04-page20.txt: [('-', '')]
LibM19081001-V03-04-page22.txt: [('es-', 'es'), ('for-', 'for')]
LibM19081001-V03-04-page23.txt: [('sab-', 'sab'), ('some-', 'some')]
LibM19081001-V03-04-page26.txt: [('-hall', 'hall')]
LibM19081001-V03-04-page33.txt: [('--', '-')]
LibM19081001-V03-04-page34.txt: [('tend-', 'tend'), ('stri-', 'stri')]
LibM19081001-V03-04-page37.txt: [('punment.--', 'punment.-'), ('imprison-', 'imprison'), ('--', '-')]
LibM19081001-V03-04-page39.txt: [('-', '')]
LibM19081001-V03-04-page4.txt: [('com-', 'com')]
LibM19081001-V03-04-page40.txt: [('com-', 'com')]
LibM19081001-V03-04-page41.txt: [('remem-', 'remem'), ('-which', 'which')]
LibM19081001-V03-04-page43.txt: [('pecul-', 'pecul')]
LibM19081001-V03-04-page44.txt: [('un-', 'un'), ('-', '')]
LibM19081001-V03-04-page47.txt: [('repu-', 'repu')]
LibM19081001-V03-04-page49.txt: [('-', ''), ('-', '')]
LibM19081001-V03-04-page50.txt: [('-', ''), ('-', '')]
LibM19081001-V03-04-page52.txt: [('-', '')]
LibM19081001-V03-04-page8.txt: [('-', '')]
LibM19081001-V03-04-page9.txt: [('un-', 'un')]
LibM19090101-V04-01-page1.txt: [('-', ''), ('.....-', '.....'), ('-', ''), ('-....i"', '....i"'), ('k..a...--', 'k..a...-'), ('-', ''), ('-', ''), ('-', '')]
LibM19090101-V04-01-page17.txt: [('-', ''), ('con-', 'con')]
LibM19090101-V04-01-page18.txt: [('-', '')]
LibM19090101-V04-01-page21.txt: [('-', ''), ('-', '')]
LibM19090101-V04-01-page24.txt: [('-.', '.'), ('.-', '.')]
LibM19090101-V04-01-page26.txt: [('con-', 'con')]
LibM19090101-V04-01-page3.txt: [('relig-', 'relig')]
LibM19090101-V04-01-page30.txt: [('-', '')]
LibM19090101-V04-01-page33.txt: [('-', ''), ('na-', 'na'), ('insti-', 'insti'), ('r--', 'r-')]
LibM19090101-V04-01-page36.txt: [('-', '')]
LibM19090101-V04-01-page37.txt: [('-.', '.'), ('.-', '.')]
LibM19090101-V04-01-page40.txt: [('founda-', 'founda')]
LibM19090101-V04-01-page41.txt: [('per-', 'per')]
LibM19090101-V04-01-page44.txt: [('in-', 'in'), ('disor-', 'disor')]
LibM19090101-V04-01-page45.txt: [('be-', 'be'), ('Mc-', 'Mc')]
LibM19090101-V04-01-page47.txt: [('-', ''), ('-', '')]
LibM19090101-V04-01-page49.txt: [('Post-', 'Post'), ('-', ''), ('-', '')]
LibM19090101-V04-01-page5.txt: [('-', '')]
LibM19090101-V04-01-page50.txt: [('-', '')]
LibM19090101-V04-01-page52.txt: [('-', ''), ('-', '')]
LibM19090101-V04-01-page7.txt: [('SECRE-', 'SECRE')]
LibM19090401-V04-02-page1.txt: [('-', ''), ('-L', 'L'), ('-', ''), ('-', ''), ('---mussuaillir', '--mussuaillir'), ('-', '')]
LibM19090401-V04-02-page10.txt: [('scru-', 'scru')]
LibM19090401-V04-02-page12.txt: [('-', ''), ('Gib-', 'Gib'), ('in-', 'in')]
LibM19090401-V04-02-page2.txt: [('-hi', 'hi'), ('-', ''), ('-ss', 'ss'), ('e-', 'e'), ('-', ''), ('-', ''), ('ja-', 'ja'), ('ace-', 'ace'), ('-Q.s.-', 'Q.s.-'), ('Ca-', 'Ca'), ('-', ''), ('-', ''), ('.....-', '.....'), ('-', ''), ('-', ''), ('-', ''), ('-V', 'V'), ('-', ''), ('-', ''), ('-', ''), ('-dte-y', 'dte-y'), ('-', ''), ('-c.x', 'c.x'), ('-eed', 'eed'), ('-', ''), ('rt-', 'rt'), ('-', ''), ('-', ''), ('-', ''), ("'r-", "'r"), ('-n', 'n')]
LibM19090401-V04-02-page20.txt: [('Vir-', 'Vir')]
LibM19090401-V04-02-page23.txt: [('-', ''), ('As-', 'As')]
LibM19090401-V04-02-page30.txt: [('--', '-'), ('gov-', 'gov')]
LibM19090401-V04-02-page36.txt: [('-t', 't')]
LibM19090401-V04-02-page38.txt: [('Rich-', 'Rich'), ('neverthe-', 'neverthe')]
LibM19090401-V04-02-page45.txt: [('-', ''), ('con-', 'con'), ('-t', 't'), ('-', ''), ('-eta', 'eta'), ('.-', '.'), ('-', ''), ('aforexo.-', 'aforexo.')]
LibM19090401-V04-02-page46.txt: [('-', ''), ('mur-', 'mur'), ('-', ''), ('-.', '.')]
LibM19090401-V04-02-page48.txt: [('PEAR-', 'PEAR'), ('-', ''), ('-', '')]
LibM19090401-V04-02-page49.txt: [('Post-', 'Post'), ('-', ''), ('-', '')]
LibM19090401-V04-02-page5.txt: [('-.', '.'), ('.-', '.')]
LibM19090401-V04-02-page50.txt: [('-', '')]
LibM19090401-V04-02-page51.txt: [('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', '')]
LibM19090401-V04-02-page8.txt: [('Mc-', 'Mc')]
LibM19090401-V04-02-page9.txt: [('-', ''), ('acknowl-', 'acknowl')]
LibM19090701-V04-03-page1.txt: [('-"', '"'), ('-lib', 'lib'), ('..ILI--', '..ILI-')]
LibM19090701-V04-03-page10.txt: [('RECEP-', 'RECEP')]
LibM19090701-V04-03-page11.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('an-', 'an'), ('at--', 'at-')]
LibM19090701-V04-03-page13.txt: [('Con-', 'Con'), ('-', '')]
LibM19090701-V04-03-page14.txt: [('-', '')]
LibM19090701-V04-03-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19090701-V04-03-page22.txt: [('Russian-', 'Russian')]
LibM19090701-V04-03-page23.txt: [('-', '')]
LibM19090701-V04-03-page26.txt: [('-', '')]
LibM19090701-V04-03-page29.txt: [('time-hon-', 'time-hon')]
LibM19090701-V04-03-page3.txt: [('mat-', 'mat')]
LibM19090701-V04-03-page30.txt: [('-', '')]
LibM19090701-V04-03-page32.txt: [('de-', 'de')]
LibM19090701-V04-03-page33.txt: [('non-', 'non'), ('pro-', 'pro')]
LibM19090701-V04-03-page34.txt: [('Hu-', 'Hu'), ('CHRIS-', 'CHRIS'), ('be-', 'be')]
LibM19090701-V04-03-page36.txt: [('--', '-')]
LibM19090701-V04-03-page37.txt: [('there-', 'there'), ('re-', 're')]
LibM19090701-V04-03-page40.txt: [('en-', 'en')]
LibM19090701-V04-03-page42.txt: [('-taptimi', 'taptimi'), ('-thifii', 'thifii'), ('-', ''), ('trinn-', 'trinn'), ('-fihAt', 'fihAt'), ('Yr-', 'Yr')]
LibM19090701-V04-03-page44.txt: [('en-', 'en'), ('Anti-', 'Anti')]
LibM19090701-V04-03-page48.txt: [('Post-', 'Post'), ('Cook-', 'Cook')]
LibM19090701-V04-03-page49.txt: [('-', ''), ('-', ''), ('APPEAR-', 'APPEAR')]
LibM19090701-V04-03-page5.txt: [('govern-', 'govern')]
LibM19090701-V04-03-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19090701-V04-03-page51.txt: [('-', '')]
LibM19090701-V04-03-page52.txt: [('-', ''), ('-', '')]
LibM19090701-V04-03-page6.txt: [('pro-', 'pro')]
LibM19090701-V04-03-page7.txt: [('Mc-', 'Mc')]
LibM19090701-V04-03-page9.txt: [('-', ''), ('-', '')]
LibM19091001-V04-04-page10.txt: [('af-', 'af')]
LibM19091001-V04-04-page11.txt: [('gov-', 'gov'), ('horse-', 'horse')]
LibM19091001-V04-04-page13.txt: [('be-', 'be')]
LibM19091001-V04-04-page14.txt: [('af-', 'af')]
LibM19091001-V04-04-page15.txt: [('R-', 'R')]
LibM19091001-V04-04-page16.txt: [('-.', '.'), ('.-', '.'), ('spiritu-', 'spiritu')]
LibM19091001-V04-04-page17.txt: [('es-', 'es')]
LibM19091001-V04-04-page18.txt: [('di-', 'di')]
LibM19091001-V04-04-page19.txt: [('-', '')]
LibM19091001-V04-04-page2.txt: [('Au-', 'Au'), ('Post-', 'Post')]
LibM19091001-V04-04-page21.txt: [('-', '')]
LibM19091001-V04-04-page22.txt: [('-', ''), ('-', ''), ('anti-', 'anti'), ('-', ''), ('-', ''), ('Mc-', 'Mc')]
LibM19091001-V04-04-page23.txt: [('A.-', 'A.')]
LibM19091001-V04-04-page25.txt: [('.T-', '.T')]
LibM19091001-V04-04-page3.txt: [('-', '')]
LibM19091001-V04-04-page30.txt: [('Ware-', 'Ware'), ('-the', 'the')]
LibM19091001-V04-04-page31.txt: [('-', ''), ('CON-', 'CON')]
LibM19091001-V04-04-page32.txt: [('-', ''), ('-', '')]
LibM19091001-V04-04-page35.txt: [('de-', 'de')]
LibM19091001-V04-04-page36.txt: [('-', '')]
LibM19091001-V04-04-page38.txt: [('b-', 'b'), ('phrase-', 'phrase'), ('-', '')]
LibM19091001-V04-04-page39.txt: [('non-', 'non')]
LibM19091001-V04-04-page4.txt: [('para-', 'para')]
LibM19091001-V04-04-page45.txt: [('finan-', 'finan')]
LibM19091001-V04-04-page47.txt: [('APPEAR-', 'APPEAR')]
LibM19091001-V04-04-page48.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19091001-V04-04-page7.txt: [('Mc-', 'Mc'), ('differen-', 'differen')]
LibM19091001-V04-04-page8.txt: [('-', '')]
LibM19091001-V04-04-page9.txt: [('-', '')]
LibM19100101-V05-01-page1.txt: [('r-', 'r'), ('-.', '.'), ('.-', '.')]
LibM19100101-V05-01-page11.txt: [('thou-', 'thou')]
LibM19100101-V05-01-page13.txt: [('-', ''), ('Ad-', 'Ad')]
LibM19100101-V05-01-page14.txt: [('WASH-', 'WASH'), ('RE-', 'RE'), ('mem-', 'mem')]
LibM19100101-V05-01-page17.txt: [('Mc-', 'Mc'), ('Secretary-of-', 'Secretary-of')]
LibM19100101-V05-01-page19.txt: [('incon-', 'incon')]
LibM19100101-V05-01-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19100101-V05-01-page20.txt: [('com-', 'com')]
LibM19100101-V05-01-page21.txt: [('sup-', 'sup')]
LibM19100101-V05-01-page23.txt: [('free-', 'free')]
LibM19100101-V05-01-page24.txt: [('Chris-', 'Chris')]
LibM19100101-V05-01-page27.txt: [('-', '')]
LibM19100101-V05-01-page31.txt: [('-', ''), ('-', ''), ('--', '-'), ('--', '-')]
LibM19100101-V05-01-page32.txt: [('guar-', 'guar'), ('Postmaster-', 'Postmaster')]
LibM19100101-V05-01-page33.txt: [('-Edward', 'Edward'), ('des-', 'des')]
LibM19100101-V05-01-page34.txt: [('Anti-', 'Anti')]
LibM19100101-V05-01-page35.txt: [('com-', 'com')]
LibM19100101-V05-01-page36.txt: [('-', ''), ('-"', '"'), ('-', '')]
LibM19100101-V05-01-page37.txt: [('separa-', 'separa')]
LibM19100101-V05-01-page39.txt: [('-Z', 'Z'), ('-.E', '.E'), ('-', ''), ('-A', 'A')]
LibM19100101-V05-01-page42.txt: [('-', '')]
LibM19100101-V05-01-page45.txt: [('-', '')]
LibM19100101-V05-01-page46.txt: [('-', ''), ('over-', 'over')]
LibM19100101-V05-01-page47.txt: [('-', '')]
LibM19100101-V05-01-page48.txt: [('-', '')]
LibM19100101-V05-01-page49.txt: [('sp-', 'sp'), ('-', ''), ('-', '')]
LibM19100101-V05-01-page50.txt: [('-', ''), ('Artaa.--', 'Artaa.-')]
LibM19100101-V05-01-page6.txt: [('-', '')]
LibM19100101-V05-01-page7.txt: [('ap-', 'ap'), ('dis-', 'dis'), ('Cath-', 'Cath')]
LibM19100401-V05-02-page1.txt: [('...m..."..--', '...m..."..-'), ('.-', '.'), ('--mommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt', '-mommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt'), ('-.', '.'), ('-', ''), ('-', ''), ("-'-", "'-"), ('-j', 'j'), ("--S-'''", "-S-'''"), ('--', '-')]
LibM19100401-V05-02-page12.txt: [('enforce-', 'enforce'), ('op-', 'op')]
LibM19100401-V05-02-page13.txt: [('-', ''), ('-', '')]
LibM19100401-V05-02-page16.txt: [('Mary-', 'Mary')]
LibM19100401-V05-02-page18.txt: [('-', '')]
LibM19100401-V05-02-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19100401-V05-02-page21.txt: [('sun-', 'sun')]
LibM19100401-V05-02-page24.txt: [('at-', 'at'), ('-church', 'church')]
LibM19100401-V05-02-page25.txt: [('trans-', 'trans')]
LibM19100401-V05-02-page26.txt: [('in-', 'in')]
LibM19100401-V05-02-page27.txt: [('ex-', 'ex')]
LibM19100401-V05-02-page3.txt: [('-PR', 'PR')]
LibM19100401-V05-02-page35.txt: [('-', ''), ('-friEHORRoki', 'friEHORRoki'), ('-CHER', 'CHER')]
LibM19100401-V05-02-page38.txt: [('Sun-', 'Sun')]
LibM19100401-V05-02-page40.txt: [('advo-', 'advo')]
LibM19100401-V05-02-page46.txt: [('re-', 're')]
LibM19100401-V05-02-page48.txt: [('Teach-', 'Teach')]
LibM19100401-V05-02-page49.txt: [('-', '')]
LibM19100401-V05-02-page5.txt: [('LIB-', 'LIB')]
LibM19100401-V05-02-page50.txt: [('-', ''), ('ac-', 'ac')]
LibM19100401-V05-02-page52.txt: [('-', ''), ('legisla-', 'legisla'), ('Jan-', 'Jan')]
LibM19100401-V05-02-page6.txt: [('-', '')]
LibM19100401-V05-02-page8.txt: [('PRESI-', 'PRESI')]
LibM19100701-V05-03-page17.txt: [('pros-', 'pros')]
LibM19100701-V05-03-page18.txt: [('ex-', 'ex')]
LibM19100701-V05-03-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('Lafayette-', 'Lafayette'), ('-', '')]
LibM19100701-V05-03-page20.txt: [('-', ''), ('-', '')]
LibM19100701-V05-03-page21.txt: [('cus-', 'cus'), ('-', '')]
LibM19100701-V05-03-page22.txt: [('-', '')]
LibM19100701-V05-03-page23.txt: [('Cath-', 'Cath')]
LibM19100701-V05-03-page24.txt: [('-', '')]
LibM19100701-V05-03-page26.txt: [('prin-', 'prin')]
LibM19100701-V05-03-page28.txt: [('non-', 'non'), ('insti-', 'insti'), ('re-', 're')]
LibM19100701-V05-03-page30.txt: [('-', ''), ('-I', 'I')]
LibM19100701-V05-03-page32.txt: [('An-', 'An'), ('Gen-', 'Gen')]
LibM19100701-V05-03-page34.txt: [('-', ''), ('meet-', 'meet')]
LibM19100701-V05-03-page35.txt: [('Li-', 'Li'), ('circula-', 'circula'), ('Pro-', 'Pro')]
LibM19100701-V05-03-page37.txt: [('ERRON-', 'ERRON'), ('HISTOR-', 'HISTOR'), ('PRAC-', 'PRAC'), ('-', '')]
LibM19100701-V05-03-page40.txt: [('--', '-')]
LibM19100701-V05-03-page46.txt: [('Anti-', 'Anti')]
LibM19100701-V05-03-page49.txt: [('PROTES-', 'PROTES'), ('MAG-', 'MAG'), ('Roosevelt-', 'Roosevelt'), ('-', '')]
LibM19100701-V05-03-page5.txt: [('Vat-', 'Vat')]
LibM19100701-V05-03-page50.txt: [('-', '')]
LibM19100701-V05-03-page52.txt: [('Inter-', 'Inter'), ('Post-', 'Post')]
LibM19100701-V05-03-page7.txt: [('Mc-', 'Mc')]
LibM19101001-V05-04-page1.txt: [('-', ''), ('-ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS', 'ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS'), ('st-', 'st'), ('-..-...', '..-...'), ('-X', 'X'), ('"-', '"'), ('-', ''), ('r.-', 'r.'), ('-', ''), ('-', ''), ('---', '--')]
LibM19101001-V05-04-page10.txt: [('-under', 'under')]
LibM19101001-V05-04-page11.txt: [('-authority', 'authority')]
LibM19101001-V05-04-page13.txt: [('-', '')]
LibM19101001-V05-04-page15.txt: [('-', '')]
LibM19101001-V05-04-page16.txt: [('gov-', 'gov')]
LibM19101001-V05-04-page19.txt: [('-', ''), ('spir-', 'spir')]
LibM19101001-V05-04-page2.txt: [('-', ''), ('-', ''), ('-S', 'S')]
LibM19101001-V05-04-page21.txt: [('-', ''), ('OPEN-', 'OPEN')]
LibM19101001-V05-04-page23.txt: [('OPEN-', 'OPEN'), ('gov-', 'gov')]
LibM19101001-V05-04-page24.txt: [('MON-', 'MON')]
LibM19101001-V05-04-page25.txt: [('hon-', 'hon')]
LibM19101001-V05-04-page26.txt: [('sig-', 'sig')]
LibM19101001-V05-04-page28.txt: [('MON-', 'MON'), ('char-', 'char'), ('in-', 'in'), ('L-', 'L')]
LibM19101001-V05-04-page29.txt: [('-', ''), ('interna-', 'interna')]
LibM19101001-V05-04-page30.txt: [('com-', 'com')]
LibM19101001-V05-04-page32.txt: [('antipedo-', 'antipedo')]
LibM19101001-V05-04-page34.txt: [('fear-', 'fear'), ('-', '')]
LibM19101001-V05-04-page35.txt: [('consola-', 'consola')]
LibM19101001-V05-04-page36.txt: [('-', '')]
LibM19101001-V05-04-page39.txt: [('y-', 'y')]
LibM19101001-V05-04-page42.txt: [('Zapnath-', 'Zapnath'), ('-"', '"'), ('Tel-el-', 'Tel-el')]
LibM19101001-V05-04-page43.txt: [('de-', 'de')]
LibM19101001-V05-04-page49.txt: [('PROTES-', 'PROTES'), ('MAG-', 'MAG'), ('Roosevelt-', 'Roosevelt')]
LibM19101001-V05-04-page5.txt: [('-.', '.'), ('-', '')]
LibM19101001-V05-04-page50.txt: [('-', '')]
LibM19101001-V05-04-page51.txt: [('-', '')]
LibM19101001-V05-04-page8.txt: [('-', '')]
LibM19101001-V05-04-page9.txt: [('-America', 'America')]
LibM19110101-V06-01-page1.txt: [('-...ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims', '...ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims'), ('-"C""', '"C""'), ('-', ''), ('-', ''), ('-', ''), ('Z---', 'Z--'), ('-', ''), ('.---', '.--'), ('-.', '.'), ('-', ''), ('ir-', 'ir'), ('-', ''), ('"nrnurilillpii"-', '"nrnurilillpii"')]
LibM19110101-V06-01-page11.txt: [('-as', 'as'), ('desire-', 'desire')]
LibM19110101-V06-01-page12.txt: [('sum-', 'sum')]
LibM19110101-V06-01-page13.txt: [('-', '')]
LibM19110101-V06-01-page15.txt: [('-', '')]
LibM19110101-V06-01-page18.txt: [('enforce-', 'enforce'), ('Mc-', 'Mc')]
LibM19110101-V06-01-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110101-V06-01-page20.txt: [('ac-', 'ac')]
LibM19110101-V06-01-page22.txt: [('-i', 'i'), ('-', ''), ('-', '')]
LibM19110101-V06-01-page23.txt: [('-', '')]
LibM19110101-V06-01-page27.txt: [('-', '')]
LibM19110101-V06-01-page29.txt: [('contra-', 'contra')]
LibM19110101-V06-01-page31.txt: [('par-', 'par')]
LibM19110101-V06-01-page34.txt: [('RE-', 'RE'), ('-great', 'great')]
LibM19110101-V06-01-page35.txt: [('lib-', 'lib')]
LibM19110101-V06-01-page36.txt: [('-', '')]
LibM19110101-V06-01-page42.txt: [('-', '')]
LibM19110101-V06-01-page43.txt: [('-', '')]
LibM19110101-V06-01-page49.txt: [('-', '')]
LibM19110101-V06-01-page5.txt: [('differ-', 'differ')]
LibM19110101-V06-01-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110101-V06-01-page6.txt: [('-', ''), ('--', '-'), ('-', ''), ('-', '')]
LibM19110101-V06-01-page7.txt: [('po-', 'po')]
LibM19110101-V06-01-page8.txt: [('Latin-', 'Latin')]
LibM19110101-V06-01-page9.txt: [('--', '-'), ('-', '')]
LibM19110401-V06-02-page1.txt: [('-', '')]
LibM19110401-V06-02-page11.txt: [('ac-', 'ac')]
LibM19110401-V06-02-page12.txt: [('employ-', 'employ')]
LibM19110401-V06-02-page13.txt: [('oc-', 'oc'), ('legiti-', 'legiti')]
LibM19110401-V06-02-page14.txt: [('meas-', 'meas')]
LibM19110401-V06-02-page16.txt: [('nec-', 'nec')]
LibM19110401-V06-02-page18.txt: [('UNI-', 'UNI'), ('labor-', 'labor')]
LibM19110401-V06-02-page2.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110401-V06-02-page20.txt: [('en-', 'en')]
LibM19110401-V06-02-page26.txt: [('varia-', 'varia'), ('-', '')]
LibM19110401-V06-02-page27.txt: [('offi-', 'offi')]
LibM19110401-V06-02-page3.txt: [('-', ''), ('-', ''), ('.-', '.')]
LibM19110401-V06-02-page32.txt: [('Bap-', 'Bap'), ('Relig-', 'Relig')]
LibM19110401-V06-02-page33.txt: [('-wow-', 'wow-')]
LibM19110401-V06-02-page34.txt: [('-', ''), ('per-', 'per')]
LibM19110401-V06-02-page40.txt: [('es-', 'es')]
LibM19110401-V06-02-page42.txt: [('-', ''), ('-', ''), ('-.', '.'), ('.-', '.')]
LibM19110401-V06-02-page43.txt: [('-', ''), ('-', ''), ('God.-', 'God.')]
LibM19110401-V06-02-page46.txt: [('Conti-', 'Conti'), ('BUILD-', 'BUILD'), ('per-', 'per')]
LibM19110401-V06-02-page47.txt: [('Globe-', 'Globe')]
LibM19110401-V06-02-page48.txt: [('-', ''), ('--', '-')]
LibM19110401-V06-02-page49.txt: [('-', ''), ('-', ''), ('-Lamer.', 'Lamer.')]
LibM19110401-V06-02-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19110401-V06-02-page52.txt: [('-o', 'o')]
LibM19110701-V06-03-page1.txt: [('--', '-'), ('-', ''), ('-dkialligranli', 'dkialligranli')]
LibM19110701-V06-03-page10.txt: [('-', '')]
LibM19110701-V06-03-page12.txt: [('-', ''), ('-IT.', 'IT.'), ('right-', 'right')]
LibM19110701-V06-03-page14.txt: [('un-', 'un')]
LibM19110701-V06-03-page15.txt: [('-for', 'for')]
LibM19110701-V06-03-page16.txt: [('establish-', 'establish')]
LibM19110701-V06-03-page2.txt: [('-', ''), ('-', '')]
LibM19110701-V06-03-page21.txt: [('Eng-', 'Eng')]
LibM19110701-V06-03-page22.txt: [('peo-', 'peo')]
LibM19110701-V06-03-page24.txt: [('-', ''), ('manufac-', 'manufac')]
LibM19110701-V06-03-page25.txt: [('ter-', 'ter'), ('wor-', 'wor'), ('-', '')]
LibM19110701-V06-03-page26.txt: [('.-', '.'), ('-', ''), ('.-', '.')]
LibM19110701-V06-03-page27.txt: [('re-', 're')]
LibM19110701-V06-03-page28.txt: [('audience-', 'audience')]
LibM19110701-V06-03-page31.txt: [('ac-', 'ac')]
LibM19110701-V06-03-page32.txt: [('Prot-', 'Prot'), ('re-', 're')]
LibM19110701-V06-03-page33.txt: [('Sabbathkeep-', 'Sabbathkeep'), ('under-', 'under')]
LibM19110701-V06-03-page35.txt: [('mem-', 'mem')]
LibM19110701-V06-03-page37.txt: [('dissolu-', 'dissolu')]
LibM19110701-V06-03-page39.txt: [('-', '')]
LibM19110701-V06-03-page4.txt: [('-', '')]
LibM19110701-V06-03-page41.txt: [('bar-', 'bar')]
LibM19110701-V06-03-page42.txt: [('Ma-', 'Ma')]
LibM19110701-V06-03-page45.txt: [('re-', 're')]
LibM19110701-V06-03-page48.txt: [('-N', 'N')]
LibM19110701-V06-03-page49.txt: [('-', ''), ('-', ''), ('treat-', 'treat')]
LibM19110701-V06-03-page5.txt: [('re-', 're')]
LibM19110701-V06-03-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('expe-', 'expe')]
LibM19110701-V06-03-page52.txt: [('rea-', 'rea')]
LibM19110701-V06-03-page9.txt: [('Post-', 'Post'), ('ob-', 'ob')]
LibM19111001-V06-04-page1.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page11.txt: [('Latin-', 'Latin'), ('An-', 'An'), ('Con-', 'Con')]
LibM19111001-V06-04-page12.txt: [('-', ''), ('guar-', 'guar')]
LibM19111001-V06-04-page14.txt: [('Etats-', 'Etats'), ('-', ''), ('-', '')]
LibM19111001-V06-04-page16.txt: [('----', '---'), ('AMER-', 'AMER')]
LibM19111001-V06-04-page17.txt: [('rev-', 'rev')]
LibM19111001-V06-04-page18.txt: [('o-', 'o'), ('ex-', 'ex')]
LibM19111001-V06-04-page19.txt: [('-said', 'said'), ('legis-', 'legis')]
LibM19111001-V06-04-page2.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page23.txt: [('ex-', 'ex')]
LibM19111001-V06-04-page24.txt: [('-', ''), ('-.', '.'), ('.-', '.')]
LibM19111001-V06-04-page26.txt: [('-', ''), ('con-', 'con')]
LibM19111001-V06-04-page34.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page35.txt: [('argu-', 'argu')]
LibM19111001-V06-04-page36.txt: [('CRUM-', 'CRUM')]
LibM19111001-V06-04-page38.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page39.txt: [('-', ''), ('-', '')]
LibM19111001-V06-04-page40.txt: [('-', '')]
LibM19111001-V06-04-page42.txt: [('-', '')]
LibM19111001-V06-04-page48.txt: [('-', '')]
LibM19111001-V06-04-page49.txt: [('-', ''), ('-', ''), ('PDNIam-', 'PDNIam')]
LibM19111001-V06-04-page5.txt: [('o-', 'o'), ('-', '')]
LibM19111001-V06-04-page50.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19111001-V06-04-page52.txt: [('-li', 'li'), ('Ra-', 'Ra'), ('-li', 'li')]
LibM19111001-V06-04-page8.txt: [('-', '')]
LibM19120101-V07-01-page12.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page15.txt: [('assess-', 'assess'), ('com-', 'com')]
LibM19120101-V07-01-page19.txt: [('-other', 'other')]
LibM19120101-V07-01-page2.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page22.txt: [('com-', 'com')]
LibM19120101-V07-01-page26.txt: [('Novem-', 'Novem')]
LibM19120101-V07-01-page27.txt: [('Pan-', 'Pan')]
LibM19120101-V07-01-page33.txt: [('-', '')]
LibM19120101-V07-01-page37.txt: [('-', '')]
LibM19120101-V07-01-page38.txt: [('Brigadier-', 'Brigadier'), ('fin-', 'fin')]
LibM19120101-V07-01-page39.txt: [('-', '')]
LibM19120101-V07-01-page42.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19120101-V07-01-page43.txt: [('-', ''), ('ESTAB-', 'ESTAB')]
LibM19120101-V07-01-page45.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page46.txt: [('-', '')]
LibM19120101-V07-01-page47.txt: [('-.', '.'), ('.-', '.')]
LibM19120101-V07-01-page49.txt: [('devel-', 'devel'), ('-', ''), ('PM-', 'PM'), ('-', ''), ('p-', 'p')]
LibM19120101-V07-01-page50.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19120101-V07-01-page6.txt: [('-', '')]
LibM19120101-V07-01-page7.txt: [('-', ''), ('Fairbanks-Roosevelt-', 'Fairbanks-Roosevelt')]
LibM19120101-V07-01-page8.txt: [('-', ''), ('-', '')]
LibM19120101-V07-01-page9.txt: [('be-', 'be')]
LibM19120401-V07-02-page2.txt: [('-', '')]
LibM19120401-V07-02-page21.txt: [('--', '-')]
LibM19120401-V07-02-page25.txt: [('no-', 'no')]
LibM19120401-V07-02-page26.txt: [('divi-', 'divi'), ('mat-', 'mat')]
LibM19120401-V07-02-page27.txt: [('di-', 'di')]
LibM19120401-V07-02-page29.txt: [('--', '-')]
LibM19120401-V07-02-page30.txt: [('-', '')]
LibM19120401-V07-02-page31.txt: [('un-', 'un')]
LibM19120401-V07-02-page33.txt: [('un-', 'un'), ('be-', 'be')]
LibM19120401-V07-02-page34.txt: [('un-', 'un')]
LibM19120401-V07-02-page36.txt: [('Accord-', 'Accord')]
LibM19120401-V07-02-page37.txt: [('-', '')]
LibM19120401-V07-02-page38.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-icx-m', 'icx-m'), ('Xl-td-', 'Xl-td'), ('ec-', 'ec'), ('-', ''), ('-', ''), ('-', ''), ('-mensisZ."\'"-', 'mensisZ."\'"-')]
LibM19120401-V07-02-page4.txt: [('-...', '...')]
LibM19120401-V07-02-page40.txt: [('-Sep-', 'Sep-'), ('-szera', 'szera'), ('-ilre', 'ilre')]
LibM19120401-V07-02-page42.txt: [('discus-', 'discus'), ('-sion', 'sion')]
LibM19120401-V07-02-page48.txt: [('-', ''), ('LIB-', 'LIB'), ('-', '')]
LibM19120401-V07-02-page49.txt: [('-', '')]
LibM19120401-V07-02-page51.txt: [('-Seven', 'Seven'), ('ar-', 'ar')]
LibM19120401-V07-02-page6.txt: [('Cali-', 'Cali')]
LibM19120401-V07-02-page7.txt: [('non-', 'non'), ('-', '')]
LibM19120401-V07-02-page8.txt: [('for-', 'for')]
LibM19120701-V07-03-page11.txt: [('be-', 'be')]
LibM19120701-V07-03-page13.txt: [('anti-', 'anti'), ('-rotest', 'rotest'), ('hol-', 'hol')]
LibM19120701-V07-03-page14.txt: [('-', ''), ('-', '')]
LibM19120701-V07-03-page16.txt: [('Sec-', 'Sec')]
LibM19120701-V07-03-page17.txt: [('-', ''), ('com-', 'com')]
LibM19120701-V07-03-page18.txt: [('distinct-', 'distinct')]
LibM19120701-V07-03-page2.txt: [('Co-', 'Co'), ('-', '')]
LibM19120701-V07-03-page20.txt: [('-ss', 'ss')]
LibM19120701-V07-03-page21.txt: [('-', ''), ('-', ''), ('estab-', 'estab')]
LibM19120701-V07-03-page22.txt: [('re-', 're'), ('--', '-')]
LibM19120701-V07-03-page25.txt: [('-', '')]
LibM19120701-V07-03-page28.txt: [('unveil-', 'unveil')]
LibM19120701-V07-03-page30.txt: [('-', '')]
LibM19120701-V07-03-page37.txt: [('-', '')]
LibM19120701-V07-03-page38.txt: [('con-', 'con'), ('AMEND-', 'AMEND'), ('-', '')]
LibM19120701-V07-03-page4.txt: [('-ititeiltintonecfctration', 'ititeiltintonecfctration'), ('-', ''), ('-', ''), ('-', ''), ('e.n.d....-', 'e.n.d....'), ('i-', 'i'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('s-', 's'), ('-', ''), ('-fr', 'fr'), ('ee-', 'ee'), ('-', ''), ('-', ''), ('-..-.', '..-.'), ('f--', 'f-'), ('otb-', 'otb'), ('......-', '......'), ('--', '-'), ('-', ''), ('-a', 'a'), ('-.', '.'), ('-e-', 'e-'), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('d-', 'd'), ('..---', '..--'), ('.i..-', '.i..'), ('..ta.--', '..ta.-'), ('.-', '.'), ('...-', '...'), ('-w', 'w'), ('x.t-', 'x.t'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('.-', '.'), ('..g-Z-', '..g-Z'), ('---.', '--.'), ('--', '-'), ('-', ''), ("---'", "--'"), ('--ft', '-ft'), ('----a', '---a'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-sfo', 'sfo'), ('-', ''), ('....-', '....'), ('-', ''), ('a...-', 'a...'), ('-', ''), ('-.', '.'), ('-', ''), ('--', '-'), ("-.i'", ".i'"), ('N.-', 'N.'), ('m-', 'm'), ('-', ''), ('-', ''), ('-', ''), ('dfr.d.-', 'dfr.d.'), ('-e', 'e'), ('ap-', 'ap'), ('-.onia', '.onia'), ('-', ''), ('-', ''), ('-.', '.'), ('.-', '.'), ('--z', '-z'), ('-', ''), ('-', ''), ('-', ''), ('...-vr-', '...-vr'), ('-.', '.'), ('.-', '.'), ('-', ''), ('e-', 'e'), ('-', ''), ('-e', 'e'), ('-..', '..'), ('-A.c.....', 'A.c.....'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-..g.', '..g.'), ('-.', '.'), ('g--', 'g-'), ('-', ''), ("--'", "-'"), ('-inio', 'inio'), ('-LI', 'LI'), ('-I', 'I'), ('-...', '...'), ('N.-', 'N.'), ('n-', 'n'), ('.ea...-', '.ea...'), ('-a', 'a'), ('-', ''), ('-', ''), ('-i-', 'i-'), ('-..a.A.', '..a.A.'), ('h---', 'h--'), ('.-', '.'), ('-', ''), ('--.r..', '-.r..'), ('.-', '.'), ('-.-', '.-'), ('-', ''), ('---', '--'), ("-'", "'"), ('---is.', '--is.'), ('-', ''), ('-r', 'r'), ('--Yelor.', '-Yelor.'), ('-.', '.'), ('-....-..C.', '....-..C.'), ('-', ''), ('-ir."...ezi..i..', 'ir."...ezi..i..'), ('-', ''), ('-', ''), ('e.e.-', 'e.e.'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('-"', '"'), ('-', ''), ('-.', '.'), ('-', ''), ('....-', '....'), ('-', ''), ('-', ''), ('-', ''), ('"....-', '"....'), ('............nen-', '............nen'), ('--..z..', '-..z..'), ('I-', 'I'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('.-', '.'), ('--', '-'), ('-', ''), ('--Ve', '-Ve'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-a-', 'a-'), ('-', ''), ('.-', '.'), ('-a.Cdr', 'a.Cdr'), ('-', ''), ('eartc-', 'eartc'), ('--', '-'), ('-.-', '.-'), ('..-', '..'), ('-..-.....', '..-.....'), ('-', ''), ('....-', '....'), ('-', ''), ('.--', '.-'), ('--.....', '-.....'), ('-', ''), ('.-', '.'), ("-'..", "'.."), ('-', ''), ('-', ''), ('-', ''), ('-riK-', 'riK-'), ('-', ''), ('-.', '.'), ('--', '-'), ('---', '--'), ('--', '-'), ('-', ''), ('-', ''), ('-.', '.'), ('--r', '-r'), ('.--', '.-'), ('-', ''), ('-...-', '...-')]
LibM19120701-V07-03-page42.txt: [('-', '')]
LibM19120701-V07-03-page43.txt: [('fun-', 'fun'), ('ap-', 'ap')]
LibM19120701-V07-03-page46.txt: [('-', ''), ('-', ''), ('.-', '.')]
LibM19120701-V07-03-page47.txt: [('-', '')]
LibM19120701-V07-03-page48.txt: [('-', ''), ('-', '')]
LibM19120701-V07-03-page49.txt: [('-', ''), ('gentle-', 'gentle')]
LibM19120701-V07-03-page5.txt: [('-', '')]
LibM19120701-V07-03-page51.txt: [('Gov-', 'Gov'), ('dis-', 'dis')]
LibM19120701-V07-03-page52.txt: [('-.', '.'), ('re-', 're'), ('-', ''), ('We-', 'We'), ('-', '')]
LibM19120701-V07-03-page9.txt: [('Pan-', 'Pan'), ('November-', 'November'), ('observ-', 'observ')]
LibM19121001-V07-04-page13.txt: [('-', '')]
LibM19121001-V07-04-page14.txt: [('non-', 'non')]
LibM19121001-V07-04-page15.txt: [('-', ''), ('Postmaster-', 'Postmaster')]
LibM19121001-V07-04-page17.txt: [('-', '')]
LibM19121001-V07-04-page19.txt: [('-', '')]
LibM19121001-V07-04-page2.txt: [('.-', '.'), ('-', ''), ('-', ''), ('Steph-', 'Steph')]
LibM19121001-V07-04-page20.txt: [('mat-', 'mat')]
LibM19121001-V07-04-page21.txt: [('Cath-', 'Cath')]
LibM19121001-V07-04-page23.txt: [('-the', 'the')]
LibM19121001-V07-04-page29.txt: [('deter-', 'deter'), ('-', ''), ('constru-', 'constru'), ('spe-', 'spe')]
LibM19121001-V07-04-page3.txt: [('-', '')]
LibM19121001-V07-04-page31.txt: [('-', '')]
LibM19121001-V07-04-page32.txt: [('-', '')]
LibM19121001-V07-04-page41.txt: [('seek-', 'seek')]
LibM19121001-V07-04-page44.txt: [('relation-', 'relation')]
LibM19121001-V07-04-page5.txt: [('ad-', 'ad')]
LibM19121001-V07-04-page50.txt: [('-..', '..')]
LibM19121001-V07-04-page51.txt: [('-', ''), ('-', ''), ('Answers-', 'Answers')]
LibM19121001-V07-04-page6.txt: [('Orion-', 'Orion'), ('.-', '.'), ('.raityr-', '.raityr'), ('neer.-', 'neer.'), ('V-', 'V'), ('mow-', 'mow')]
LibM19121001-V07-04-page7.txt: [('.-', '.'), ('-', ''), ('-', ''), ('-', '')]
LibM19121001-V07-04-page9.txt: [('-', ''), ('ma-', 'ma')]
LibM19130101-V08-01-page10.txt: [('think-', 'think')]
LibM19130101-V08-01-page11.txt: [('considera-', 'considera')]
LibM19130101-V08-01-page14.txt: [('vigor-', 'vigor')]
LibM19130101-V08-01-page15.txt: [('re-', 're')]
LibM19130101-V08-01-page17.txt: [('---', '--')]
LibM19130101-V08-01-page2.txt: [('Co-', 'Co'), ('lhan-', 'lhan'), ('-', ''), ('MitaM.O.D.mroo.M.O.m.-', 'MitaM.O.D.mroo.M.O.m.')]
LibM19130101-V08-01-page22.txt: [('pro-', 'pro')]
LibM19130101-V08-01-page24.txt: [('-', ''), ('LIB-', 'LIB')]
LibM19130101-V08-01-page25.txt: [('in-', 'in')]
LibM19130101-V08-01-page26.txt: [('inves-', 'inves')]
LibM19130101-V08-01-page27.txt: [('det-', 'det')]
LibM19130101-V08-01-page3.txt: [('-', ''), ('pre-', 'pre'), ('there-', 'there'), ('un-', 'un'), ('-', ''), ('SECOND.-', 'SECOND.'), ('pur-', 'pur'), ('FIRST.-', 'FIRST.'), ('ad-', 'ad')]
LibM19130101-V08-01-page31.txt: [('Atlas-', 'Atlas'), ('individ-', 'individ')]
LibM19130101-V08-01-page32.txt: [('-', ''), ('-', '')]
LibM19130101-V08-01-page35.txt: [('-', '')]
LibM19130101-V08-01-page37.txt: [('-', '')]
LibM19130101-V08-01-page4.txt: [('maga-', 'maga')]
LibM19130101-V08-01-page40.txt: [('-', ''), ('state-', 'state')]
LibM19130101-V08-01-page42.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('freight-', 'freight'), ('ordi-', 'ordi'), ('-went', 'went'), ('-', ''), ('-', '')]
LibM19130101-V08-01-page43.txt: [('-', ''), ('-', ''), ('-bridges', 'bridges'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('x-', 'x'), ('-', ''), ('-', ''), ('-', ''), ('-Io', 'Io'), ('-', ''), ('-', '')]
LibM19130101-V08-01-page44.txt: [('work-', 'work'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('serv-', 'serv')]
LibM19130101-V08-01-page45.txt: [('-', ''), ('xo-', 'xo'), ('i-', 'i'), ('-', ''), ('-', ''), ('x-', 'x'), ('s-', 's'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130101-V08-01-page46.txt: [('-', '')]
LibM19130101-V08-01-page5.txt: [('ad-', 'ad'), ('-', '')]
LibM19130101-V08-01-page50.txt: [('An-', 'An'), ('Ar-', 'Ar'), ('-AMERICAN', 'AMERICAN'), ('Re-', 'Re'), ('So-', 'So'), ('-', ''), ('POST-', 'POST')]
LibM19130101-V08-01-page51.txt: [('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('r---------', 'r--------'), ('-Nr', 'Nr'), ('-', ''), ('-', ''), ('c-', 'c')]
LibM19130101-V08-01-page52.txt: [('-page', 'page')]
LibM19130101-V08-01-page9.txt: [('Na-', 'Na')]
LibM19130401-V08-02-page12.txt: [('min-', 'min')]
LibM19130401-V08-02-page14.txt: [('Co-', 'Co')]
LibM19130401-V08-02-page2.txt: [('-earoominmerk', 'earoominmerk'), ('al-', 'al'), ('-', ''), ('affil-', 'affil')]
LibM19130401-V08-02-page21.txt: [('-', '')]
LibM19130401-V08-02-page22.txt: [('-is', 'is')]
LibM19130401-V08-02-page24.txt: [('-', ''), ('-', ''), ('pro-', 'pro')]
LibM19130401-V08-02-page25.txt: [('-', '')]
LibM19130401-V08-02-page27.txt: [('rea-', 'rea')]
LibM19130401-V08-02-page28.txt: [('-', '')]
LibM19130401-V08-02-page3.txt: [('ad-', 'ad'), ('pur-', 'pur'), ('CITI-', 'CITI'), ('PRE-', 'PRE'), ('Strug-', 'Strug'), ('CHOOS-', 'CHOOS'), ('enjoy-', 'enjoy'), ('PRIN-', 'PRIN'), ('sub-', 'sub'), ('whole-', 'whole')]
LibM19130401-V08-02-page30.txt: [('.ex-', '.ex'), ('-', ''), ('Philadel-', 'Philadel'), ('reso-', 'reso'), ('Scot-', 'Scot'), ('visit-', 'visit'), ('set-', 'set'), ('his-', 'his'), ('re-', 're'), ('hu-', 'hu'), ('con-', 'con')]
LibM19130401-V08-02-page34.txt: [('hav-', 'hav'), ('cer-', 'cer'), ('un-', 'un'), ('ac-', 'ac'), ('ad-', 'ad'), ('maintain-', 'maintain')]
LibM19130401-V08-02-page39.txt: [('mil-', 'mil')]
LibM19130401-V08-02-page4.txt: [('recom-', 'recom')]
LibM19130401-V08-02-page42.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-shops.', 'shops.')]
LibM19130401-V08-02-page43.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('subse-', 'subse'), ('o-', 'o'), ('sub-', 'sub'), ('loo-', 'loo'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130401-V08-02-page44.txt: [('-', ''), ('-', ''), ('-', ''), ('-r', 'r'), ('Sat-', 'Sat'), ('-', ''), ('-a', 'a'), ('-a', 'a')]
LibM19130401-V08-02-page45.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130401-V08-02-page46.txt: [('-', ''), ('o-', 'o'), ('begin-', 'begin')]
LibM19130401-V08-02-page47.txt: [('-', '')]
LibM19130401-V08-02-page49.txt: [('Albu-', 'Albu')]
LibM19130401-V08-02-page5.txt: [('ad-', 'ad')]
LibM19130401-V08-02-page50.txt: [('Ar-', 'Ar'), ('An-', 'An'), ('Re-', 'Re'), ('-', ''), ('POST-', 'POST')]
LibM19130401-V08-02-page51.txt: [('-', ''), ('----', '---'), ('--', '-'), ('-----', '----'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.-', '.-'), ('-.-.', '.-.'), ('.-', '.'), ('.-', '.'), ('---"', '--"'), ('--.', '-.'), ('-Mt', 'Mt'), ('-', ''), ('JUSTI-', 'JUSTI')]
LibM19130401-V08-02-page52.txt: [('-page', 'page')]
LibM19130701-V08-03-page10.txt: [('Mc-', 'Mc'), ('Re-', 'Re')]
LibM19130701-V08-03-page14.txt: [('-T.', 'T.')]
LibM19130701-V08-03-page17.txt: [('al-', 'al')]
LibM19130701-V08-03-page18.txt: [('exer-', 'exer')]
LibM19130701-V08-03-page2.txt: [('Seen-p.deffeatv-', 'Seen-p.deffeatv'), ('-eury.', 'eury.'), ('-eiteile', 'eiteile'), ('rhah-', 'rhah'), ('-', ''), ('-eeedie', 'eeedie'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Yezaedi', 'Yezaedi'), ('-eiraeznactmew', 'eiraeznactmew'), ('-erga-evia', 'erga-evia'), ('-W', 'W'), ('-e', 'e'), ('--elt', '-elt'), ('-e', 'e'), ('MgetOofm-', 'MgetOofm'), ('SaFVtel-', 'SaFVtel'), ('-ix', 'ix')]
LibM19130701-V08-03-page21.txt: [('sena-', 'sena')]
LibM19130701-V08-03-page22.txt: [('corn-', 'corn')]
LibM19130701-V08-03-page26.txt: [('-', ''), ('-', ''), ("-'", "'"), ('-', ''), ('-', '')]
LibM19130701-V08-03-page27.txt: [('-.', '.'), ('...-', '...'), ('-', ''), ('.......--', '.......-'), ('-', ''), ('-....', '....'), ('-"..r...', '"..r...'), ('-', ''), ('-', ''), ('-', ''), ('-.-', '.-'), ('-.', '.'), ('-.', '.'), ('-', ''), ('-...', '...'), ('----.--', '---.--'), ('-.........', '.........'), ('-........"', '........"'), ('-', ''), ('-', ''), ('-', '')]
LibM19130701-V08-03-page29.txt: [('Sun-', 'Sun'), ('restric-', 'restric'), ('re-', 're')]
LibM19130701-V08-03-page3.txt: [('--HE', '-HE'), ('CITIZEN-', 'CITIZEN'), ('CHOOS-', 'CHOOS'), ('enjoy-', 'enjoy'), ('PRIN-', 'PRIN')]
LibM19130701-V08-03-page30.txt: [('exer-', 'exer')]
LibM19130701-V08-03-page32.txt: [('in-', 'in')]
LibM19130701-V08-03-page33.txt: [('pub-', 'pub')]
LibM19130701-V08-03-page36.txt: [('con-', 'con')]
LibM19130701-V08-03-page39.txt: [('hear-', 'hear'), ('Commis-', 'Commis'), ('Sun-', 'Sun'), ('move-', 'move'), ('Chris-', 'Chris')]
LibM19130701-V08-03-page4.txt: [('-', '')]
LibM19130701-V08-03-page41.txt: [('re-', 're')]
LibM19130701-V08-03-page42.txt: [('GOV-', 'GOV')]
LibM19130701-V08-03-page44.txt: [('-t', 't'), ('-', ''), ('cd-n-', 'cd-n'), ('-ca.z', 'ca.z'), ('zW-', 'zW'), ('A-', 'A')]
LibM19130701-V08-03-page49.txt: [('-', ''), ('-', ''), ('-ICIT', 'ICIT'), ('ADVER-', 'ADVER'), ('-', ''), ('-', ''), ('-eX', 'eX')]
LibM19130701-V08-03-page5.txt: [('-', ''), ('ad-', 'ad')]
LibM19130701-V08-03-page50.txt: [('An-', 'An')]
LibM19130701-V08-03-page51.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19130701-V08-03-page6.txt: [('-', ''), ('--', '-')]
LibM19130701-V08-03-page8.txt: [('command-', 'command')]
LibM19130701-V08-03-page9.txt: [('Mc-', 'Mc'), ('Mc-', 'Mc')]
LibM19131001-V08-04-page10.txt: [('un-', 'un')]
LibM19131001-V08-04-page11.txt: [('state-estab-', 'state-estab')]
LibM19131001-V08-04-page12.txt: [('-', '')]
LibM19131001-V08-04-page13.txt: [('Sun-', 'Sun'), ('with-', 'with'), ('extrav-', 'extrav'), ('preseri-', 'preseri')]
LibM19131001-V08-04-page14.txt: [('Babylo-', 'Babylo')]
LibM19131001-V08-04-page18.txt: [('--', '-')]
LibM19131001-V08-04-page2.txt: [('-', '')]
LibM19131001-V08-04-page20.txt: [('-', '')]
LibM19131001-V08-04-page22.txt: [('mo-', 'mo')]
LibM19131001-V08-04-page25.txt: [('ex-', 'ex'), ('ex-', 'ex'), ('Sun-', 'Sun'), ('ex-', 'ex')]
LibM19131001-V08-04-page26.txt: [('pre-', 'pre')]
LibM19131001-V08-04-page28.txt: [('-uncontrolled', 'uncontrolled')]
LibM19131001-V08-04-page29.txt: [('in-', 'in')]
LibM19131001-V08-04-page3.txt: [('CHOOS-', 'CHOOS'), ('PRIN-', 'PRIN'), ('enjoy-', 'enjoy'), ('intol-', 'intol'), ('sub-', 'sub'), ('whole-', 'whole'), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('ad-', 'ad'), ("'q-", "'q")]
LibM19131001-V08-04-page30.txt: [('com-', 'com')]
LibM19131001-V08-04-page31.txt: [('Re-', 'Re')]
LibM19131001-V08-04-page33.txt: [('ab-', 'ab')]
LibM19131001-V08-04-page34.txt: [('uni-', 'uni'), ('prog-', 'prog')]
LibM19131001-V08-04-page36.txt: [('-', ''), ('-', ''), ('-', ''), ('relig-', 'relig')]
LibM19131001-V08-04-page39.txt: [('--', '-')]
LibM19131001-V08-04-page4.txt: [('-', ''), ('-', ''), ('-o', 'o')]
LibM19131001-V08-04-page41.txt: [('-', '')]
LibM19131001-V08-04-page43.txt: [('-questions', 'questions')]
LibM19131001-V08-04-page44.txt: [('govern-', 'govern')]
LibM19131001-V08-04-page45.txt: [('D-', 'D'), ('-', '')]
LibM19131001-V08-04-page46.txt: [('-is', 'is')]
LibM19131001-V08-04-page49.txt: [('ADVER-', 'ADVER')]
LibM19131001-V08-04-page5.txt: [('ad-', 'ad')]
LibM19131001-V08-04-page50.txt: [('-', ''), ('-', ''), ('Ar-', 'Ar')]
LibM19131001-V08-04-page51.txt: [('mission-', 'mission')]
LibM19131001-V08-04-page52.txt: [('--', '-'), ('-', ''), ("'.-", "'."), ('-', ''), ('-', ''), ('-.IA', '.IA')]
LibM19131001-V08-04-page7.txt: [('-MMI.', 'MMI.'), ('M.-', 'M.'), ('-MED.', 'MED.'), ('-', ''), ('-rthe', 'rthe')]
LibM19140101-V09-01-page1.txt: [('-', '')]
LibM19140101-V09-01-page11.txt: [('-MWOO', 'MWOO'), ('-', '')]
LibM19140101-V09-01-page18.txt: [('-I', 'I'), ('-', ''), ('-from', 'from'), ('prin-', 'prin')]
LibM19140101-V09-01-page19.txt: [('cler-', 'cler'), ('-that', 'that')]
LibM19140101-V09-01-page2.txt: [('-mm.', 'mm.'), ('-', '')]
LibM19140101-V09-01-page21.txt: [('-all', 'all')]
LibM19140101-V09-01-page23.txt: [('-entered', 'entered'), ('heaven-', 'heaven'), ('-', ''), ('govern-', 'govern'), ('syn-', 'syn'), ('be-', 'be'), ('-result', 'result'), ('with-', 'with')]
LibM19140101-V09-01-page25.txt: [('-rights', 'rights')]
LibM19140101-V09-01-page26.txt: [('-our', 'our'), ('con-', 'con')]
LibM19140101-V09-01-page27.txt: [('-for', 'for')]
LibM19140101-V09-01-page29.txt: [('-', ''), ('-Sabbath', 'Sabbath'), ('un-', 'un')]
LibM19140101-V09-01-page3.txt: [('LIBER-', 'LIBER'), ('-inch', 'inch'), ('CHANG-', 'CHANG'), ('CARE-', 'CARE')]
LibM19140101-V09-01-page30.txt: [('king--', 'king-'), ('-', '')]
LibM19140101-V09-01-page31.txt: [('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', '')]
LibM19140101-V09-01-page33.txt: [('-are', 'are')]
LibM19140101-V09-01-page36.txt: [('-', '')]
LibM19140101-V09-01-page38.txt: [('Chris-', 'Chris'), ('com-', 'com'), ('Re-', 'Re'), ('-', ''), ('-', '')]
LibM19140101-V09-01-page43.txt: [('prob-', 'prob')]
LibM19140101-V09-01-page44.txt: [('BUILD-', 'BUILD')]
LibM19140101-V09-01-page46.txt: [('say-', 'say'), ('an--', 'an-')]
LibM19140101-V09-01-page47.txt: [('-', '')]
LibM19140101-V09-01-page48.txt: [('citi-', 'citi')]
LibM19140101-V09-01-page52.txt: [('-.', '.')]
LibM19140101-V09-01-page53.txt: [('-', ''), ('e.A-', 'e.A'), ('Ar-', 'Ar'), ('An-', 'An')]
LibM19140101-V09-01-page54.txt: [('-i', 'i'), ('Albu-', 'Albu'), ('Aven-', 'Aven')]
LibM19140101-V09-01-page55.txt: [('-', ''), ('-.', '.')]
LibM19140101-V09-01-page56.txt: [('-VoPr', 'VoPr'), ('-', ''), ('-NA', 'NA'), ('-.N', '.N')]
LibM19140101-V09-01-page8.txt: [('-', '')]
LibM19140101-V09-01-page9.txt: [('ad-', 'ad')]
LibM19140401-V09-02-page1.txt: [('--gm', '-gm')]
LibM19140401-V09-02-page11.txt: [('-', ''), ('be-', 'be')]
LibM19140401-V09-02-page12.txt: [('al-', 'al'), ('combina-', 'combina'), ('coun-', 'coun'), ('un-', 'un')]
LibM19140401-V09-02-page13.txt: [('-object', 'object'), ('.-', '.')]
LibM19140401-V09-02-page14.txt: [('-intolerant', 'intolerant'), ('prod-', 'prod')]
LibM19140401-V09-02-page15.txt: [('Sun-', 'Sun')]
LibM19140401-V09-02-page16.txt: [('-', '')]
LibM19140401-V09-02-page17.txt: [('-', ''), ('ob-', 'ob')]
LibM19140401-V09-02-page18.txt: [('ASSEM-', 'ASSEM')]
LibM19140401-V09-02-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('af-', 'af'), ('-', '')]
LibM19140401-V09-02-page22.txt: [('Con-', 'Con')]
LibM19140401-V09-02-page25.txt: [('-', ''), ('-', ''), ('citi-', 'citi'), ('stat-', 'stat')]
LibM19140401-V09-02-page26.txt: [('prop-', 'prop')]
LibM19140401-V09-02-page27.txt: [('funda-', 'funda'), ('-', '')]
LibM19140401-V09-02-page29.txt: [('-', '')]
LibM19140401-V09-02-page3.txt: [('CIRCULAT-', 'CIRCULAT')]
LibM19140401-V09-02-page30.txt: [('forty-', 'forty')]
LibM19140401-V09-02-page32.txt: [('-', '')]
LibM19140401-V09-02-page33.txt: [('en-', 'en')]
LibM19140401-V09-02-page35.txt: [('PROTES-', 'PROTES')]
LibM19140401-V09-02-page36.txt: [('in-', 'in')]
LibM19140401-V09-02-page38.txt: [('-', '')]
LibM19140401-V09-02-page41.txt: [('MAGA-', 'MAGA')]
LibM19140401-V09-02-page43.txt: [('BE-', 'BE'), ('-', '')]
LibM19140401-V09-02-page44.txt: [('A-i-', 'A-i'), ('.-', '.'), ('PARTNER-', 'PARTNER')]
LibM19140401-V09-02-page46.txt: [('-', '')]
LibM19140401-V09-02-page48.txt: [('-.', '.')]
LibM19140401-V09-02-page49.txt: [('---.', '--.'), ('---il', '--il')]
LibM19140401-V09-02-page5.txt: [('ad-', 'ad')]
LibM19140401-V09-02-page50.txt: [('ADVER-', 'ADVER')]
LibM19140401-V09-02-page52.txt: [('dan-', 'dan'), ('stern-', 'stern'), ('in-', 'in'), ('re-', 're'), ('-', ''), ('-', '')]
LibM19140401-V09-02-page6.txt: [('-', '')]
LibM19140401-V09-02-page7.txt: [('-', ''), ('MWO-', 'MWO'), ('MOD-', 'MOD'), ('glo-', 'glo')]
LibM19140701-V09-03-page10.txt: [('sacra-', 'sacra'), ('-Surely', 'Surely'), ('op-', 'op')]
LibM19140701-V09-03-page11.txt: [('estab-', 'estab')]
LibM19140701-V09-03-page12.txt: [('--', '-')]
LibM19140701-V09-03-page15.txt: [('transi-', 'transi')]
LibM19140701-V09-03-page17.txt: [('sub-', 'sub'), ('re-', 're')]
LibM19140701-V09-03-page2.txt: [('-', '')]
LibM19140701-V09-03-page24.txt: [('-', ''), ('-', '')]
LibM19140701-V09-03-page27.txt: [('-ence', 'ence')]
LibM19140701-V09-03-page29.txt: [('-', '')]
LibM19140701-V09-03-page3.txt: [('-', ''), ('-', ''), ('CIRCULAT-', 'CIRCULAT')]
LibM19140701-V09-03-page31.txt: [('free-', 'free')]
LibM19140701-V09-03-page33.txt: [('se-', 'se')]
LibM19140701-V09-03-page34.txt: [('prop-', 'prop'), ('-', ''), ('ambi-', 'ambi')]
LibM19140701-V09-03-page35.txt: [('-', '')]
LibM19140701-V09-03-page36.txt: [('-', ''), ('rea-', 'rea'), ('Chris-', 'Chris')]
LibM19140701-V09-03-page39.txt: [('-', ''), ('boy-', 'boy')]
LibM19140701-V09-03-page4.txt: [('magazine-', 'magazine'), ('-', '')]
LibM19140701-V09-03-page40.txt: [('itsfunda-', 'itsfunda'), ('-theft', 'theft')]
LibM19140701-V09-03-page42.txt: [('-Most', 'Most'), ('-', '')]
LibM19140701-V09-03-page44.txt: [('prohibit-', 'prohibit')]
LibM19140701-V09-03-page48.txt: [('-.', '.')]
LibM19140701-V09-03-page49.txt: [('-', ''), ('k-', 'k'), ('-i..', 'i..'), ('arwl-A-', 'arwl-A'), ('-', ''), ('-"', '"'), ("'-", "'"), ('-', '')]
LibM19140701-V09-03-page5.txt: [('-', ''), ('ad-', 'ad')]
LibM19140701-V09-03-page51.txt: [('V-', 'V'), ('-The', 'The'), ('."-', '."')]
LibM19140701-V09-03-page7.txt: [('be-', 'be')]
LibM19141001-V09-04-page10.txt: [('-', ''), ('-', ''), ('fail-', 'fail')]
LibM19141001-V09-04-page11.txt: [('-', ''), ('-the', 'the')]
LibM19141001-V09-04-page13.txt: [('bless-', 'bless'), ('re-', 're')]
LibM19141001-V09-04-page14.txt: [('re-', 're')]
LibM19141001-V09-04-page18.txt: [('Robes-', 'Robes'), ('be-', 'be')]
LibM19141001-V09-04-page19.txt: [("-law.'", "law.'")]
LibM19141001-V09-04-page2.txt: [('-', ''), ('-', '')]
LibM19141001-V09-04-page22.txt: [('penal-', 'penal')]
LibM19141001-V09-04-page26.txt: [('TI-', 'TI')]
LibM19141001-V09-04-page27.txt: [('en-', 'en'), ('say-', 'say')]
LibM19141001-V09-04-page29.txt: [('Medo-', 'Medo'), ('es-', 'es')]
LibM19141001-V09-04-page30.txt: [('-in', 'in'), ('-note', 'note')]
LibM19141001-V09-04-page31.txt: [('AMERI-', 'AMERI')]
LibM19141001-V09-04-page33.txt: [('an-', 'an'), ('-', ''), ('Star-', 'Star'), ('-', ''), ('-', '')]
LibM19141001-V09-04-page34.txt: [('-', ''), ('STAR-', 'STAR')]
LibM19141001-V09-04-page35.txt: [('-', ''), ('rz-', 'rz'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('.--', '.-'), ('-rs.', 'rs.'), ('-', ''), ('-', ''), ('.-', '.'), ('-', '')]
LibM19141001-V09-04-page36.txt: [('espe--', 'espe-'), ('--', '-')]
LibM19141001-V09-04-page37.txt: [('wor-', 'wor')]
LibM19141001-V09-04-page38.txt: [('es-', 'es')]
LibM19141001-V09-04-page39.txt: [('-', '')]
LibM19141001-V09-04-page4.txt: [('ad-', 'ad'), ('M-', 'M')]
LibM19141001-V09-04-page42.txt: [('op-', 'op')]
LibM19141001-V09-04-page43.txt: [('here.-', 'here.'), ("'-", "'")]
LibM19141001-V09-04-page44.txt: [('away.-', 'away.')]
LibM19141001-V09-04-page46.txt: [('-', ''), ('"-', '"')]
LibM19141001-V09-04-page48.txt: [('-', '')]
LibM19141001-V09-04-page49.txt: [('-', '')]
LibM19141001-V09-04-page50.txt: [('Twenty-', 'Twenty'), ('-.-', '.-'), ('Mili-', 'Mili'), ('Hala-', 'Hala'), ('-.', '.'), ('At-', 'At'), ('Lan-', 'Lan'), ('-rli', 'rli'), ('Tram-', 'Tram'), ('J-', 'J'), ('Pe-', 'Pe'), ('Albu-', 'Albu'), ('LI-', 'LI'), ('Bloom-', 'Bloom'), ('--', '-'), ('-.', '.'), ('-', ''), ('-', '')]
LibM19141001-V09-04-page51.txt: [('-', ''), ('--', '-'), ('.f------', '.f-----'), ('-----', '----'), ('-TESTINC', 'TESTINC'), ('-i', 'i'), ('monarchi-', 'monarchi'), ('Con-', 'Con'), ('-', '')]
LibM19141001-V09-04-page52.txt: [('DR.A-', 'DR.A'), ('-', ''), ('-.', '.')]
LibM19141001-V09-04-page7.txt: [('-', ''), ('-', ''), ('-.MM', '.MM')]
LibM19141001-V09-04-page8.txt: [('-', ''), ('na-', 'na'), ('Har-', 'Har'), ('-', '')]
LibM19141001-V09-04-page9.txt: [('-', ''), ('-', '')]
LibM19150101-V10-01-page10.txt: [('-', '')]
LibM19150101-V10-01-page11.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19150101-V10-01-page13.txt: [('Con-', 'Con')]
LibM19150101-V10-01-page15.txt: [('re-', 're')]
LibM19150101-V10-01-page16.txt: [('-', ''), ('-i', 'i'), ('---', '--')]
LibM19150101-V10-01-page17.txt: [('discrimi-', 'discrimi')]
LibM19150101-V10-01-page18.txt: [('-', '')]
LibM19150101-V10-01-page2.txt: [('pre-', 'pre'), ('-', ''), ('affil-', 'affil')]
LibM19150101-V10-01-page20.txt: [('-', '')]
LibM19150101-V10-01-page25.txt: [('-io.', 'io.'), ('-', ''), ('-', ''), ('destruc-', 'destruc')]
LibM19150101-V10-01-page26.txt: [('-', ''), ('-', '')]
LibM19150101-V10-01-page27.txt: [('declared-', 'declared')]
LibM19150101-V10-01-page28.txt: [('Sat-', 'Sat')]
LibM19150101-V10-01-page29.txt: [('viola-', 'viola')]
LibM19150101-V10-01-page3.txt: [('magae-', 'magae'), ('SUBSCRIP-', 'SUBSCRIP')]
LibM19150101-V10-01-page30.txt: [('-I', 'I')]
LibM19150101-V10-01-page31.txt: [('prohibit-', 'prohibit')]
LibM19150101-V10-01-page34.txt: [('Star-', 'Star')]
LibM19150101-V10-01-page35.txt: [('cathe-', 'cathe')]
LibM19150101-V10-01-page36.txt: [('of-', 'of')]
LibM19150101-V10-01-page38.txt: [('fol-', 'fol')]
LibM19150101-V10-01-page39.txt: [('-The', 'The')]
LibM19150101-V10-01-page41.txt: [('A-', 'A'), ('-', '')]
LibM19150101-V10-01-page42.txt: [('to-', 'to')]
LibM19150101-V10-01-page45.txt: [('-', '')]
LibM19150101-V10-01-page46.txt: [('-', '')]
LibM19150101-V10-01-page47.txt: [('or-', 'or')]
LibM19150101-V10-01-page48.txt: [('Alco-', 'Alco')]
LibM19150101-V10-01-page50.txt: [('Sunday.-', 'Sunday.'), ('-', ''), ('-sorrow', 'sorrow'), ('-', ''), ('-', ''), ('an-', 'an')]
LibM19150101-V10-01-page51.txt: [('Ti-', 'Ti'), ('-', '')]
LibM19150101-V10-01-page52.txt: [('Mill-', 'Mill'), ('Rap-', 'Rap'), ('.mmmmEiv-', '.mmmmEiv'), ('Trum-', 'Trum'), ('Pe-', 'Pe'), ('Lan-', 'Lan'), ('Luck-', 'Luck'), ('Alba-', 'Alba'), ('Aven-', 'Aven'), ('Bloom-', 'Bloom'), ('-', '')]
LibM19150101-V10-01-page53.txt: [('-', ''), ('FREE-', 'FREE'), ('-', '')]
LibM19150101-V10-01-page8.txt: [('-', '')]
LibM19150401-V10-02-page11.txt: [('intro-', 'intro')]
LibM19150401-V10-02-page12.txt: [('litho-', 'litho'), ('Corn-', 'Corn')]
LibM19150401-V10-02-page14.txt: [('Postmaster-', 'Postmaster'), ('pam-', 'pam')]
LibM19150401-V10-02-page15.txt: [('Postmaster-', 'Postmaster')]
LibM19150401-V10-02-page17.txt: [('-', ''), ('-legislation.', 'legislation.'), ('un-', 'un'), ('pub-', 'pub')]
LibM19150401-V10-02-page18.txt: [('of-', 'of'), ('re-', 're')]
LibM19150401-V10-02-page19.txt: [('Mc-', 'Mc')]
LibM19150401-V10-02-page2.txt: [('-', '')]
LibM19150401-V10-02-page21.txt: [('RE-', 'RE')]
LibM19150401-V10-02-page23.txt: [('free-', 'free'), ('WASH-', 'WASH'), ('reli-', 'reli')]
LibM19150401-V10-02-page25.txt: [('WASH-', 'WASH'), ('Postmaster-', 'Postmaster'), ('de-', 'de'), ('WASH-', 'WASH')]
LibM19150401-V10-02-page26.txt: [('Postmaster-', 'Postmaster')]
LibM19150401-V10-02-page27.txt: [('Cath-', 'Cath')]
LibM19150401-V10-02-page28.txt: [('CAP-', 'CAP')]
LibM19150401-V10-02-page29.txt: [('per-', 'per')]
LibM19150401-V10-02-page3.txt: [('cer-', 'cer'), ('Hear-', 'Hear')]
LibM19150401-V10-02-page30.txt: [('or-', 'or')]
LibM19150401-V10-02-page32.txt: [('gen-', 'gen')]
LibM19150401-V10-02-page36.txt: [('Lot-', 'Lot')]
LibM19150401-V10-02-page38.txt: [('-the', 'the')]
LibM19150401-V10-02-page4.txt: [('.-', '.')]
LibM19150401-V10-02-page41.txt: [('-the', 'the')]
LibM19150401-V10-02-page43.txt: [('.-', '.')]
LibM19150401-V10-02-page44.txt: [('un-', 'un'), ('-', '')]
LibM19150401-V10-02-page46.txt: [('pre-', 'pre'), ('sub-', 'sub'), ("'O-", "'O")]
LibM19150401-V10-02-page48.txt: [('-', ''), ('sa-', 'sa'), ('busi-', 'busi'), ('hence-', 'hence'), ('-', '')]
LibM19150401-V10-02-page49.txt: [('-', ''), ('--', '-'), ('HUN-', 'HUN')]
LibM19150401-V10-02-page5.txt: [('Philip-', 'Philip')]
LibM19150401-V10-02-page50.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-M.', 'M.'), ('.-', '.'), ('signifi-', 'signifi'), ('-', ''), ('-', '')]
LibM19150401-V10-02-page51.txt: [('.--', '.-'), ('-', ''), ('.-', '.'), ('-PER', 'PER')]
LibM19150401-V10-02-page52.txt: [('-', ''), ('-', ''), ('-', ''), ('iiimm--', 'iiimm-'), ('---', '--'), ('Ad-', 'Ad')]
LibM19150401-V10-02-page6.txt: [('---', '--'), ('kc-', 'kc'), ('-', ''), ('PEACE-', 'PEACE'), ('ASSEMB-', 'ASSEMB'), ('-', ''), ('-', ''), ('lost.-', 'lost.'), ('ri-', 'ri'), ('-K', 'K')]
LibM19150401-V10-02-page7.txt: [('-', ''), ('-', '')]
LibM19150401-V10-02-page9.txt: [('-', '')]
LibM19150701-V10-03-page1.txt: [('--', '-')]
LibM19150701-V10-03-page10.txt: [('-', '')]
LibM19150701-V10-03-page14.txt: [('be-', 'be')]
LibM19150701-V10-03-page15.txt: [('-', '')]
LibM19150701-V10-03-page17.txt: [('-', '')]
LibM19150701-V10-03-page2.txt: [('inter-', 'inter'), ('Col-', 'Col'), ('affil-', 'affil'), ('affili-', 'affili'), ('Massa-', 'Massa'), ('Connecti-', 'Connecti')]
LibM19150701-V10-03-page21.txt: [('-', '')]
LibM19150701-V10-03-page22.txt: [('-', ''), ('Chris-', 'Chris'), ('Eng-', 'Eng')]
LibM19150701-V10-03-page25.txt: [('plot-', 'plot')]
LibM19150701-V10-03-page26.txt: [('meth-', 'meth')]
LibM19150701-V10-03-page27.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19150701-V10-03-page28.txt: [('degen-', 'degen')]
LibM19150701-V10-03-page3.txt: [('illus-', 'illus'), ('pro-', 'pro'), ('prohibi-', 'prohibi'), ('SUB-', 'SUB'), ('attor-', 'attor'), ('-', '')]
LibM19150701-V10-03-page30.txt: [('princi-', 'princi')]
LibM19150701-V10-03-page33.txt: [('clas-', 'clas')]
LibM19150701-V10-03-page36.txt: [('zeal-', 'zeal')]
LibM19150701-V10-03-page4.txt: [('ad-', 'ad')]
LibM19150701-V10-03-page42.txt: [('-', ''), ('-sAfd', 'sAfd'), ('-', '')]
LibM19150701-V10-03-page45.txt: [('-', ''), ('-', '')]
LibM19150701-V10-03-page46.txt: [('-', '')]
LibM19150701-V10-03-page47.txt: [('-', ''), ('-', ''), ('caus-', 'caus')]
LibM19150701-V10-03-page49.txt: [('-', '')]
LibM19150701-V10-03-page50.txt: [('Fa-', 'Fa')]
LibM19150701-V10-03-page8.txt: [('-', '')]
LibM19151001-V10-04-page1.txt: [('-', '')]
LibM19151001-V10-04-page10.txt: [('-', '')]
LibM19151001-V10-04-page11.txt: [('lib-', 'lib')]
LibM19151001-V10-04-page12.txt: [('-', '')]
LibM19151001-V10-04-page14.txt: [('pub-', 'pub')]
LibM19151001-V10-04-page15.txt: [('-', ''), ('-', '')]
LibM19151001-V10-04-page18.txt: [('-', '')]
LibM19151001-V10-04-page19.txt: [('-', ''), ('-', ''), ('dis-', 'dis')]
LibM19151001-V10-04-page2.txt: [('-', ''), ('Col-', 'Col'), ('af-', 'af')]
LibM19151001-V10-04-page23.txt: [('former-', 'former')]
LibM19151001-V10-04-page25.txt: [('-', '')]
LibM19151001-V10-04-page26.txt: [('bul-', 'bul'), ('to-', 'to')]
LibM19151001-V10-04-page27.txt: [('-', ''), ('s-', 's'), ('-', '')]
LibM19151001-V10-04-page28.txt: [('reli-', 'reli')]
LibM19151001-V10-04-page30.txt: [('indi-', 'indi')]
LibM19151001-V10-04-page31.txt: [('-proper', 'proper'), ('-', '')]
LibM19151001-V10-04-page33.txt: [('-', '')]
LibM19151001-V10-04-page37.txt: [('-', '')]
LibM19151001-V10-04-page42.txt: [('Panama-', 'Panama'), ('repre-', 'repre')]
LibM19151001-V10-04-page45.txt: [('-', '')]
LibM19151001-V10-04-page48.txt: [('Ama-', 'Ama'), ('Eng-', 'Eng'), ('Bloom-', 'Bloom'), ('-', ''), ('go-', 'go'), ('Mili-', 'Mili')]
LibM19151001-V10-04-page49.txt: [('effec-', 'effec'), ('per-', 'per')]
LibM19151001-V10-04-page51.txt: [('Tem-', 'Tem')]
LibM19151001-V10-04-page7.txt: [('-WARDE', 'WARDE')]
LibM19160101-V11-01-page11.txt: [('legisla-', 'legisla'), ('Peru-', 'Peru')]
LibM19160101-V11-01-page12.txt: [('bish-', 'bish')]
LibM19160101-V11-01-page13.txt: [('-', ''), ('-', '')]
LibM19160101-V11-01-page18.txt: [('institu-', 'institu')]
LibM19160101-V11-01-page21.txt: [('-', '')]
LibM19160101-V11-01-page23.txt: [('-', ''), ('-', ''), ('-', ''), ('lan-', 'lan')]
LibM19160101-V11-01-page25.txt: [('prob-', 'prob')]
LibM19160101-V11-01-page27.txt: [('be-', 'be')]
LibM19160101-V11-01-page28.txt: [('-', '')]
LibM19160101-V11-01-page30.txt: [('perni-', 'perni')]
LibM19160101-V11-01-page35.txt: [('Postmaster-', 'Postmaster')]
LibM19160101-V11-01-page36.txt: [('-I', 'I')]
LibM19160101-V11-01-page4.txt: [('-', '')]
LibM19160101-V11-01-page44.txt: [('-', ''), ('-', ''), ('-e....lft', 'e....lft'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('......-', '......'), ('...-', '...'), ('-', ''), ('-', ''), ('-', ''), ('....-', '....'), ('-', ''), ('--', '-'), ('-', ''), ('".-r-', '".-r'), ('-', ''), ('-', ''), ('-', ''), ('-..', '..'), ("-'t", "'t"), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('A-', 'A'), ('--', '-'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-f...-V', 'f...-V'), ('--', '-'), ('-', ''), ('-', ''), ('-', '')]
LibM19160101-V11-01-page45.txt: [('con-', 'con')]
LibM19160101-V11-01-page5.txt: [('--', '-')]
LibM19160101-V11-01-page6.txt: [('KEN-', 'KEN')]
LibM19160101-V11-01-page8.txt: [('-', '')]
LibM19160101-V11-01-page9.txt: [('--', '-')]
LibM19160101-V11-01e-page11.txt: [('drug-', 'drug')]
LibM19160101-V11-01e-page16.txt: [('-IN', 'IN'), ('repro-', 'repro'), ('en-', 'en'), ('mail-', 'mail'), ('-with', 'with'), ('pub-', 'pub'), ('assur-', 'assur'), ('mails-', 'mails')]
LibM19160101-V11-01e-page3.txt: [('-', '')]
LibM19160101-V11-01e-page4.txt: [('liberty-', 'liberty'), ('jury.--', 'jury.-'), ('-', '')]
LibM19160101-V11-01e-page5.txt: [('senti-', 'senti')]
LibM19160101-V11-01e-page9.txt: [('P-', 'P'), ('be-', 'be')]
LibM19160401-V11-02-page1.txt: [('-', '')]
LibM19160401-V11-02-page10.txt: [('OBSERV-', 'OBSERV'), ("'Na-", "'Na")]
LibM19160401-V11-02-page13.txt: [('be-', 'be'), ('au-', 'au')]
LibM19160401-V11-02-page14.txt: [('-', ''), ('persecution.--', 'persecution.-')]
LibM19160401-V11-02-page16.txt: [('mat-', 'mat')]
LibM19160401-V11-02-page17.txt: [('censor-', 'censor')]
LibM19160401-V11-02-page18.txt: [('Corn-', 'Corn'), ('Postmaster-', 'Postmaster')]
LibM19160401-V11-02-page2.txt: [('-', ''), ('"-', '"'), ('wor-', 'wor'), ('-', ''), ('-', ''), ('fore-', 'fore'), ('prop-', 'prop'), ('-', ''), ('scurril-', 'scurril'), ('mat-', 'mat'), ('decide.-', 'decide.')]
LibM19160401-V11-02-page20.txt: [('-', '')]
LibM19160401-V11-02-page22.txt: [('often-', 'often'), ('mat-', 'mat')]
LibM19160401-V11-02-page24.txt: [('omis-', 'omis')]
LibM19160401-V11-02-page26.txt: [('-', ''), ("'-.-", "'-."), ('-.--.-', '.--.-'), ('.-', '.'), ('.-', '.'), ('-f.', 'f.'), ('Or-', 'Or'), ('-', ''), ('-', ''), ('-.-', '.-'), ('-', ''), ("'-", "'"), ('-', ''), ('.....-', '.....'), ('r-', 'r'), ('-', ''), ('----', '---'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ("-'", "'"), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('"-', '"'), ('--', '-'), ('------.---', '-----.---'), ('...-', '...'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('..-.-.-', '..-.-.'), ('f\'"-----', 'f\'"----'), ('-...-.', '...-.'), ('"-..-..-', '"-..-..'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('--', '-'), ('-..-', '..-'), ('-.', '.'), ('-....', '....'), ("----'", "---'"), ('--..', '-..')]
LibM19160401-V11-02-page28.txt: [('there-', 'there')]
LibM19160401-V11-02-page29.txt: [('-', '')]
LibM19160401-V11-02-page30.txt: [('Mc-', 'Mc')]
LibM19160401-V11-02-page31.txt: [('.-', '.')]
LibM19160401-V11-02-page32.txt: [('pri-', 'pri')]
LibM19160401-V11-02-page33.txt: [('-', '')]
LibM19160401-V11-02-page36.txt: [('liv-', 'liv')]
LibM19160401-V11-02-page39.txt: [('recog-', 'recog')]
LibM19160401-V11-02-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('mem-', 'mem'), ('there-', 'there')]
LibM19160401-V11-02-page40.txt: [('en-', 'en'), ('-', '')]
LibM19160401-V11-02-page43.txt: [('Pa-', 'Pa')]
LibM19160401-V11-02-page45.txt: [('--', '-')]
LibM19160401-V11-02-page48.txt: [('the-', 'the')]
LibM19160401-V11-02-page49.txt: [('Mc-', 'Mc')]
LibM19160401-V11-02-page5.txt: [('meas-', 'meas')]
LibM19160401-V11-02-page51.txt: [('-', '')]
LibM19160401-V11-02-page6.txt: [('-being', 'being')]
LibM19160401-V11-02-page7.txt: [('-', '')]
LibM19160401-V11-02-page8.txt: [('Congress-', 'Congress')]
LibM19160401-V11-02-page9.txt: [('-', '')]
LibM19160401-V11-02e-page1.txt: [('-', ''), ('-', '')]
LibM19160401-V11-02e-page12.txt: [('Pot-', 'Pot')]
LibM19160401-V11-02e-page14.txt: [('---', '--')]
LibM19160401-V11-02e-page3.txt: [('-', '')]
LibM19160401-V11-02e-page5.txt: [('-be', 'be')]
LibM19160401-V11-02e-page9.txt: [('morals.--', 'morals.-')]
LibM19160701-V11-03-page12.txt: [('-', '')]
LibM19160701-V11-03-page14.txt: [('execu-', 'execu')]
LibM19160701-V11-03-page15.txt: [('legit-', 'legit')]
LibM19160701-V11-03-page16.txt: [('-Rest-in-Seven', 'Rest-in-Seven')]
LibM19160701-V11-03-page18.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sup-', 'sup')]
LibM19160701-V11-03-page21.txt: [('-', ''), ('-', ''), ('Sec-', 'Sec')]
LibM19160701-V11-03-page22.txt: [('-', '')]
LibM19160701-V11-03-page23.txt: [('-', ''), ('Vat-', 'Vat')]
LibM19160701-V11-03-page26.txt: [('-', ''), ('institu-', 'institu'), ('-', '')]
LibM19160701-V11-03-page28.txt: [('-', ''), ('tem-', 'tem')]
LibM19160701-V11-03-page29.txt: [('deci-', 'deci')]
LibM19160701-V11-03-page3.txt: [('-', '')]
LibM19160701-V11-03-page30.txt: [('remem-', 'remem')]
LibM19160701-V11-03-page31.txt: [('dis-', 'dis'), ('-United', 'United'), ('cur-', 'cur')]
LibM19160701-V11-03-page32.txt: [('-', '')]
LibM19160701-V11-03-page33.txt: [('-', '')]
LibM19160701-V11-03-page34.txt: [('-', '')]
LibM19160701-V11-03-page35.txt: [('-', '')]
LibM19160701-V11-03-page40.txt: [('-from', 'from')]
LibM19160701-V11-03-page41.txt: [('-', '')]
LibM19160701-V11-03-page43.txt: [('-', '')]
LibM19160701-V11-03-page49.txt: [('Postmaster-', 'Postmaster'), ('Postmaster-', 'Postmaster')]
LibM19160701-V11-03-page51.txt: [('-', '')]
LibM19160701-V11-03-page9.txt: [('-', '')]
LibM19161001-V11-04-page1.txt: [('-', '')]
LibM19161001-V11-04-page12.txt: [('superstitions."--', 'superstitions."-')]
LibM19161001-V11-04-page15.txt: [('CHAR-', 'CHAR')]
LibM19161001-V11-04-page16.txt: [('-', '')]
LibM19161001-V11-04-page17.txt: [('Watch-', 'Watch'), ('Postmaster-', 'Postmaster')]
LibM19161001-V11-04-page2.txt: [('inter-', 'inter')]
LibM19161001-V11-04-page20.txt: [('through-', 'through')]
LibM19161001-V11-04-page21.txt: [('-', '')]
LibM19161001-V11-04-page22.txt: [('as-', 'as'), ('-', '')]
LibM19161001-V11-04-page23.txt: [('-', '')]
LibM19161001-V11-04-page24.txt: [('unde-', 'unde'), ('observ-', 'observ')]
LibM19161001-V11-04-page25.txt: [('mil-', 'mil')]
LibM19161001-V11-04-page26.txt: [('-', ''), ('suf-', 'suf')]
LibM19161001-V11-04-page27.txt: [('right-', 'right')]
LibM19161001-V11-04-page33.txt: [('-', '')]
LibM19161001-V11-04-page36.txt: [('stir-', 'stir')]
LibM19161001-V11-04-page39.txt: [('-revived', 'revived')]
LibM19161001-V11-04-page40.txt: [('how-', 'how')]
LibM19161001-V11-04-page41.txt: [('denomi-', 'denomi'), ('-', ''), ('re-', 're')]
LibM19161001-V11-04-page44.txt: [('govern-', 'govern'), ('.-', '.'), ('-', '')]
LibM19161001-V11-04-page45.txt: [('ac-', 'ac')]
LibM19161001-V11-04-page47.txt: [('voy-', 'voy')]
LibM19161001-V11-04-page49.txt: [('Anti-', 'Anti')]
LibM19161001-V11-04-page50.txt: [('-', '')]
LibM19161001-V11-04-page52.txt: [('T-', 'T')]
LibM19161001-V11-04-page6.txt: [('ex-', 'ex')]
LibM19170101-V12-01-page1.txt: [('-', ''), ('-', '')]
LibM19170101-V12-01-page13.txt: [('-', '')]
LibM19170101-V12-01-page14.txt: [('dis-', 'dis')]
LibM19170101-V12-01-page16.txt: [('-', '')]
LibM19170101-V12-01-page19.txt: [('-', ''), ('-', ''), ('DE-', 'DE')]
LibM19170101-V12-01-page2.txt: [('inter-', 'inter'), ('af-', 'af'), ('Ten-', 'Ten'), ('Wat-', 'Wat')]
LibM19170101-V12-01-page23.txt: [('-legislation', 'legislation')]
LibM19170101-V12-01-page27.txt: [('-', '')]
LibM19170101-V12-01-page3.txt: [('Sab-', 'Sab')]
LibM19170101-V12-01-page34.txt: [('religious-', 'religious'), ('-', '')]
LibM19170101-V12-01-page6.txt: [('-as', 'as'), ('re-', 're')]
LibM19170101-V12-01-page7.txt: [('un-', 'un')]
LibM19170101-V12-01-page9.txt: [('-', '')]
LibM19170401-V12-02-page10.txt: [('work-', 'work')]
LibM19170401-V12-02-page11.txt: [('praise-', 'praise')]
LibM19170401-V12-02-page18.txt: [('non-', 'non')]
LibM19170401-V12-02-page21.txt: [('valid-', 'valid')]
LibM19170401-V12-02-page23.txt: [('-', '')]
LibM19170401-V12-02-page27.txt: [('founda-', 'founda')]
LibM19170401-V12-02-page29.txt: [('recog-', 'recog')]
LibM19170401-V12-02-page30.txt: [('Attorney-', 'Attorney')]
LibM19170401-V12-02-page33.txt: [('-observance', 'observance')]
LibM19170401-V12-02-page34.txt: [('-', ''), ('Multi-', 'Multi')]
LibM19170401-V12-02-page35.txt: [('-', ''), ('alfigent-', 'alfigent')]
LibM19170401-V12-02-page5.txt: [('-', ''), ('at-', 'at')]
LibM19170401-V12-02-page7.txt: [('-', '')]
LibM19170401-V12-02-page8.txt: [('in-', 'in')]
LibM19170701-V12-03-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-.....', '.....')]
LibM19170701-V12-03-page12.txt: [('govern-', 'govern')]
LibM19170701-V12-03-page13.txt: [('-', '')]
LibM19170701-V12-03-page14.txt: [('un-', 'un')]
LibM19170701-V12-03-page15.txt: [('.-', '.'), ('-', '')]
LibM19170701-V12-03-page19.txt: [('r.nr--', 'r.nr-')]
LibM19170701-V12-03-page2.txt: [('Mis-', 'Mis')]
LibM19170701-V12-03-page20.txt: [('III.-', 'III.'), ('CXXX.-', 'CXXX.'), ('gover-', 'gover')]
LibM19170701-V12-03-page23.txt: [('---', '--'), ('---', '--')]
LibM19170701-V12-03-page26.txt: [('-', '')]
LibM19170701-V12-03-page28.txt: [('-', '')]
LibM19170701-V12-03-page29.txt: [('-', '')]
LibM19170701-V12-03-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19170701-V12-03-page31.txt: [('-', '')]
LibM19170701-V12-03-page32.txt: [('-', '')]
LibM19170701-V12-03-page33.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19170701-V12-03-page36.txt: [('-', ''), ('-', ''), ('.-', '.')]
LibM19170701-V12-03-page8.txt: [('-', '')]
LibM19170701-V12-03-page9.txt: [('-', '')]
LibM19171001-V12-04-page10.txt: [('POR-', 'POR'), ('CON-', 'CON')]
LibM19171001-V12-04-page11.txt: [('suav-', 'suav'), ('-their', 'their'), ('unlim-', 'unlim')]
LibM19171001-V12-04-page12.txt: [('-', ''), ('Medo-', 'Medo'), ('-', '')]
LibM19171001-V12-04-page13.txt: [('-', '')]
LibM19171001-V12-04-page16.txt: [('-', '')]
LibM19171001-V12-04-page18.txt: [('P-', 'P'), ('Protestant-', 'Protestant'), ('-o', 'o')]
LibM19171001-V12-04-page19.txt: [('P-', 'P')]
LibM19171001-V12-04-page21.txt: [('effec-', 'effec')]
LibM19171001-V12-04-page23.txt: [('-', ''), ('lines.-', 'lines.')]
LibM19171001-V12-04-page27.txt: [('-', '')]
LibM19171001-V12-04-page28.txt: [('under-', 'under'), ('un-', 'un')]
LibM19171001-V12-04-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19171001-V12-04-page30.txt: [('-r', 'r')]
LibM19171001-V12-04-page34.txt: [('Alleghanies.-', 'Alleghanies.')]
LibM19171001-V12-04-page35.txt: [('Food-', 'Food')]
LibM19171001-V12-04-page7.txt: [('.-', '.')]
LibM19180101-V13-01-page1.txt: [('ress--', 'ress-'), ('er-', 'er')]
LibM19180101-V13-01-page11.txt: [('--', '-')]
LibM19180101-V13-01-page12.txt: [('intro-', 'intro'), ('con-', 'con')]
LibM19180101-V13-01-page17.txt: [('Postmaster-', 'Postmaster'), ('deter-', 'deter')]
LibM19180101-V13-01-page19.txt: [('ar-', 'ar'), ('-', '')]
LibM19180101-V13-01-page24.txt: [('power-', 'power'), ('Ars--', 'Ars-'), ('-', ''), ('enfranchise-', 'enfranchise')]
LibM19180101-V13-01-page28.txt: [('-', '')]
LibM19180101-V13-01-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19180101-V13-01-page31.txt: [('re-', 're')]
LibM19180101-V13-01-page33.txt: [('Anti-', 'Anti')]
LibM19180101-V13-01-page4.txt: [('-', ''), ('-', ''), ('-C', 'C'), ('-s', 's'), ('-', '')]
LibM19180101-V13-01-page6.txt: [('-this', 'this'), ('foun-', 'foun')]
LibM19180101-V13-01-page7.txt: [('can-', 'can')]
LibM19180101-V13-01-page8.txt: [('Rear-', 'Rear')]
LibM19180401-V13-02-page1.txt: [('-', '')]
LibM19180401-V13-02-page12.txt: [('Go-to-', 'Go-to')]
LibM19180401-V13-02-page13.txt: [('.---', '.--'), ('--', '-'), ('-s-', 's-'), ('con-', 'con'), ("-to'i.", "to'i."), ('.-', '.'), ('----', '---'), ('---', '--'), ('--', '-'), ('---', '--'), ('-', ''), ('.-.-', '.-.'), ('-', ''), ('-.', '.'), ('-', ''), ('---', '--'), ('Eng-', 'Eng'), ('-', ''), ('..--', '..-'), ('.f..--', '.f..-'), ('-', ''), ('......--', '......-'), ('---', '--'), ('-.', '.'), ('---', '--'), ('-', ''), ("-----'---..-", "----'---..-"), ('-...', '...'), ('.-.-.-.-', '.-.-.-.'), ('-', ''), ('.-', '.'), ('-.-."', '.-."')]
LibM19180401-V13-02-page16.txt: [('Je-', 'Je')]
LibM19180401-V13-02-page17.txt: [('-', '')]
LibM19180401-V13-02-page19.txt: [('free-', 'free')]
LibM19180401-V13-02-page20.txt: [('-', ''), ('-', '')]
LibM19180401-V13-02-page21.txt: [('-', ''), ('-', '')]
LibM19180401-V13-02-page22.txt: [('-', ''), ('-Palestine', 'Palestine'), ('-', '')]
LibM19180401-V13-02-page23.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19180401-V13-02-page24.txt: [('MASSA-', 'MASSA')]
LibM19180401-V13-02-page26.txt: [('Co-', 'Co')]
LibM19180401-V13-02-page28.txt: [('-', ''), ('-', '')]
LibM19180401-V13-02-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19180401-V13-02-page30.txt: [('-', '')]
LibM19180401-V13-02-page35.txt: [('-', ''), ('-', ''), ('-', ''), ('r-', 'r'), ('---', '--')]
LibM19180401-V13-02-page5.txt: [('thered.-', 'thered.')]
LibM19180701-V13-03-page1.txt: [('-', '')]
LibM19180701-V13-03-page14.txt: [('-', '')]
LibM19180701-V13-03-page16.txt: [('-', ''), ('-', '')]
LibM19180701-V13-03-page17.txt: [('-', '')]
LibM19180701-V13-03-page18.txt: [('af-', 'af')]
LibM19180701-V13-03-page25.txt: [('apes-', 'apes')]
LibM19180701-V13-03-page29.txt: [('Jean-', 'Jean')]
LibM19180701-V13-03-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19180701-V13-03-page30.txt: [('time..--', 'time..-')]
LibM19180701-V13-03-page34.txt: [('cog-', 'cog')]
LibM19180701-V13-03-page6.txt: [('fol-', 'fol')]
LibM19180701-V13-03-page8.txt: [('-', '')]
LibM19181001-V13-04-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19181001-V13-04-page11.txt: [('lit-', 'lit'), ('Sun-', 'Sun'), ('Ordi-', 'Ordi')]
LibM19181001-V13-04-page13.txt: [('democ-', 'democ'), ('it-', 'it')]
LibM19181001-V13-04-page14.txt: [('af-', 'af')]
LibM19181001-V13-04-page16.txt: [('ap-', 'ap')]
LibM19181001-V13-04-page18.txt: [('fore-', 'fore')]
LibM19181001-V13-04-page19.txt: [('auto-', 'auto')]
LibM19181001-V13-04-page2.txt: [('-.', '.'), ('.-', '.'), ('pre-', 'pre'), ('Ida-', 'Ida'), ('af-', 'af')]
LibM19181001-V13-04-page20.txt: [('peril-', 'peril'), ('be-', 'be')]
LibM19181001-V13-04-page23.txt: [('-', '')]
LibM19181001-V13-04-page24.txt: [('-', ''), ('-', '')]
LibM19181001-V13-04-page25.txt: [('-ruled', 'ruled')]
LibM19181001-V13-04-page28.txt: [('-', '')]
LibM19181001-V13-04-page29.txt: [('-', ''), ('Assoeia-', 'Assoeia')]
LibM19181001-V13-04-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19181001-V13-04-page30.txt: [('One-', 'One')]
LibM19181001-V13-04-page34.txt: [('reli-', 'reli')]
LibM19181001-V13-04-page5.txt: [('call-', 'call')]
LibM19181001-V13-04-page6.txt: [('-', ''), ('-', '')]
LibM19181001-V13-04-page7.txt: [('en-', 'en')]
LibM19181001-V13-04-page8.txt: [('Declara-', 'Declara')]
LibM19190101-V15-01-page1.txt: [('-', '')]
LibM19190101-V15-01-page12.txt: [('-A', 'A')]
LibM19190101-V15-01-page15.txt: [('-', ''), ('-', ''), ('-', '')]
LibM19190101-V15-01-page17.txt: [('-', '')]
LibM19190101-V15-01-page18.txt: [('Wil-', 'Wil')]
LibM19190101-V15-01-page2.txt: [('Ida-', 'Ida'), ('T"-', 'T"')]
LibM19190101-V15-01-page21.txt: [('-religions', 'religions')]
LibM19190101-V15-01-page22.txt: [('-', '')]
LibM19190101-V15-01-page23.txt: [('-', '')]
LibM19190101-V15-01-page28.txt: [('-.', '.'), ('i"----', 'i"---'), ('-j', 'j'), ('-', ''), ('-', ''), ('-e-', 'e-'), ('-', ''), ('-of', 'of')]
LibM19190101-V15-01-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19190101-V15-01-page5.txt: [('thereof.-', 'thereof.'), ('COUN-', 'COUN'), ('-', '')]
LibM19190401-V15-02-page1.txt: [('-', '')]
LibM19190401-V15-02-page10.txt: [('..-', '..'), ('pro-', 'pro')]
LibM19190401-V15-02-page12.txt: [('i-', 'i'), ('....-', '....')]
LibM19190401-V15-02-page13.txt: [('neigh-', 'neigh')]
LibM19190401-V15-02-page15.txt: [('to-', 'to'), ('-ether.', 'ether.')]
LibM19190401-V15-02-page17.txt: [('-', ''), ('RE-', 'RE'), ('pro-', 'pro')]
LibM19190401-V15-02-page18.txt: [('Medo-', 'Medo'), ('constrain-', 'constrain')]
LibM19190401-V15-02-page19.txt: [('repub-', 'repub'), ('inter-', 'inter'), ('power-', 'power'), ('Dan-', 'Dan'), ('-', '')]
LibM19190401-V15-02-page2.txt: [('-cl', 'cl')]
LibM19190401-V15-02-page21.txt: [('jit-', 'jit')]
LibM19190401-V15-02-page22.txt: [('-', '')]
LibM19190401-V15-02-page23.txt: [('-', '')]
LibM19190401-V15-02-page28.txt: [('-', '')]
LibM19190401-V15-02-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19190401-V15-02-page7.txt: [('major-', 'major')]
LibM19190401-V15-02-page9.txt: [('-.', '.')]
LibM19190701-V15-03-page10.txt: [('-', '')]
LibM19190701-V15-03-page11.txt: [('Re-', 'Re')]
LibM19190701-V15-03-page12.txt: [('-', ''), ('pun-', 'pun')]
LibM19190701-V15-03-page13.txt: [('con-', 'con')]
LibM19190701-V15-03-page18.txt: [('com-', 'com')]
LibM19190701-V15-03-page2.txt: [('inter-', 'inter'), ('Of-', 'Of'), ('affil-', 'affil'), ('Co-', 'Co')]
LibM19190701-V15-03-page21.txt: [('-E', 'E'), ('Ite-', 'Ite'), ('-', ''), ('-', ''), ('-', ''), ('pa-', 'pa'), ('-', '')]
LibM19190701-V15-03-page22.txt: [('-', '')]
LibM19190701-V15-03-page25.txt: [('---', '--'), ('-r--', 'r--'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19190701-V15-03-page28.txt: [('President.\'"--', 'President.\'"-'), ('---', '--')]
LibM19190701-V15-03-page30.txt: [('believ-', 'believ')]
LibM19190701-V15-03-page31.txt: [('op-', 'op')]
LibM19190701-V15-03-page34.txt: [('-', '')]
LibM19190701-V15-03-page36.txt: [('f-', 'f'), ('-ewikik', 'ewikik')]
LibM19190701-V15-03-page5.txt: [('Kt-', 'Kt'), ('-.', '.'), ('--', '-'), ('--', '-'), ('-', '')]
LibM19190701-V15-03-page6.txt: [('--', '-')]
LibM19190701-V15-03-page7.txt: [('Jef-', 'Jef')]
LibM19190701-V15-03-page9.txt: [('con-', 'con')]
LibM19191001-V15-04-page11.txt: [('Con-', 'Con')]
LibM19191001-V15-04-page15.txt: [('-', ''), ('-B.', 'B.')]
LibM19191001-V15-04-page17.txt: [('-', ''), ('-', ''), ('non-', 'non'), ('en-', 'en')]
LibM19191001-V15-04-page21.txt: [('sur-', 'sur')]
LibM19191001-V15-04-page27.txt: [('-', ''), ('-', '')]
LibM19191001-V15-04-page28.txt: [('f-', 'f')]
LibM19191001-V15-04-page5.txt: [('-', '')]
LibM19191001-V15-04-page7.txt: [('pub-', 'pub')]
LibM19191001-V15-04-page8.txt: [('Massa-', 'Massa'), ('re-', 're')]
LibM19200101-V14-01-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
LibM19200101-V14-01-page12.txt: [('ESTAB-', 'ESTAB')]
LibM19200101-V14-01-page15.txt: [('-', '')]
LibM19200101-V14-01-page16.txt: [('-', ''), ('-', '')]
LibM19200101-V14-01-page17.txt: [('reli-', 'reli'), ('MUTCH-', 'MUTCH')]
LibM19200101-V14-01-page18.txt: [('cir-', 'cir')]
LibM19200101-V14-01-page20.txt: [('pa-', 'pa')]
LibM19200101-V14-01-page22.txt: [('-', ''), ('-', ''), ('-', ''), ('Na-', 'Na'), ('---', '--')]
LibM19200101-V14-01-page32.txt: [('-', '')]
LibM19200101-V14-01-page5.txt: [('fol-', 'fol')]
LibM19200101-V14-01-page6.txt: [('-', ''), ('espe-', 'espe'), ('At-', 'At')]
LibM19200101-V14-01-page7.txt: [('re-', 're'), ('-', '')]
LibM19200101-V14-01-page8.txt: [('-', '')]
LibM19200401-V14-02-page10.txt: [('ban-', 'ban')]
LibM19200401-V14-02-page11.txt: [('denorai-', 'denorai'), ('-', '')]
LibM19200401-V14-02-page13.txt: [('-', ''), ('-', ''), ('na-', 'na')]
LibM19200401-V14-02-page14.txt: [('Sun-', 'Sun'), ('-', '')]
LibM19200401-V14-02-page19.txt: [('Com-', 'Com'), ('-', '')]
LibM19200401-V14-02-page21.txt: [('com-', 'com')]
LibM19200401-V14-02-page22.txt: [('-', '')]
LibM19200401-V14-02-page29.txt: [('-at', 'at')]
LibM19200401-V14-02-page31.txt: [('---', '--'), ('a-', 'a')]
LibM19200401-V14-02-page35.txt: [('--', '-'), ('-', ''), ('-', ''), ('--', '-'), ('kt-', 'kt')]
LibM19200401-V14-02-page5.txt: [('thereof.-', 'thereof.'), ('-', ''), ('un-', 'un')]
LibM19200401-V14-02-page6.txt: [('amuse-', 'amuse'), ('re-', 're'), ('--', '-')]
LibM19200401-V14-02-page8.txt: [('Wheel-', 'Wheel'), ('advo-', 'advo')]
LibM19200401-V14-02-page9.txt: [('com-', 'com')]
LibM19200701-V14-03-page10.txt: [('an-', 'an')]
LibM19200701-V14-03-page12.txt: [('-', ''), ('-', ''), ('-', ''), ('-.', '.')]
LibM19200701-V14-03-page14.txt: [('unmistaka-', 'unmistaka'), ('ar-', 'ar'), ('-c', 'c')]
LibM19200701-V14-03-page15.txt: [('-and', 'and'), ('-..', '..'), ('Sunday-', 'Sunday'), ('-iii', 'iii')]
LibM19200701-V14-03-page16.txt: [('-', ''), ('Lib-', 'Lib')]
LibM19200701-V14-03-page17.txt: [('-', ''), ('-', ''), ('-', ''), ('iVi-', 'iVi')]
LibM19200701-V14-03-page2.txt: [('affil-', 'affil')]
LibM19200701-V14-03-page20.txt: [('-', ''), ('-', ''), ('rafarowi-erivirorre-', 'rafarowi-erivirorre'), ('-mititayerwiriiiinicrierier-rimorwai-weiverreitaararforreahaarivitoroyerriiivii', 'mititayerwiriiiinicrierier-rimorwai-weiverreitaararforreahaarivitoroyerriiivii')]
LibM19200701-V14-03-page24.txt: [('-', ''), ('--------', '-------')]
LibM19200701-V14-03-page26.txt: [('un-', 'un')]
LibM19200701-V14-03-page31.txt: [('-weesie', 'weesie'), ('Llimtstoo-', 'Llimtstoo'), ('plain-', 'plain')]
LibM19200701-V14-03-page32.txt: [('-', '')]
LibM19200701-V14-03-page33.txt: [('-', ''), ('Con-', 'Con')]
LibM19200701-V14-03-page36.txt: [('-', ''), ('-', '')]
LibM19200701-V14-03-page5.txt: [('-i-Ifidairicliiiriiirroi', 'i-Ifidairicliiiriiirroi')]
LibM19200701-V14-03-page7.txt: [('-', '')]
LibM19200701-V14-03-page8.txt: [('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-vol', 'vol'), ('-i-', 'i-'), ('wofriw-', 'wofriw'), ('-iv-.-ii.-', 'iv-.-ii.-')]
LibM19201001-V14-04-page14.txt: [('Amer-', 'Amer')]
LibM19201001-V14-04-page15.txt: [('un-', 'un')]
LibM19201001-V14-04-page16.txt: [('-', ''), ('por-', 'por')]
LibM19201001-V14-04-page18.txt: [('un-', 'un')]
LibM19201001-V14-04-page19.txt: [('un-', 'un'), ('Fugitive-', 'Fugitive')]
LibM19201001-V14-04-page21.txt: [('in-', 'in')]
LibM19201001-V14-04-page22.txt: [('Vice-', 'Vice'), ('-of', 'of')]
LibM19201001-V14-04-page23.txt: [('-', '')]
LibM19201001-V14-04-page26.txt: [('-', '')]
LibM19201001-V14-04-page27.txt: [('neg-', 'neg')]
LibM19201001-V14-04-page30.txt: [('-', '')]
LibM19201001-V14-04-page34.txt: [('en-', 'en')]
LibM19201001-V14-04-page4.txt: [('-MASS.', 'MASS.')]
LibM19201001-V14-04-page5.txt: [('-', '')]
LibM19201001-V14-04-page6.txt: [('-', '')]
LibM19201001-V14-04-page7.txt: [('Fed-', 'Fed'), ('n-', 'n'), ('---', '--'), ('"--r-f-', '"--r-f'), ('-', ''), ('eXti-', 'eXti')]
LibM19201001-V14-04-page9.txt: [('-', '')]
In [20]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction3 Average verified rate: 0.9808327456835285 Average of error rates: 0.03449303008070433 Total token count: 1452112
In [21]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[21]:
[("'", 1601),
('m', 1336),
('d', 1255),
('e', 1005),
('w', 956),
('t', 838),
('n', 784),
('r', 684),
('f', 634),
('g', 385),
('x', 271),
('u', 208),
('k', 192),
('tv', 150),
('th', 121),
('pa', 104),
('sunday-law', 92),
('re', 89),
('z', 82),
('ex', 77),
('co', 74),
('io', 72),
('id', 71),
('postmaster-general', 62),
('mo', 62),
('ga', 58),
('post-offices', 57),
('un', 57),
('un-american', 57),
('va', 56),
('statute-books', 56),
('sunday-closing', 54),
('church-and-state', 49),
('tion', 45),
('mm', 45),
('q', 44),
('li', 43),
('mt', 42),
('attorney-general', 41),
('sunday-rest', 39),
('wm', 38),
('pp', 38),
('mi', 37),
('charta', 37),
('ro', 37),
('mc', 33),
('ri', 31),
('neander', 31),
('al', 31),
('-', 30)]
Correction 4 -- Remove extra quotation marks¶
In [22]:
# %load shared_elements/replace_extra_quotation_marks.py
prev = cycle
cycle = "correction4"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
corrections = []
for token in tokens:
token_list = list(token)
last_char = token_list[-1]
if last_char is "'":
if len(token) > 1:
if token_list[-2] is 's' or 'S':
pass
else:
corrections.append((token, re.sub(r"'", r"", token)))
else:
pass
elif token[0] is "'":
corrections.append((token, re.sub(r"'", r"", token)))
else:
pass
if len(corrections) > 0:
print('{}: {}'.format(filename, corrections))
for correction in corrections:
content = clean.replace_pair(correction, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19060401-V01-01-page20.txt: [("'bound", 'bound')]
LibM19060401-V01-01-page22.txt: [("'co", 'co')]
LibM19060401-V01-01-page25.txt: [("'brought", 'brought')]
LibM19060401-V01-01-page29.txt: [("'Sunday", 'Sunday'), ("'hundred", 'hundred')]
LibM19060401-V01-01-page31.txt: [("'Concerning", 'Concerning'), ("'Connecticut", 'Connecticut'), ("'hearkened", 'hearkened'), ("'brother", 'brother'), ("'bow", 'bow')]
LibM19060401-V01-01-page33.txt: [("'CORTELYOU", 'CORTELYOU')]
LibM19060401-V01-01-page34.txt: [("'advocate", 'advocate'), ("'and", 'and'), ("'contrast", 'contrast')]
LibM19060401-V01-01-page35.txt: [("'e", 'e')]
LibM19060401-V01-01-page7.txt: [("'belief", 'belief')]
LibM19060401-V01-01-page8.txt: [("'Caesar", 'Caesar')]
LibM19060401-V01-01-page9.txt: [("'by", 'by')]
LibM19060701-V01-02-page21.txt: [("'twixt", 'twixt')]
LibM19061001-V01-03-page18.txt: [("'corresponding", 'corresponding')]
LibM19061001-V01-03-page22.txt: [('\'"', '"')]
LibM19061001-V01-03-page23.txt: [("'fragile", 'fragile'), ("'Where", 'Where')]
LibM19061001-V01-03-page24.txt: [("'ir", 'ir')]
LibM19061001-V01-03-page25.txt: [("'Brewer", 'Brewer'), ("'factories", 'factories'), ("'prohibited", 'prohibited')]
LibM19061001-V01-03-page29.txt: [("'of", 'of')]
LibM19061001-V01-03-page32.txt: [("'Judge", 'Judge'), ("''Sou.", 'Sou.')]
LibM19061001-V01-03-page8.txt: [("'to", 'to')]
LibM19070101-V02-01-page12.txt: [("'.", '.')]
LibM19070101-V02-01-page17.txt: [("'it", 'it')]
LibM19070101-V02-01-page20.txt: [("'work", 'work')]
LibM19070101-V02-01-page24.txt: [("'monstrous", 'monstrous')]
LibM19070101-V02-01-page32.txt: [("'Zealous", 'Zealous')]
LibM19070101-V02-01-page34.txt: [("'burning", 'burning')]
LibM19070401-V02-02-page12.txt: [("'on", 'on')]
LibM19070401-V02-02-page2.txt: [("'twas", 'twas')]
LibM19070401-V02-02-page25.txt: [("'they", 'they'), ("'the", 'the')]
LibM19070401-V02-02-page27.txt: [("'il", 'il')]
LibM19070401-V02-02-page28.txt: [("'said", 'said'), ("'They", 'They')]
LibM19070401-V02-02-page29.txt: [("'voluntarily", 'voluntarily')]
LibM19070401-V02-02-page3.txt: [("'Bless", 'Bless')]
LibM19070401-V02-02-page6.txt: [("'disobey", 'disobey')]
LibM19070701-V02-03-page1.txt: [("'a", 'a')]
LibM19070701-V02-03-page13.txt: [("'Demand", 'Demand')]
LibM19070701-V02-03-page25.txt: [("'to", 'to')]
LibM19070701-V02-03-page27.txt: [("'earnest", 'earnest')]
LibM19070701-V02-03-page31.txt: [("'uses", 'uses')]
LibM19070701-V02-03-page33.txt: [("'one", 'one')]
LibM19070701-V02-03-page6.txt: [("'time", 'time')]
LibM19071001-V02-04-page11.txt: [("'Bishop", 'Bishop'), ("'press", 'press')]
LibM19071001-V02-04-page31.txt: [("'I", 'I')]
LibM19071001-V02-04-page50.txt: [("'legislation", 'legislation')]
LibM19071001-V02-04-page51.txt: [("'Ier", 'Ier'), ("'pr..", 'pr..'), ("'Isom", 'Isom'), ("'rotor", 'rotor'), ("'.", '.'), ("'en", 'en'), ("'...", '...'), ("'rryn", 'rryn'), ("'hot", 'hot')]
LibM19071001-V02-04-page7.txt: [("'because", 'because')]
LibM19071001-V02-04-page8.txt: [("'In", 'In')]
LibM19080101-V03-01-page22.txt: [("'religionaboveall", 'religionaboveall')]
LibM19080101-V03-01-page23.txt: [("'of", 'of'), ("'tween", 'tween')]
LibM19080101-V03-01-page42.txt: [("'the", 'the')]
LibM19080401-V03-02-page1.txt: [('\'".', '".')]
LibM19080401-V03-02-page18.txt: [("'A", 'A')]
LibM19080401-V03-02-page31.txt: [("'or", 'or'), ("'from", 'from')]
LibM19080401-V03-02-page33.txt: [("'Tis", 'Tis')]
LibM19080701-V03-03-page11.txt: [("'ago", 'ago')]
LibM19080701-V03-03-page43.txt: [("'Sunday", 'Sunday')]
LibM19080701-V03-03-page52.txt: [("'um", 'um')]
LibM19081001-V03-04-page33.txt: [("'we", 'we')]
LibM19090101-V04-01-page15.txt: [("'honor", 'honor')]
LibM19090101-V04-01-page44.txt: [("'s", 's')]
LibM19090101-V04-01-page49.txt: [("'A", 'A')]
LibM19090401-V04-02-page1.txt: [("''.", '.')]
LibM19090401-V04-02-page2.txt: [("'.", '.'), ("'r", 'r')]
LibM19090401-V04-02-page20.txt: [("'religious", 'religious')]
LibM19090401-V04-02-page24.txt: [("'the", 'the')]
LibM19090401-V04-02-page32.txt: [("'a", 'a')]
LibM19090401-V04-02-page45.txt: [("'fio", 'fio'), ("'.", '.')]
LibM19090401-V04-02-page48.txt: [("'UNTIL", 'UNTIL')]
LibM19090401-V04-02-page49.txt: [("'A", 'A')]
LibM19090401-V04-02-page51.txt: [("'ARIZ", 'ARIZ')]
LibM19090701-V04-03-page1.txt: [("'''....", '....')]
LibM19090701-V04-03-page38.txt: [("'moment", 'moment')]
LibM19090701-V04-03-page43.txt: [("'for", 'for')]
LibM19090701-V04-03-page44.txt: [("'WET'andtRIY", 'WETandtRIY')]
LibM19090701-V04-03-page45.txt: [("'GIP", 'GIP')]
LibM19090701-V04-03-page49.txt: [("'iples", 'iples')]
LibM19091001-V04-04-page14.txt: [("'background.", 'background.')]
LibM19091001-V04-04-page39.txt: [("'instance", 'instance')]
LibM19091001-V04-04-page40.txt: [("'our", 'our')]
LibM19091001-V04-04-page46.txt: [("'much", 'much')]
LibM19091001-V04-04-page5.txt: [("'of", 'of')]
LibM19100101-V05-01-page10.txt: [("'its", 'its')]
LibM19100101-V05-01-page11.txt: [("'of", 'of'), ("'been", 'been')]
LibM19100101-V05-01-page12.txt: [("'now", 'now')]
LibM19100101-V05-01-page17.txt: [("'of", 'of')]
LibM19100101-V05-01-page19.txt: [("'siderable", 'siderable')]
LibM19100101-V05-01-page20.txt: [("'day", 'day')]
LibM19100101-V05-01-page25.txt: [("'why", 'why')]
LibM19100101-V05-01-page31.txt: [("'AMOR", 'AMOR')]
LibM19100101-V05-01-page34.txt: [("'Such", 'Such')]
LibM19100101-V05-01-page39.txt: [("'profound", 'profound')]
LibM19100401-V05-02-page14.txt: [("'for", 'for')]
LibM19100401-V05-02-page24.txt: [("'s", 's')]
LibM19100401-V05-02-page27.txt: [("'resident", 'resident')]
LibM19100401-V05-02-page28.txt: [("'from", 'from')]
LibM19100401-V05-02-page32.txt: [("'for", 'for')]
LibM19100401-V05-02-page7.txt: [("'doubt", 'doubt')]
LibM19100701-V05-03-page1.txt: [("'PIN", 'PIN')]
LibM19100701-V05-03-page44.txt: [("'together", 'together')]
LibM19100701-V05-03-page9.txt: [("'demanding", 'demanding')]
LibM19101001-V05-04-page10.txt: [("'direct", 'direct')]
LibM19101001-V05-04-page34.txt: [("'of", 'of'), ("'replies", 'replies')]
LibM19101001-V05-04-page36.txt: [("'Amore", 'Amore')]
LibM19101001-V05-04-page49.txt: [("'MAGAZINE", 'MAGAZINE')]
LibM19110101-V06-01-page10.txt: [("'wants", 'wants')]
LibM19110101-V06-01-page15.txt: [("'demanding", 'demanding')]
LibM19110101-V06-01-page17.txt: [("'shall", 'shall')]
LibM19110101-V06-01-page34.txt: [("'debates", 'debates')]
LibM19110101-V06-01-page35.txt: [("'with", 'with')]
LibM19110101-V06-01-page37.txt: [("'uniting", 'uniting')]
LibM19110101-V06-01-page39.txt: [("'as", 'as')]
LibM19110101-V06-01-page42.txt: [("'Adventists", 'Adventists')]
LibM19110101-V06-01-page45.txt: [("'neath", 'neath')]
LibM19110101-V06-01-page49.txt: [("'St", 'St')]
LibM19110101-V06-01-page5.txt: [("'enforce", 'enforce')]
LibM19110101-V06-01-page9.txt: [("'by", 'by')]
LibM19110401-V06-02-page1.txt: [("'apple", 'apple'), ("'ftIfl", 'ftIfl')]
LibM19110401-V06-02-page26.txt: [("'science", 'science'), ("'of", 'of')]
LibM19110701-V06-03-page2.txt: [("'liberty", 'liberty')]
LibM19110701-V06-03-page21.txt: [("'.", '.')]
LibM19110701-V06-03-page25.txt: [("'eagle", 'eagle')]
LibM19110701-V06-03-page29.txt: [("'of", 'of')]
LibM19110701-V06-03-page30.txt: [("'goo", 'goo')]
LibM19110701-V06-03-page4.txt: [("'painaas", 'painaas')]
LibM19110701-V06-03-page42.txt: [("'and", 'and')]
LibM19110701-V06-03-page46.txt: [("'the", 'the')]
LibM19111001-V06-04-page17.txt: [('\'"', '"')]
LibM19111001-V06-04-page19.txt: [("'positively", 'positively'), ("'V", 'V')]
LibM19111001-V06-04-page20.txt: [("'the", 'the')]
LibM19111001-V06-04-page30.txt: [("'liberties", 'liberties')]
LibM19111001-V06-04-page41.txt: [("'to", 'to')]
LibM19111001-V06-04-page52.txt: [("'Writings", 'Writings')]
LibM19120101-V07-01-page10.txt: [("'with", 'with'), ("'hardly", 'hardly')]
LibM19120101-V07-01-page22.txt: [("'and", 'and')]
LibM19120101-V07-01-page26.txt: [("'child", 'child')]
LibM19120101-V07-01-page27.txt: [("'RESIDENT", 'RESIDENT')]
LibM19120101-V07-01-page29.txt: [("'if", 'if')]
LibM19120101-V07-01-page30.txt: [("'s", 's')]
LibM19120101-V07-01-page31.txt: [("'be", 'be')]
LibM19120401-V07-02-page28.txt: [("'rections", 'rections'), ("'effect", 'effect')]
LibM19120401-V07-02-page32.txt: [("'Catholics", 'Catholics')]
LibM19120401-V07-02-page35.txt: [("'of", 'of'), ("'be", 'be')]
LibM19120701-V07-03-page4.txt: [("'.", '.'), ("'ma", 'ma'), ("'.Z....", '.Z....'), ("'..", '..'), ("'gut.", 'gut.'), ("'.", '.')]
LibM19120701-V07-03-page40.txt: [("'with", 'with')]
LibM19120701-V07-03-page42.txt: [("'of", 'of')]
LibM19120701-V07-03-page51.txt: [("'GAZINE", 'GAZINE')]
LibM19120701-V07-03-page52.txt: [("'Ne", 'Ne')]
LibM19120701-V07-03-page8.txt: [("'twixt", 'twixt')]
LibM19121001-V07-04-page26.txt: [("'tat", 'tat')]
LibM19121001-V07-04-page6.txt: [("'.", '.'), ("'aroe", 'aroe'), ("'ammo", 'ammo'), ("'Meow", 'Meow')]
LibM19130101-V08-01-page15.txt: [("'I-JAMES", 'I-JAMES')]
LibM19130101-V08-01-page2.txt: [("'Religious", 'Religious')]
LibM19130101-V08-01-page22.txt: [("'the", 'the')]
LibM19130101-V08-01-page23.txt: [("'religious", 'religious')]
LibM19130101-V08-01-page31.txt: [("'avoid", 'avoid')]
LibM19130101-V08-01-page42.txt: [("'en.", 'en.'), ("'ode", 'ode')]
LibM19130101-V08-01-page5.txt: [("'White", 'White')]
LibM19130401-V08-02-page13.txt: [("'provided", 'provided')]
LibM19130401-V08-02-page31.txt: [("'of", 'of')]
LibM19130401-V08-02-page32.txt: [("'to", 'to')]
LibM19130401-V08-02-page34.txt: [("'let", 'let')]
LibM19130401-V08-02-page38.txt: [("'Tis", 'Tis')]
LibM19130401-V08-02-page49.txt: [("'Society", 'Society')]
LibM19130701-V08-03-page27.txt: [("'C.'''.", 'C..'), ("'.....", '.....')]
LibM19130701-V08-03-page50.txt: [("'wishing", 'wishing')]
LibM19130701-V08-03-page51.txt: [("'WASH", 'WASH')]
LibM19131001-V08-04-page12.txt: [("'first", 'first')]
LibM19131001-V08-04-page13.txt: [("'of", 'of')]
LibM19131001-V08-04-page25.txt: [("'ay", 'ay')]
LibM19131001-V08-04-page3.txt: [("'This", 'This'), ("'.", '.')]
LibM19131001-V08-04-page4.txt: [("'OVID.", 'OVID.')]
LibM19131001-V08-04-page41.txt: [("'so", 'so')]
LibM19131001-V08-04-page5.txt: [("'on", 'on')]
LibM19131001-V08-04-page52.txt: [("'.", '.')]
LibM19140101-V09-01-page14.txt: [("'give", 'give')]
LibM19140101-V09-01-page15.txt: [("'just", 'just')]
LibM19140101-V09-01-page23.txt: [("'and", 'and')]
LibM19140101-V09-01-page31.txt: [("'i'i", 'ii'), ("'I.", 'I.'), ("'..i", '..i')]
LibM19140101-V09-01-page42.txt: [("'God", 'God')]
LibM19140101-V09-01-page43.txt: [("'once", 'once')]
LibM19140101-V09-01-page56.txt: [("'VA", 'VA'), ("'Nit", 'Nit')]
LibM19140401-V09-02-page15.txt: [("'contrary", 'contrary')]
LibM19140401-V09-02-page20.txt: [("'state", 'state')]
LibM19140401-V09-02-page23.txt: [("'shave", 'shave')]
LibM19140401-V09-02-page4.txt: [("''t", 't')]
LibM19140701-V09-03-page11.txt: [("'tis", 'tis')]
LibM19140701-V09-03-page18.txt: [("'prohibit", 'prohibit')]
LibM19140701-V09-03-page19.txt: [("'The", 'The')]
LibM19140701-V09-03-page26.txt: [("'orris", 'orris')]
LibM19140701-V09-03-page28.txt: [("'the", 'the')]
LibM19140701-V09-03-page29.txt: [("'as", 'as')]
LibM19140701-V09-03-page36.txt: [("'riot", 'riot')]
LibM19140701-V09-03-page37.txt: [("'The", 'The')]
LibM19140701-V09-03-page4.txt: [("'UT", 'UT'), ("'esired.", 'esired.')]
LibM19140701-V09-03-page40.txt: [("'elected", 'elected')]
LibM19140701-V09-03-page45.txt: [("'in", 'in')]
LibM19140701-V09-03-page49.txt: [("'t", 't'), ("'t", 't'), ('\'\'"', '"'), ("'TX", 'TX')]
LibM19140701-V09-03-page51.txt: [("'mon", 'mon')]
LibM19141001-V09-04-page11.txt: [("'a", 'a')]
LibM19141001-V09-04-page13.txt: [("'now", 'now')]
LibM19141001-V09-04-page24.txt: [("'IiE", 'IiE')]
LibM19141001-V09-04-page32.txt: [("'hung", 'hung')]
LibM19141001-V09-04-page35.txt: [("'IV", 'IV')]
LibM19141001-V09-04-page40.txt: [("'enjoy", 'enjoy')]
LibM19141001-V09-04-page45.txt: [("'the", 'the')]
LibM19141001-V09-04-page46.txt: [("'act", 'act')]
LibM19141001-V09-04-page50.txt: [("'M", 'M'), ("'N", 'N'), ("'C", 'C')]
LibM19141001-V09-04-page51.txt: [("'.", '.')]
LibM19141001-V09-04-page52.txt: [("'AK.", 'AK.'), ("'CY", 'CY')]
LibM19141001-V09-04-page9.txt: [("'at", 'at')]
LibM19150101-V10-01-page11.txt: [("'Liberty", 'Liberty')]
LibM19150101-V10-01-page14.txt: [("'a", 'a')]
LibM19150101-V10-01-page21.txt: [("'thus", 'thus')]
LibM19150101-V10-01-page24.txt: [("'the", 'the'), ("'to", 'to')]
LibM19150101-V10-01-page34.txt: [("'Tis", 'Tis')]
LibM19150101-V10-01-page38.txt: [("'fallacy", 'fallacy')]
LibM19150101-V10-01-page48.txt: [("'thereby", 'thereby')]
LibM19150101-V10-01-page52.txt: [("'M", 'M')]
LibM19150101-V10-01-page53.txt: [("'comet", 'comet'), ("'Protestant", 'Protestant')]
LibM19150401-V10-02-page19.txt: [("'directed", 'directed')]
LibM19150401-V10-02-page22.txt: [("'recourse", 'recourse')]
LibM19150401-V10-02-page36.txt: [("'s", 's')]
LibM19150401-V10-02-page39.txt: [("'Upon", 'Upon')]
LibM19150401-V10-02-page46.txt: [("'O", 'O')]
LibM19150401-V10-02-page6.txt: [("'IMN", 'IMN'), ('\'"Ar', '"Ar')]
LibM19150701-V10-03-page12.txt: [("'at", 'at')]
LibM19150701-V10-03-page15.txt: [("'twixti", 'twixti')]
LibM19150701-V10-03-page20.txt: [("'citizens", 'citizens')]
LibM19150701-V10-03-page26.txt: [("'Ipon", 'Ipon')]
LibM19150701-V10-03-page33.txt: [("'defend", 'defend')]
LibM19150701-V10-03-page42.txt: [("'a", 'a')]
LibM19150701-V10-03-page43.txt: [("'in", 'in')]
LibM19151001-V10-04-page11.txt: [("'publish", 'publish')]
LibM19151001-V10-04-page20.txt: [("'Part", 'Part')]
LibM19151001-V10-04-page21.txt: [("'personal", 'personal')]
LibM19151001-V10-04-page22.txt: [("'duty", 'duty')]
LibM19151001-V10-04-page25.txt: [("'buries", 'buries')]
LibM19151001-V10-04-page28.txt: [("'Twixt", 'Twixt')]
LibM19151001-V10-04-page47.txt: [("'immutable", 'immutable')]
LibM19151001-V10-04-page51.txt: [("'Vs", 'Vs')]
LibM19160101-V11-01-page11.txt: [("'union", 'union')]
LibM19160101-V11-01-page13.txt: [("'venerable", 'venerable')]
LibM19160101-V11-01-page44.txt: [("'....", '....'), ('\'\'.....".', '.....".'), ("'JAC'V", 'JACV'), ("'i", 'i'), ("'.", '.'), ("''.", '.'), ("'ti", 'ti'), ("'t", 't'), ("'sr", 'sr'), ("'Ae.", 'Ae.')]
LibM19160101-V11-01-page48.txt: [("'members", 'members')]
LibM19160101-V11-01e-page16.txt: [("'Washington", 'Washington')]
LibM19160101-V11-01e-page7.txt: [("'The", 'The')]
LibM19160401-V11-02-page10.txt: [("'a", 'a'), ("'Na", 'Na')]
LibM19160401-V11-02-page12.txt: [("'as", 'as')]
LibM19160401-V11-02-page16.txt: [("'if", 'if')]
LibM19160401-V11-02-page20.txt: [("'Company", 'Company')]
LibM19160401-V11-02-page26.txt: [("'It", 'It')]
LibM19160401-V11-02-page31.txt: [("'tis", 'tis'), ("'I", 'I')]
LibM19160401-V11-02-page46.txt: [("'Traitors", 'Traitors'), ('\'"', '"')]
LibM19160701-V11-03-page23.txt: [("'IM", 'IM')]
LibM19160701-V11-03-page27.txt: [("'An", 'An')]
LibM19160701-V11-03-page42.txt: [('\'"', '"')]
LibM19160701-V11-03-page6.txt: [("'neath", 'neath')]
LibM19161001-V11-04-page10.txt: [("'a", 'a')]
LibM19161001-V11-04-page19.txt: [("'rest", 'rest')]
LibM19161001-V11-04-page20.txt: [("'Illinois", 'Illinois')]
LibM19161001-V11-04-page35.txt: [("'regarding", 'regarding')]
LibM19161001-V11-04-page36.txt: [("'sent", 'sent')]
LibM19161001-V11-04-page37.txt: [("'the", 'the')]
LibM19161001-V11-04-page39.txt: [("'of", 'of')]
LibM19161001-V11-04-page41.txt: [("'court", 'court')]
LibM19170101-V12-01-page26.txt: [("'nternational", 'nternational')]
LibM19170101-V12-01-page27.txt: [("'Duprey's", 'Dupreys'), ("'Moore", 'Moore')]
LibM19170101-V12-01-page30.txt: [("'banishing", 'banishing')]
LibM19170101-V12-01-page35.txt: [("'ts", 'ts')]
LibM19170101-V12-01-page6.txt: [("'servile", 'servile')]
LibM19170401-V12-02-page16.txt: [("'no", 'no')]
LibM19170401-V12-02-page19.txt: [("'If", 'If')]
LibM19170401-V12-02-page20.txt: [("'Twas", 'Twas')]
LibM19170401-V12-02-page22.txt: [("'Tis", 'Tis')]
LibM19170401-V12-02-page25.txt: [("'Tis", 'Tis')]
LibM19170401-V12-02-page29.txt: [('\'"', '"')]
LibM19170401-V12-02-page5.txt: [("'o", 'o')]
LibM19170401-V12-02-page9.txt: [("'that", 'that')]
LibM19170701-V12-03-page1.txt: [("'al", 'al')]
LibM19170701-V12-03-page10.txt: [("'THE", 'THE')]
LibM19170701-V12-03-page12.txt: [('\'s"', 's"')]
LibM19170701-V12-03-page17.txt: [("'State", 'State')]
LibM19170701-V12-03-page29.txt: [("'the", 'the')]
LibM19171001-V12-04-page1.txt: [("'ublished", 'ublished')]
LibM19171001-V12-04-page16.txt: [("'mounted", 'mounted')]
LibM19171001-V12-04-page18.txt: [("'Luther", 'Luther'), ("'tboot", 'tboot')]
LibM19171001-V12-04-page27.txt: [("'us", 'us')]
LibM19171001-V12-04-page9.txt: [("'coordination", 'coordination'), ("'most", 'most')]
LibM19180101-V13-01-page4.txt: [('\'"E', '"E'), ("'attr", 'attr')]
LibM19180401-V13-02-page14.txt: [("'however", 'however')]
LibM19180401-V13-02-page22.txt: [("'of", 'of')]
LibM19180401-V13-02-page31.txt: [("'fields", 'fields')]
LibM19180401-V13-02-page36.txt: [("'THE", 'THE')]
LibM19180701-V13-03-page10.txt: [("'of", 'of')]
LibM19180701-V13-03-page21.txt: [("'no", 'no')]
LibM19180701-V13-03-page32.txt: [("'years", 'years')]
LibM19180701-V13-03-page34.txt: [("'what", 'what'), ("'Tis", 'Tis')]
LibM19181001-V13-04-page15.txt: [("'being", 'being')]
LibM19181001-V13-04-page19.txt: [("'EMOCRACY", 'EMOCRACY')]
LibM19181001-V13-04-page21.txt: [("'the", 'the')]
LibM19181001-V13-04-page25.txt: [("'virtually", 'virtually')]
LibM19190101-V15-01-page18.txt: [("'enforce", 'enforce')]
LibM19190101-V15-01-page19.txt: [("'remain", 'remain')]
LibM19190101-V15-01-page20.txt: [("'Oxtails", 'Oxtails')]
LibM19190101-V15-01-page22.txt: [("'Sunday", 'Sunday')]
LibM19190401-V15-02-page1.txt: [("'W", 'W')]
LibM19190401-V15-02-page14.txt: [("'the", 'the')]
LibM19190401-V15-02-page15.txt: [("'the", 'the'), ("'twixt", 'twixt')]
LibM19190401-V15-02-page16.txt: [("'the", 'the')]
LibM19190401-V15-02-page21.txt: [("'a", 'a')]
LibM19190401-V15-02-page22.txt: [("'million", 'million')]
LibM19190401-V15-02-page5.txt: [("'LE", 'LE'), ("'being", 'being')]
LibM19190401-V15-02-page6.txt: [("'According", 'According')]
LibM19190701-V15-03-page20.txt: [("'Presbyterian", 'Presbyterian')]
LibM19190701-V15-03-page21.txt: [("'or", 'or')]
LibM19190701-V15-03-page29.txt: [("'lewd", 'lewd')]
LibM19190701-V15-03-page30.txt: [("'v", 'v'), ("'he", 'he')]
LibM19190701-V15-03-page31.txt: [("'.", '.'), ("'the", 'the')]
LibM19190701-V15-03-page32.txt: [("'United", 'United')]
LibM19190701-V15-03-page33.txt: [("'and", 'and')]
LibM19191001-V15-04-page15.txt: [('\'"', '"')]
LibM19191001-V15-04-page18.txt: [("'five", 'five')]
LibM19191001-V15-04-page7.txt: [("'purity", 'purity'), ("'by", 'by')]
LibM19200101-V14-01-page1.txt: [("'IN", 'IN')]
LibM19200101-V14-01-page6.txt: [("'s", 's')]
LibM19200401-V14-02-page25.txt: [("'Volumes", 'Volumes')]
LibM19200701-V14-03-page15.txt: [("'racTIMIriiiiitiriiltililietcliteiViiVittiiiitiEVAlifittiA", 'racTIMIriiiiitiriiltililietcliteiViiVittiiiitiEVAlifittiA')]
LibM19200701-V14-03-page24.txt: [("'A", 'A')]
LibM19200701-V14-03-page27.txt: [("'concerned.", 'concerned.')]
LibM19200701-V14-03-page32.txt: [("'fourth", 'fourth')]
LibM19200701-V14-03-page33.txt: [("'the", 'the')]
LibM19200701-V14-03-page4.txt: [("'Twixt", 'Twixt')]
LibM19201001-V14-04-page15.txt: [("'m", 'm')]
LibM19201001-V14-04-page16.txt: [("'mannum", 'mannum')]
LibM19201001-V14-04-page23.txt: [("'Signs", 'Signs'), ("'Signs", 'Signs'), ("'Cut", 'Cut'), ("'Signs", 'Signs')]
LibM19201001-V14-04-page25.txt: [("'were", 'were')]
LibM19201001-V14-04-page29.txt: [("'praise", 'praise'), ("'for", 'for')]
LibM19201001-V14-04-page7.txt: [("'oppression.", 'oppression.')]
In [23]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction4 Average verified rate: 0.9811434974335735 Average of error rates: 0.03407373440939106 Total token count: 1452019
In [24]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[("'", 1499),
('m', 1341),
('d', 1257),
('e', 1013),
('w', 957),
('t', 847),
('n', 787),
('r', 687),
('f', 634),
('g', 386),
('x', 271),
('u', 209),
('k', 192),
('tv', 150),
('th', 122),
('pa', 104),
('sunday-law', 92),
('re', 89),
('z', 83),
('ex', 77),
('co', 75),
('io', 72),
('id', 71),
('mo', 63),
('postmaster-general', 62),
('ga', 58),
('post-offices', 57),
('un', 57),
('un-american', 57),
('va', 57),
('statute-books', 56),
('sunday-closing', 54),
('church-and-state', 49),
('tion', 45),
('mm', 45),
('q', 44),
('li', 43),
('mt', 42),
('attorney-general', 41),
('sunday-rest', 39),
('wm', 38),
('ro', 38),
('pp', 38),
('mi', 37),
('charta', 37),
('mc', 33),
('al', 32),
('ri', 31),
('neander', 31),
('-', 30)]
Correction 5 -- Rejoin Burst Words¶
In [25]:
# %load shared_elements/rejoin_burst_words.py
prev = cycle
cycle = "correction5"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
pattern = re.compile("(\s(\w{1,2}\s){5,})")
replacements = []
clean.check_splits(pattern, spelling_dictionary, content, replacements)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19061001-V01-03-page17.txt: [(' r a t h e ', 'rathe')]
LibM19061001-V01-03-page24.txt: [('To', 'To')]
LibM19100101-V05-01-page22.txt: [('It', 'It')]
LibM19100401-V05-02-page52.txt: [('El', 'El')]
LibM19100701-V05-03-page19.txt: [(' f or w a r d\n', 'forward')]
LibM19120401-V07-02-page46.txt: [('It', 'It')]
LibM19121001-V07-04-page29.txt: [('As', 'As')]
LibM19121001-V07-04-page5.txt: [('El', 'El')]
LibM19150101-V10-01-page11.txt: [('To', 'To')]
LibM19150101-V10-01-page4.txt: [('Lo', 'Lo')]
LibM19150401-V10-02-page6.txt: [('\nU N U S U A L ', 'UNUSUAL')]
LibM19150701-V10-03-page27.txt: [('It', 'It')]
LibM19150701-V10-03-page47.txt: [(' m a n is a ', 'manisa')]
LibM19170401-V12-02-page5.txt: [(' p r es en t ', 'present')]
LibM19170701-V12-03-page16.txt: [('Is', 'Is')]
LibM19200101-V14-01-page6.txt: [(' c an n o t ', 'cannot')]
In [26]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction5 Average verified rate: 0.9811555435567139 Average of error rates: 0.0340564930300807 Total token count: 1451992
In [27]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[27]:
[("'", 1499),
('m', 1340),
('d', 1256),
('e', 1012),
('w', 956),
('t', 844),
('n', 784),
('r', 684),
('f', 633),
('g', 386),
('x', 271),
('u', 206),
('k', 192),
('tv', 150),
('th', 122),
('pa', 104),
('sunday-law', 92),
('re', 89),
('z', 83),
('ex', 77),
('co', 75),
('io', 72),
('id', 71),
('mo', 63),
('postmaster-general', 62),
('ga', 58),
('post-offices', 57),
('un', 57),
('un-american', 57),
('va', 57),
('statute-books', 56),
('sunday-closing', 54),
('church-and-state', 49),
('tion', 45),
('mm', 45),
('q', 44),
('li', 43),
('mt', 42),
('attorney-general', 41),
('sunday-rest', 39),
('wm', 38),
('ro', 38),
('pp', 38),
('mi', 37),
('charta', 37),
('mc', 33),
('al', 32),
('ri', 31),
('neander', 31),
('-', 30)]
Correction 6 -- Rejoin Split Words¶
In [28]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction6"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
errors = reports.identify_errors(tokens, spelling_dictionary)
replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_split_words(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19060401-V01-01-page11.txt: [('Mc', 'Alister')]
LibM19060401-V01-01-page35.txt: [('ri', 'e'), ('re', 'd'), ('ti', 'c')]
LibM19061001-V01-03-page19.txt: [('Sabb', 'at')]
LibM19061001-V01-03-page21.txt: [('destruc', 'tion')]
LibM19061001-V01-03-page4.txt: [('spir', 'itual')]
LibM19061001-V01-03-page6.txt: [('LIBERT', 'Y')]
LibM19070101-V02-01-page2.txt: [('ti', 'A')]
LibM19070101-V02-01-page23.txt: [('LIBE', 'RTY')]
LibM19070101-V02-01-page6.txt: [('impor', 'tance')]
LibM19070401-V02-02-page17.txt: [('LAN', 'CASTER')]
LibM19070701-V02-03-page14.txt: [('Demi', 'god')]
LibM19070701-V02-03-page18.txt: [('unfort', 'unately')]
LibM19070701-V02-03-page23.txt: [('Mc', 'Alister')]
LibM19071001-V02-04-page12.txt: [('approv', 'e')]
LibM19071001-V02-04-page14.txt: [('MC', 'KENNA')]
LibM19071001-V02-04-page20.txt: [('controv', 'ersy')]
LibM19071001-V02-04-page38.txt: [('un', 'Christian'), ('Fr', 'eedom')]
LibM19071001-V02-04-page46.txt: [('co', 'operation')]
LibM19071001-V02-04-page48.txt: [('th', 'e')]
LibM19071001-V02-04-page50.txt: [('co', 'respondents')]
LibM19071001-V02-04-page51.txt: [('ren', 'al'), ('re', 'hob')]
LibM19080101-V03-01-page20.txt: [('un', 'Christian')]
LibM19080101-V03-01-page32.txt: [('ob', 'serve')]
LibM19080101-V03-01-page41.txt: [('self-govern', 'ment')]
LibM19080401-V03-02-page1.txt: [('pa', 'I')]
LibM19080401-V03-02-page28.txt: [('Legis', 'lation')]
LibM19080401-V03-02-page30.txt: [('upo', 'n')]
LibM19080701-V03-03-page1.txt: [('ra', 'ff')]
LibM19080701-V03-03-page20.txt: [('re', 'A')]
LibM19080701-V03-03-page28.txt: [('Sund', 'a')]
LibM19080701-V03-03-page37.txt: [('religi', 'o')]
LibM19080701-V03-03-page46.txt: [('Northweste', 'r')]
LibM19081001-V03-04-page11.txt: [('PHILA', 'DELPHIA'), ('WILLI', 'AM')]
LibM19081001-V03-04-page15.txt: [('fi', 'e')]
LibM19081001-V03-04-page19.txt: [('TH', 'E'), ('religi', 'o'), ('Por', 'e'), ('su', 'preme')]
LibM19090101-V04-01-page28.txt: [('threate', 'n')]
LibM19090101-V04-01-page33.txt: [('estab', 'lish')]
LibM19090101-V04-01-page45.txt: [('Mc', "Clure's")]
LibM19090401-V04-02-page11.txt: [('Bonif', 'ace')]
LibM19090401-V04-02-page12.txt: [('Boni', 'face')]
LibM19090401-V04-02-page2.txt: [('po', 'i')]
LibM19090401-V04-02-page31.txt: [('MC', 'MILLAN')]
LibM19090401-V04-02-page45.txt: [('eyo', 't')]
LibM19090401-V04-02-page48.txt: [('fr', 'Ee')]
LibM19090401-V04-02-page49.txt: [('om', 'a')]
LibM19090401-V04-02-page50.txt: [('co', 'operate')]
LibM19090401-V04-02-page52.txt: [('ma', "n's")]
LibM19090401-V04-02-page8.txt: [('Mc', 'Dermott')]
LibM19090401-V04-02-page9.txt: [('co', 'operation')]
LibM19090701-V04-03-page10.txt: [('RECEP', 'TION')]
LibM19090701-V04-03-page29.txt: [('Speakin', 'g')]
LibM19090701-V04-03-page49.txt: [('Appe', 'als')]
LibM19090701-V04-03-page7.txt: [('Mc', 'Crory')]
LibM19091001-V04-04-page38.txt: [('si', 'n')]
LibM19091001-V04-04-page47.txt: [('Appe', 'als')]
LibM19091001-V04-04-page7.txt: [('Mc', 'Kinley')]
LibM19100101-V05-01-page17.txt: [('th', 'at'), ('Mc', 'Kenna')]
LibM19100101-V05-01-page19.txt: [('incon', 'siderable')]
LibM19100101-V05-01-page26.txt: [('gua', 'ranteed')]
LibM19100101-V05-01-page31.txt: [('SU', 'NDAY')]
LibM19100101-V05-01-page39.txt: [('CA', 'Y')]
LibM19100401-V05-02-page15.txt: [('uncon', 'fessed')]
LibM19100401-V05-02-page23.txt: [('secre', 'tary')]
LibM19100401-V05-02-page35.txt: [('PeRsECUTI', 'ON')]
LibM19100401-V05-02-page52.txt: [('legisla', 'tor')]
LibM19100401-V05-02-page6.txt: [('un', 'Christian')]
LibM19100401-V05-02-page8.txt: [('PRESI', 'DENT')]
LibM19100701-V05-03-page1.txt: [('wo', 'g'), ('UN', 'I')]
LibM19100701-V05-03-page29.txt: [('al', 'ways')]
LibM19100701-V05-03-page37.txt: [('HISTOR', 'ICAL'), ('ERRON', 'EOUS')]
LibM19100701-V05-03-page45.txt: [('ch', 'ose')]
LibM19100701-V05-03-page49.txt: [('PROTES', 'TANT'), ('Re', 'stated')]
LibM19100701-V05-03-page7.txt: [('Mc', 'Kinley')]
LibM19101001-V05-04-page15.txt: [('co', 'operate')]
LibM19101001-V05-04-page28.txt: [('PA', 'L')]
LibM19101001-V05-04-page39.txt: [('libert', 'y')]
LibM19101001-V05-04-page49.txt: [('PROTES', 'TANT')]
LibM19101001-V05-04-page50.txt: [('co', 'operation')]
LibM19110101-V06-01-page1.txt: [('nU', 'M')]
LibM19110101-V06-01-page12.txt: [('compuls', 'ion')]
LibM19110101-V06-01-page18.txt: [('Mc', 'Donald')]
LibM19110101-V06-01-page34.txt: [('consid', 'ered'), ('RE', 'LIGION')]
LibM19110101-V06-01-page35.txt: [('shep', "herd's")]
LibM19110101-V06-01-page48.txt: [('ta', 'king')]
LibM19110101-V06-01-page5.txt: [('TI', 'E')]
LibM19110401-V06-02-page1.txt: [('mo', 'Jo')]
LibM19110401-V06-02-page12.txt: [('ment', 'on')]
LibM19110701-V06-03-page14.txt: [('un', 'Christian')]
LibM19110701-V06-03-page20.txt: [('WA', 'RTBURG')]
LibM19110701-V06-03-page25.txt: [('republi', 'c')]
LibM19110701-V06-03-page32.txt: [('religi', 'o')]
LibM19110701-V06-03-page33.txt: [('Switzerlan', 'd')]
LibM19110701-V06-03-page37.txt: [('religi', 'o')]
LibM19110701-V06-03-page38.txt: [('reen', 'forced')]
LibM19110701-V06-03-page50.txt: [('expe', 'rience')]
LibM19111001-V06-04-page11.txt: [('religi', 'o')]
LibM19111001-V06-04-page18.txt: [('Pontif', 'ex')]
LibM19111001-V06-04-page38.txt: [('Co', 'n')]
LibM19111001-V06-04-page43.txt: [('ma', 'king')]
LibM19111001-V06-04-page52.txt: [('ec', 'clesiastical'), ('kl', 'EE'), ('ra', 'm'), ('MI', 'M'), ('LI', 'II'), ('Li', 'N'), ('RI', 'M')]
LibM19120101-V07-01-page12.txt: [('certif', 'ying'), ('ern', 'e')]
LibM19120101-V07-01-page33.txt: [('Notwithstand', 'ing')]
LibM19120101-V07-01-page43.txt: [('ESTAB', 'LISHMENT')]
LibM19120101-V07-01-page49.txt: [('FA', 'IN'), ('TA', 'is'), ('SI', 'TA'), ('ci', 'T'), ('devel', 'opment')]
LibM19120401-V07-02-page23.txt: [('misrepres', 'entation')]
LibM19120401-V07-02-page5.txt: [('M.', '')]
LibM19120701-V07-03-page13.txt: [('hol', 'iday')]
LibM19120701-V07-03-page2.txt: [('Co', 'ercion')]
LibM19120701-V07-03-page4.txt: [('CO', 'NG'), ('gi', 'e')]
LibM19120701-V07-03-page5.txt: [('M.', '')]
LibM19120701-V07-03-page52.txt: [('M.', '')]
LibM19121001-V07-04-page19.txt: [('mul', 'titude'), ('proclama', 'tion')]
LibM19121001-V07-04-page4.txt: [('gl', 'O'), ('ma', 'm'), ('MI', 'M')]
LibM19121001-V07-04-page49.txt: [('gOR', 'E'), ('M.', '')]
LibM19121001-V07-04-page6.txt: [('mo', 'i'), ('G.', '')]
LibM19130101-V08-01-page1.txt: [('WA', 'tTS')]
LibM19130101-V08-01-page2.txt: [('M.', ''), ('Ni', 'M')]
LibM19130101-V08-01-page6.txt: [('LI', 'BERTY')]
LibM19130401-V08-02-page1.txt: [('Lil', 'A')]
LibM19130401-V08-02-page15.txt: [('re', 'pealed')]
LibM19130401-V08-02-page24.txt: [('impor', 'tance')]
LibM19130401-V08-02-page3.txt: [('CHOOS', 'ING')]
LibM19130401-V08-02-page33.txt: [('STURDEVA', 'NT')]
LibM19130401-V08-02-page34.txt: [('cer', 'O')]
LibM19130401-V08-02-page4.txt: [('po', 'O')]
LibM19130401-V08-02-page51.txt: [('denounci', 'ng'), ('JUSTI', 'FIES'), ('re', 't')]
LibM19130401-V08-02-page7.txt: [('M.', '')]
LibM19130701-V08-03-page2.txt: [('ti', 'e')]
LibM19130701-V08-03-page3.txt: [('PRIN', 'CIPLES'), ('GREA', 'T'), ('MI', 'M'), ('MA', 'M')]
LibM19130701-V08-03-page4.txt: [('XL', 'v')]
LibM19130701-V08-03-page41.txt: [('re', 'I')]
LibM19130701-V08-03-page44.txt: [('ce', 'e')]
LibM19130701-V08-03-page48.txt: [('unlawf', 'ul')]
LibM19130701-V08-03-page49.txt: [('eX', 't'), ('ADVER', 'TISED')]
LibM19130701-V08-03-page51.txt: [('AL', 'MA')]
LibM19131001-V08-04-page12.txt: [('yo', 'ng')]
LibM19131001-V08-04-page27.txt: [('EXI', 'LE')]
LibM19131001-V08-04-page28.txt: [('troub', 'ler')]
LibM19131001-V08-04-page4.txt: [('ro', 'o')]
LibM19131001-V08-04-page41.txt: [('ecclesi', 'astical')]
LibM19131001-V08-04-page49.txt: [('ADVER', 'TISED')]
LibM19131001-V08-04-page50.txt: [('re', 'No')]
LibM19131001-V08-04-page52.txt: [('Ak', 'A')]
LibM19131001-V08-04-page7.txt: [('M.', '')]
LibM19140101-V09-01-page31.txt: [('mo', 'I')]
LibM19140101-V09-01-page33.txt: [('RE', 'A')]
LibM19140101-V09-01-page54.txt: [('ADVER', 'TISED')]
LibM19140101-V09-01-page55.txt: [('EA', 'T'), ('CO', 'PY')]
LibM19140101-V09-01-page56.txt: [('relig', 'ion'), ('Ti', 'e')]
LibM19140401-V09-02-page11.txt: [('corporatio', 'n'), ('re', 'formation'), ('Congregatio', 'n')]
LibM19140401-V09-02-page12.txt: [('un', 'der'), ('coun', 'try'), ('combina', 'tion'), ('al', 'I')]
LibM19140401-V09-02-page13.txt: [('ti', 'nes')]
LibM19140401-V09-02-page18.txt: [('ASSEM', 'BLY')]
LibM19140401-V09-02-page25.txt: [('citi', 'zens')]
LibM19140401-V09-02-page3.txt: [('CIRCULAT', 'ING')]
LibM19140401-V09-02-page35.txt: [('PROTES', 'TANT')]
LibM19140401-V09-02-page4.txt: [('Ki', 'Ng')]
LibM19140401-V09-02-page41.txt: [('MAGA', 'ZINE')]
LibM19140401-V09-02-page50.txt: [('M.', ''), ('ADVER', 'TISED')]
LibM19140401-V09-02-page52.txt: [('re', 'ligious')]
LibM19140401-V09-02-page7.txt: [('M.', '')]
LibM19140701-V09-03-page2.txt: [('M.', '')]
LibM19140701-V09-03-page20.txt: [('MC', 'ADOO'), ('FR', 'T')]
LibM19140701-V09-03-page21.txt: [('MC', 'ADOO')]
LibM19140701-V09-03-page34.txt: [('ambi', 'tion')]
LibM19140701-V09-03-page4.txt: [('indi', 'tes')]
LibM19140701-V09-03-page51.txt: [('ti', 'The')]
LibM19140701-V09-03-page9.txt: [('sp', 'oken')]
LibM19141001-V09-04-page1.txt: [('Sp', 'A')]
LibM19141001-V09-04-page26.txt: [('TI', 'The')]
LibM19141001-V09-04-page27.txt: [('Al', 'ES')]
LibM19141001-V09-04-page3.txt: [('nI', 'M')]
LibM19141001-V09-04-page31.txt: [('un', 'fearing'), ('AMERI', 'CANS')]
LibM19141001-V09-04-page38.txt: [('es', 'tablish')]
LibM19141001-V09-04-page4.txt: [('ro', 'O'), ('Ki', 'M')]
LibM19141001-V09-04-page48.txt: [('Magaz', 'ine')]
LibM19141001-V09-04-page49.txt: [('MI', 'r')]
LibM19141001-V09-04-page50.txt: [('Mit', 'T'), ('li', 'M'), ('tE', 'E')]
LibM19141001-V09-04-page51.txt: [('monarchi', 'cal')]
LibM19141001-V09-04-page52.txt: [('Al', 'I')]
LibM19141001-V09-04-page7.txt: [('M.', '')]
LibM19150101-V10-01-page2.txt: [('pre', 'vent')]
LibM19150101-V10-01-page3.txt: [('SUBSCRIP', 'TIONS')]
LibM19150101-V10-01-page4.txt: [('Ki', 'M')]
LibM19150101-V10-01-page51.txt: [('Ti', 'E')]
LibM19150101-V10-01-page52.txt: [('MO', 'M'), ('Mi', 'M')]
LibM19150101-V10-01-page53.txt: [('STIN', 'G')]
LibM19150401-V10-02-page21.txt: [('RE', 'LIGIOUS')]
LibM19150401-V10-02-page28.txt: [('impor', 'tant')]
LibM19150401-V10-02-page3.txt: [('MI', 'M'), ('YA', 'M')]
LibM19150401-V10-02-page40.txt: [('op', 'ening')]
LibM19150401-V10-02-page42.txt: [('underg', 'o')]
LibM19150401-V10-02-page46.txt: [('Re', 'formation')]
LibM19150401-V10-02-page48.txt: [('sa', 'o')]
LibM19150401-V10-02-page50.txt: [('M.', '')]
LibM19150401-V10-02-page6.txt: [('ASSEMB', 'LE')]
LibM19150701-V10-03-page11.txt: [('expec', 'tation')]
LibM19150701-V10-03-page2.txt: [('Connecti', 'cut'), ('M.', '')]
LibM19150701-V10-03-page28.txt: [('violenc', 'e')]
LibM19150701-V10-03-page3.txt: [('Ki', 'M')]
LibM19150701-V10-03-page35.txt: [('lif', 'e')]
LibM19150701-V10-03-page38.txt: [('withou', 't')]
LibM19150701-V10-03-page42.txt: [('M.', '')]
LibM19150701-V10-03-page47.txt: [('po', 'se')]
LibM19150701-V10-03-page48.txt: [('twenty-f', 'our')]
LibM19151001-V10-04-page13.txt: [('politi', 'c')]
LibM19151001-V10-04-page2.txt: [('af', 'filiated')]
LibM19151001-V10-04-page22.txt: [('peo', 'ple')]
LibM19151001-V10-04-page23.txt: [('destruc', 'tion')]
LibM19151001-V10-04-page31.txt: [('Un', 'ion')]
LibM19151001-V10-04-page48.txt: [('rO', 'O'), ('RE', 'C'), ('Ama', 'rillo')]
LibM19151001-V10-04-page49.txt: [('RI', 'M')]
LibM19151001-V10-04-page50.txt: [('Ri', 'M'), ('EM', 'F')]
LibM19160101-V11-01-page12.txt: [('re', 'fused')]
LibM19160101-V11-01-page26.txt: [('se', 'an')]
LibM19160101-V11-01-page4.txt: [('M.', '')]
LibM19160101-V11-01e-page1.txt: [('mi', 'A')]
LibM19160101-V11-01e-page16.txt: [('ss', 'H')]
LibM19160401-V11-02-page10.txt: [('OBSERV', 'ANCE')]
LibM19160401-V11-02-page22.txt: [('re', 'ligious')]
LibM19160401-V11-02-page26.txt: [('Mi', 'n')]
LibM19160401-V11-02-page3.txt: [('MO', 'no')]
LibM19160401-V11-02-page38.txt: [('Pontif', 'ex')]
LibM19160401-V11-02-page4.txt: [('vis', 'ion'), ('teac', 'her')]
LibM19160701-V11-03-page21.txt: [('diplom', 'a')]
LibM19161001-V11-04-page29.txt: [('LI', 'BER')]
LibM19161001-V11-04-page40.txt: [('LIBERT', 'Y')]
LibM19170101-V12-01-page16.txt: [('pa', 'tient')]
LibM19170101-V12-01-page2.txt: [('af', 'filiated')]
LibM19170101-V12-01-page35.txt: [('AMMUN', 'ITION')]
LibM19170101-V12-01-page5.txt: [('re', 'garded')]
LibM19170701-V12-03-page1.txt: [('Lil', 'A')]
LibM19170701-V12-03-page14.txt: [('un', 'Christian')]
LibM19171001-V12-04-page10.txt: [('POR', 'TION')]
LibM19171001-V12-04-page11.txt: [('suav', 'ity')]
LibM19171001-V12-04-page23.txt: [('re', 'forming')]
LibM19180101-V13-01-page24.txt: [('temperanc', 'e')]
LibM19180101-V13-01-page35.txt: [('Th', 'e')]
LibM19180401-V13-02-page31.txt: [('se', 'a')]
LibM19180701-V13-03-page13.txt: [('FR', 'A')]
LibM19181001-V13-04-page14.txt: [('LI', 'BER')]
LibM19181001-V13-04-page32.txt: [('LIBERT', 'Y')]
LibM19190101-V15-01-page14.txt: [('CONFESSIO', 'N')]
LibM19190101-V15-01-page20.txt: [('pa', 'per')]
LibM19190101-V15-01-page21.txt: [('prin', 'ciple')]
LibM19190101-V15-01-page22.txt: [('un', 'Christian')]
LibM19190101-V15-01-page5.txt: [('COUN', 'TRY')]
LibM19190401-V15-02-page17.txt: [('RE', 'LIGIOUS')]
LibM19190401-V15-02-page22.txt: [('Ca', 'sar'), ('sar', 'the')]
LibM19190401-V15-02-page5.txt: [('vA', 'LE')]
LibM19190701-V15-03-page2.txt: [('affil', 'iated')]
LibM19190701-V15-03-page28.txt: [('peo', 'ple')]
LibM19190701-V15-03-page32.txt: [('reli', 'gion')]
LibM19190701-V15-03-page9.txt: [('religi', 'ous')]
LibM19191001-V15-04-page1.txt: [('Lil', 'A')]
LibM19191001-V15-04-page22.txt: [('Ma', 'Ma')]
LibM19191001-V15-04-page23.txt: [('MI', 'NI')]
LibM19191001-V15-04-page25.txt: [('Ma', 'Ms')]
LibM19200101-V14-01-page2.txt: [('M.', ''), ('enfor', 'ce')]
LibM19200401-V14-02-page13.txt: [('co', 'operation')]
LibM19200401-V14-02-page14.txt: [('Sund', 'a'), ('co', 'operation')]
LibM19200401-V14-02-page23.txt: [('se', 'a')]
LibM19200401-V14-02-page35.txt: [('TI', 'THE')]
LibM19200401-V14-02-page6.txt: [('LIBERT', 'Y')]
LibM19200701-V14-03-page14.txt: [('AL', 'L')]
LibM19200701-V14-03-page15.txt: [('M.', '')]
LibM19200701-V14-03-page3.txt: [('Tir', 'A')]
LibM19200701-V14-03-page8.txt: [('re', 'enacted')]
LibM19201001-V14-04-page23.txt: [('UN', 'Christian'), ('Un', 'Scriptural')]
LibM19201001-V14-04-page31.txt: [('wa', 'n')]
LibM19201001-V14-04-page32.txt: [('SY', 'St')]
LibM19201001-V14-04-page7.txt: [('gl', 'o'), ('Mayflo', 'wer')]
In [29]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction6 Average verified rate: 0.9814180834781028 Average of error rates: 0.03366287600880411 Total token count: 1451734
In [30]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[30]:
[("'", 1499),
('m', 1326),
('d', 1254),
('e', 998),
('w', 956),
('t', 837),
('n', 776),
('r', 682),
('f', 633),
('g', 384),
('x', 271),
('u', 206),
('k', 192),
('tv', 150),
('th', 119),
('pa', 100),
('sunday-law', 92),
('z', 83),
('ex', 75),
('io', 72),
('re', 72),
('id', 71),
('co', 64),
('postmaster-general', 62),
('mo', 58),
('ga', 58),
('post-offices', 57),
('un-american', 57),
('statute-books', 56),
('va', 56),
('sunday-closing', 54),
('church-and-state', 49),
('un', 46),
('mm', 46),
('q', 44),
('mt', 42),
('attorney-general', 41),
('tion', 40),
('sunday-rest', 39),
('wm', 38),
('pp', 38),
('charta', 37),
('ro', 36),
('li', 36),
('neander', 31),
('-', 30),
('seventhday', 30),
('mi', 28),
('es', 28),
('ft', 28)]
Correction 7 -- Rejoin Split Words II¶
In [31]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction7"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
errors = reports.identify_errors(tokens, spelling_dictionary)
replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_split_words(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19060401-V01-01-page11.txt: [('de', 'calogue')]
LibM19060401-V01-01-page35.txt: [('en', 'th'), ('d', 'ak'), ('r', 'te')]
LibM19060701-V01-02-page12.txt: [('LIB', 'ERTY')]
LibM19061001-V01-03-page19.txt: [('m', 'es')]
LibM19061001-V01-03-page22.txt: [('bane', 'ful')]
LibM19070101-V02-01-page18.txt: [('IN', 'gress')]
LibM19070101-V02-01-page21.txt: [('W', 'ILLIMANTIC')]
LibM19070101-V02-01-page25.txt: [('r', 'esided')]
LibM19070401-V02-02-page31.txt: [('wine', 'bibbers')]
LibM19070401-V02-02-page36.txt: [('A', 'STI')]
LibM19070701-V02-03-page18.txt: [('WILL', 'IAMS')]
LibM19070701-V02-03-page8.txt: [('LIB', 'ERTY')]
LibM19071001-V02-04-page15.txt: [('rem', 'arkable'), ('n', 'ation')]
LibM19071001-V02-04-page32.txt: [('C', 'hr')]
LibM19071001-V02-04-page51.txt: [('for', 'th'), ('r', 'ow')]
LibM19080101-V03-01-page26.txt: [('PRES', 'IDENT')]
LibM19080101-V03-01-page31.txt: [('Cab', 'inet')]
LibM19080401-V03-02-page28.txt: [('Legis', 'lation')]
LibM19080401-V03-02-page30.txt: [('p', 'rinciple')]
LibM19080701-V03-03-page1.txt: [('ra', 'ff')]
LibM19080701-V03-03-page39.txt: [('move', 'ment')]
LibM19080701-V03-03-page43.txt: [('uni', 'versal')]
LibM19080701-V03-03-page52.txt: [('THE', 'RE'), ('L', 'um')]
LibM19081001-V03-04-page1.txt: [('V', 'oiD')]
LibM19081001-V03-04-page15.txt: [('con', 'demned')]
LibM19081001-V03-04-page38.txt: [('obj', 'ect')]
LibM19090101-V04-01-page33.txt: [('estab', 'lish')]
LibM19090101-V04-01-page52.txt: [('i', 'll')]
LibM19090401-V04-02-page2.txt: [('f', 'ri')]
LibM19090401-V04-02-page48.txt: [('fr', 'Ee')]
LibM19090701-V04-03-page34.txt: [('CHRIS', 'TIAN')]
LibM19090701-V04-03-page49.txt: [('APPEAR', 'ANCE')]
LibM19091001-V04-04-page41.txt: [('kin', 'gdom')]
LibM19091001-V04-04-page47.txt: [('APPEAR', 'ANCE')]
LibM19100101-V05-01-page14.txt: [('WASH', 'INGTON')]
LibM19100101-V05-01-page24.txt: [('per', 'se')]
LibM19100101-V05-01-page31.txt: [('L', 'OTS')]
LibM19100101-V05-01-page39.txt: [('L', 'OS')]
LibM19100101-V05-01-page49.txt: [('W', 'ASHINGTON')]
LibM19100401-V05-02-page23.txt: [('secre', 'tary')]
LibM19100401-V05-02-page5.txt: [('LIB', 'ERTY')]
LibM19100401-V05-02-page52.txt: [('C', 'HRISTIANITY'), ('Jan', 'uary')]
LibM19100701-V05-03-page32.txt: [('the', 'reof')]
LibM19100701-V05-03-page45.txt: [('ch', 'ose')]
LibM19100701-V05-03-page49.txt: [('W', 'ASHINGTON')]
LibM19101001-V05-04-page24.txt: [('MON', 'TREAL')]
LibM19101001-V05-04-page49.txt: [('Romani', 'sm')]
LibM19110401-V06-02-page18.txt: [('UNI', 'VERSITY')]
LibM19110701-V06-03-page32.txt: [('sent', 'iments'), ('to', 're')]
LibM19110701-V06-03-page37.txt: [('Chur', 'ch')]
LibM19110701-V06-03-page45.txt: [('the', 're')]
LibM19110701-V06-03-page50.txt: [('expe', 'rience')]
LibM19111001-V06-04-page16.txt: [('AMER', 'ICA')]
LibM19111001-V06-04-page52.txt: [('ec', 'clesiastical')]
LibM19120101-V07-01-page49.txt: [('FA', 'ro'), ('devel', 'opment')]
LibM19120101-V07-01-page50.txt: [('W', 'ASHINGTON')]
LibM19120401-V07-02-page48.txt: [('LIB', 'ERTY')]
LibM19120701-V07-03-page13.txt: [('hol', 'iday')]
LibM19120701-V07-03-page15.txt: [('St', 'ates')]
LibM19120701-V07-03-page2.txt: [('Co', 'ercion')]
LibM19120701-V07-03-page26.txt: [('gov', 'ernment')]
LibM19120701-V07-03-page38.txt: [('AMEND', 'MENTS')]
LibM19120701-V07-03-page4.txt: [('e', 'riK')]
LibM19120701-V07-03-page52.txt: [('A', 'VE'), ('t', 'ok'), ('N', 'Os')]
LibM19121001-V07-04-page11.txt: [('c', 'ognition')]
LibM19121001-V07-04-page29.txt: [('cit', 'ations')]
LibM19121001-V07-04-page44.txt: [('Hank', 'ow')]
LibM19121001-V07-04-page5.txt: [('R', 'EC')]
LibM19121001-V07-04-page6.txt: [('a', 'Yr'), ('a', 'dm'), ('he', 'ft'), ('I', 'lai')]
LibM19121001-V07-04-page8.txt: [('prop', 'osition')]
LibM19130101-V08-01-page24.txt: [('LIB', 'ERTY')]
LibM19130101-V08-01-page40.txt: [('state', 'ments')]
LibM19130101-V08-01-page42.txt: [('a', 'nd')]
LibM19130101-V08-01-page43.txt: [('im', 'prisonment')]
LibM19130101-V08-01-page49.txt: [('T', 'ennessee')]
LibM19130101-V08-01-page50.txt: [('Rev', 'ised'), ('and', 'Re')]
LibM19130101-V08-01-page6.txt: [('I', 'NG')]
LibM19130401-V08-02-page2.txt: [('Association', 'al')]
LibM19130401-V08-02-page25.txt: [('des', 'ecration')]
LibM19130401-V08-02-page30.txt: [('the', 're')]
LibM19130401-V08-02-page50.txt: [('and', 'Re')]
LibM19130701-V08-03-page2.txt: [('e', 'ta')]
LibM19130701-V08-03-page42.txt: [('GOV', 'ERNMENT')]
LibM19130701-V08-03-page51.txt: [('AL', 'MA')]
LibM19131001-V08-04-page12.txt: [('yo', 'ng')]
LibM19131001-V08-04-page41.txt: [('establish', 'ment')]
LibM19140101-V09-01-page19.txt: [('IN', 'TERIOR')]
LibM19140101-V09-01-page23.txt: [('govern', 'ment')]
LibM19140101-V09-01-page31.txt: [('s', 'AO')]
LibM19140101-V09-01-page38.txt: [('com', 'memoration')]
LibM19140101-V09-01-page53.txt: [('Ar', 'ticles')]
LibM19140101-V09-01-page56.txt: [('e', 're')]
LibM19140401-V09-02-page11.txt: [('i', 'ons')]
LibM19140401-V09-02-page12.txt: [('combina', 'tion')]
LibM19140401-V09-02-page13.txt: [('ti', 'nes')]
LibM19140401-V09-02-page25.txt: [('per', 'se')]
LibM19140401-V09-02-page4.txt: [('M', 'UN'), ('g', 'EE'), ('to', 'RE'), ('M', 'EH')]
LibM19140401-V09-02-page49.txt: [('e', 'th')]
LibM19140401-V09-02-page52.txt: [('re', 'ligious')]
LibM19140701-V09-03-page17.txt: [('and', 're')]
LibM19140701-V09-03-page20.txt: [('I', 'ts')]
LibM19140701-V09-03-page3.txt: [('or', 'zo')]
LibM19140701-V09-03-page30.txt: [('A', 'pologete')]
LibM19140701-V09-03-page34.txt: [('con', 'trary'), ('ambi', 'tion')]
LibM19140701-V09-03-page4.txt: [('M', 'io'), ('indi', 'tes'), ('M', 'UT')]
LibM19140701-V09-03-page49.txt: [('i', 'ke')]
LibM19141001-V09-04-page13.txt: [('by', 're')]
LibM19141001-V09-04-page29.txt: [('can', 'es')]
LibM19141001-V09-04-page3.txt: [('of', 'tenest'), ('m', 'om'), ('or', 'zo')]
LibM19141001-V09-04-page38.txt: [('es', 'tablish')]
LibM19141001-V09-04-page4.txt: [('M', 'Eg')]
LibM19141001-V09-04-page49.txt: [('i', 'nn'), ('I', 'ntr')]
LibM19141001-V09-04-page50.txt: [('L', 'os')]
LibM19150101-V10-01-page15.txt: [('con', 'sistency')]
LibM19150101-V10-01-page21.txt: [('per', 'se')]
LibM19150101-V10-01-page22.txt: [('per', 'se')]
LibM19150101-V10-01-page3.txt: [('Y', 'ou'), ('or', 'zo')]
LibM19150101-V10-01-page35.txt: [('Y', 'ork'), ('CRU', 'ISER')]
LibM19150101-V10-01-page4.txt: [('M', 'Eg')]
LibM19150101-V10-01-page53.txt: [('t', 'iro')]
LibM19150401-V10-02-page14.txt: [('pam', 'phlets')]
LibM19150401-V10-02-page23.txt: [('WASH', 'INGTON')]
LibM19150401-V10-02-page25.txt: [('WASH', 'INGTON')]
LibM19150401-V10-02-page28.txt: [('CAP', 'TIVE')]
LibM19150401-V10-02-page3.txt: [('M', 'UN'), ('or', 'zo')]
LibM19150401-V10-02-page46.txt: [('CALI', 'FORNIA')]
LibM19150401-V10-02-page49.txt: [('HUN', 'DRED')]
LibM19150701-V10-03-page19.txt: [('C', 'opyright')]
LibM19150701-V10-03-page2.txt: [('Massa', 'chusetts'), ('Col', 'lege')]
LibM19150701-V10-03-page3.txt: [('M', 'Eg'), ('illus', 'trated')]
LibM19150701-V10-03-page4.txt: [('I', 'Ng')]
LibM19151001-V10-04-page2.txt: [('af', 'filiated'), ('Col', 'lege')]
LibM19151001-V10-04-page48.txt: [('C', 'UE'), ('O', 'RE'), ('Ama', 'rillo')]
LibM19151001-V10-04-page49.txt: [('m', 'Es')]
LibM19151001-V10-04-page51.txt: [('E', 'Li')]
LibM19160101-V11-01-page12.txt: [('who', 're')]
LibM19160101-V11-01-page26.txt: [('per', 'se')]
LibM19160101-V11-01-page27.txt: [('per', 'se')]
LibM19160101-V11-01-page28.txt: [('R', 'ighteousness')]
LibM19160101-V11-01-page44.txt: [('r', 'ef')]
LibM19160101-V11-01-page6.txt: [('KEN', 'TUCKY')]
LibM19160101-V11-01-page7.txt: [('Calif', 'ornia')]
LibM19160101-V11-01e-page11.txt: [('per', 'se')]
LibM19160401-V11-02-page22.txt: [('mat', 'ters')]
LibM19160401-V11-02-page4.txt: [('C', 'hr')]
LibM19160401-V11-02-page48.txt: [('Mar', 'shal')]
LibM19160701-V11-03-page14.txt: [('A', 'fter')]
LibM19160701-V11-03-page15.txt: [('de', 'partment')]
LibM19160701-V11-03-page34.txt: [('r', 'ea')]
LibM19160701-V11-03-page39.txt: [('C', "esar's")]
LibM19161001-V11-04-page15.txt: [('CHAR', 'ACTERISTIC')]
LibM19161001-V11-04-page2.txt: [('inter', 'ests')]
LibM19170101-V12-01-page2.txt: [('Ten', 'nessee'), ('af', 'filiated')]
LibM19170101-V12-01-page21.txt: [('per', 'se')]
LibM19170101-V12-01-page3.txt: [('Sab', 'batarians')]
LibM19170101-V12-01-page30.txt: [('pro', 'hibit')]
LibM19170101-V12-01-page6.txt: [('and', 're')]
LibM19170701-V12-03-page4.txt: [('I', 'RE')]
LibM19170701-V12-03-page9.txt: [('f', 'undamentals')]
LibM19171001-V12-04-page10.txt: [('CON', 'SUMED')]
LibM19171001-V12-04-page11.txt: [('suav', 'ity')]
LibM19171001-V12-04-page19.txt: [('ha', 're')]
LibM19171001-V12-04-page30.txt: [('gen', 'eral'), ('S', 'HUTE')]
LibM19180101-V13-01-page11.txt: [('intro', 'duced')]
LibM19180101-V13-01-page12.txt: [('C', "esar's")]
LibM19180101-V13-01-page17.txt: [('deter', 'Mination')]
LibM19180101-V13-01-page24.txt: [('a', 'nd')]
LibM19180101-V13-01-page31.txt: [('a', 're')]
LibM19180401-V13-02-page20.txt: [('or', 'dained')]
LibM19180401-V13-02-page24.txt: [('MASSA', 'CHUSETTS')]
LibM19180701-V13-03-page16.txt: [('THE', 'TA')]
LibM19180701-V13-03-page19.txt: [('to', 'ut')]
LibM19181001-V13-04-page11.txt: [('per', 'se')]
LibM19181001-V13-04-page20.txt: [('peril', 'ous')]
LibM19190101-V15-01-page21.txt: [('per', 'se')]
LibM19190401-V15-02-page22.txt: [('to', 'Ca')]
LibM19190401-V15-02-page23.txt: [('le', 'Fevre')]
LibM19190701-V15-03-page18.txt: [('minor', 'ity')]
LibM19190701-V15-03-page2.txt: [('Of', 'fice'), ('affil', 'iated')]
LibM19190701-V15-03-page31.txt: [('per', 'se')]
LibM19190701-V15-03-page34.txt: [('in', 'struction')]
LibM19190701-V15-03-page6.txt: [('As', 'sn')]
LibM19191001-V15-04-page17.txt: [('bap', 'tism')]
LibM19191001-V15-04-page21.txt: [('O', 'NE')]
LibM19191001-V15-04-page24.txt: [('gov', 'ernment')]
LibM19200101-V14-01-page5.txt: [('a', 'nd')]
LibM19200101-V14-01-page6.txt: [('con', 'cerning')]
LibM19200401-V14-02-page11.txt: [('W', 'ashington')]
LibM19200401-V14-02-page22.txt: [('per', 'se')]
LibM19200401-V14-02-page23.txt: [('per', 'se')]
LibM19200401-V14-02-page27.txt: [('pro', 'tection')]
LibM19200401-V14-02-page36.txt: [('z', 'oo')]
LibM19200701-V14-03-page19.txt: [('ques', 'tions')]
LibM19200701-V14-03-page25.txt: [('per', 'se')]
LibM19200701-V14-03-page36.txt: [('t', 'itI')]
In [32]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction7 Average verified rate: 0.9815507052480597 Average of error rates: 0.033392883345561265 Total token count: 1451546
In [33]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[33]:
[("'", 1499),
('m', 1314),
('d', 1253),
('e', 997),
('w', 951),
('t', 835),
('n', 774),
('r', 677),
('f', 631),
('g', 383),
('x', 271),
('u', 206),
('k', 192),
('tv', 150),
('th', 117),
('pa', 100),
('sunday-law', 92),
('z', 82),
('ex', 75),
('io', 71),
('id', 71),
('co', 64),
('postmaster-general', 62),
('re', 59),
('mo', 58),
('ga', 58),
('post-offices', 57),
('un-american', 57),
('statute-books', 56),
('va', 56),
('sunday-closing', 54),
('church-and-state', 49),
('mm', 46),
('q', 44),
('un', 43),
('mt', 42),
('attorney-general', 41),
('tion', 40),
('sunday-rest', 39),
('wm', 38),
('pp', 38),
('charta', 37),
('ro', 35),
('li', 35),
('neander', 31),
('-', 30),
('seventhday', 30),
('mi', 28),
('ky', 28),
('religio-political', 27)]
Correction 8 -- Remove Long Tokens with Repeating "m"s¶
In [34]:
reports.long_errors(errors_summary, min_length=17)[:50]
Out[34]:
(['countermemorialists', 'immumnitommuummunitimmtwuntnimmummiona', 'antiprohibitionists', 'mmierriotitimmiiembitimiimerimiim', 'vuaziffiemunimeluitennotinutnnifin', 'nrmomoommomomrsoommommokmagmkwon', 'iiiwtierttititiiiit', 'iiimiumiummummimmominimmimmimmimmihmimmiimminimummummimmumummemimmimmimm', 'church-and-stateunion', 'iiiirreriiitlhinifid', 'simmmismwklaiigitil', 'mimmimmummiiimmimii', 'iiiiiiiiiiiliiiiiiii', 'rwiumwimmiiiiimimmumnii', 'mmmmmmmmmmmmmmmmmns', 'pilurprmarasigimmt', 'preventivejurisdiction', 'miilmilliiimilliifilmidid', 'enosnantiemotainotientetiemtio', 'mmozmrommomommonorummanoz', 'humilffilitiffinummiffiiimminlimmummiiiiiiiiimm', 'xramoxmozramommommocmommmommx', 'seventh-day-observing', 'yffinsmemmmmmmmwmswmmmmmmmnim', 'ititeiltintonecfctration', 'migininaugimmikimmu', 'latitteilommtwtfifolror', 'mvstimpsmgrecuttliv', 'iullnnunulnmmuumnuluunnunuumlt', 'mgraotrtraccommozraglgraccommicami', 'aommommemsatammogarmaxsorarmwelimmelinuilmenompommixliniewtlominermiimmurpimumnuommurm', 'iiiiiiiiiiimmiumulinuilmilne', 'better-established', 'rnomommonoszuzummummanmmollommom', 'nemmiwiiiimortrinl', 'ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims', 'counter-petitioners', 'imummintommumnimminumummmummlimmunumummummmunumumminutimmummmitimumnimmm', 'inimlfilninninilli', 'mmmmmmmmermmmmmmmmmmmmmmm', 'gawavaiaaamminonwirit', 'xxxxxxxxxxmocxxxxxxxx', 'miiiiiniiiiiiimilimiiiiiiiiiiimiliiiiiimmionimmiumingiiiiiiiiiiiimmiliiiimmomiiiiminwiliiiiiiiiiiiiiiiiiinminsummuilimiliiinimonnimmiiiiiiiiiiiiiiiiiiimiimiq', 'wimiimilliiiiiiiiiiiiimmimmithiiiiimmumminunifiniiiii', 'unimilismimitimittnismitimmimittlimummumitemitimmummmintimmimiumiumnitimllminiummuntiummilmi', 'anumilimminiumminnimminumminnummiliniummiliml', 'affindlitilffilillikvillehd', 'mmmmmmmmmuimmmmmmrimmmmmmmmmmmmmmm', 'muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini', 'rsomravramcmotrammragmonommxmommansom', 'mmmmmmffirimmmmmmmmmmmmemmmmmmm', 'antiecclesiastical', 'penmenisrisdinaorabsesiceewer', 'ractimiriiiiitiriiltililietcliteiviivittiiiitievalifittia', 'politico-religious', 'niviitiesialiffiliifiiilrimlnii', 'unemeeeeeeneeleeneeetelli', 'vaaffisl-co-pacific', 'lllllllllllllllllllllllllllllllllllllllllllllllllllllllllll', "mmmosmermsimmmemmnm'iligh", 'mmmmmnemmommmnmmmme', 'religion-and-state', 'ifaimitialiumuumnimimmtmimummuimmunimiummitinimminimmumminummumunnommumminumninummunim', 'iiiiiiiiiiiiiiiiiiiiiii', 'mgimmmmmmmmmmmmmmmmmmmmmmmmmmmm', 'msossgmaiaassmgeamakawmalnarlaa', 'lecosniiionpainoticsovicesfirde', "linunimmimrs'inumumu", 'rrrprrrrrrrritrrrf', 'lamjukgmdavagixiatm', 'toforeigncountries', 'wmiwimiiiiiiiiiiiiiiimmiumm', 'tixtreciremyemiresnirtiortiorrioritortiorrii', 'democraticrepublican', 'hihinhiniiiiiiirin', 'two-and-a-halfmile', 'commander-in-chief', 'self-determination', 'mmimumumwwwiiiiiiiiiiiiiiiiiiiiiiiiiiiiilleeleteeemememme', 'nmmmommrsonomrznemonmonomnrmotruomonom', 'emmonmenommomumommommommonotrnommirnmn', 'iiiiiiilliniemniiiiii', 'tsereanctosrothciertny', 'snlrnuurinunuununa', 'rimareinsmiummisimememesiermem', 'mcommommommuommommonomm', 'tiarezemieeleismikiimeeemiewew', 'emerhilsamalsinalso', 'pimumwmummuniumummtimmtunit', 'burckhardt-schatzmann', 'constitution-makers', 'limmiiiiimiiiiiiiiiiiiiiiiiiiiiiiiiiiiie', 'hummmtimmmummummummore', 'iiiiiiiiiiiiiiiiiii', 'feemowiwiedimeiersig', 'one-day-rest-inseven', 'maimimiummaimmismilinuminutimmuminiumilmmitimmummumwmoimminummiumnimmititilowinitimiiiti', 'moerlrlreemoinmemmommmommikumoe', 'mmmmmmmmmmmmmmmmmmmmmm', 'postmaster-general', 'establishingreligious', 'one-day-rest-in-seven', 'emelieniwionsavibannotisloneemite', 'vice-president-elect', 'faipmkrivmriiyamkrkilsriiirrrriiiirrrrisikv', 'rilifininniimummaimumeiminiiiiiiiniiiiiiiiiiiiiiiiiimmpumummhimumwommiimmiimmiiiiimmimmimmimiumnimimmim', 'e-illmllommimilimmilummumenimilmnimuningumminumiiiiimilmimmunimifinnilionontimmigimiliiimiffiliffilimiliiiiiiiiiiiiimm', 'unummonummtummunialliimumiir', 'hiiiiiiiiiiiiiiiii', 'wimmummtmmuntifiummiummiummmommumwffimmiummummummulmtmminammmmunnummmumummummumummr', 'smmusissommummusismussmimussissusissimmimmiiiiiiium', 'hiimiiiiiiiiiiiiiiiiniiumiuminimui', 'iiiimmummiatumbiiiiimbimummiiiiiiimmimm', 'xxxxxxxxxxxxxxxxxx', 'ramtersimrammemarkirracarmermartm', 'mmipoinnonfoemnnioannim', 'nomenegvoicedienast', 'inoomalloisossimis', 'nmommumammammunnumumuum', 'mozmnmwommolzemrammonommommommommn', 'iillrieeiaiiirriardi', 'agaomoorwairalioigtiargial', 'lmiiiiiiiiiiiiiiiiiii', 'ihilibillilltreterita', 'secretary-ofthe-interior', 'conscience-fettered', 'muummmunnummonmummumuummmunimmupm', 'campbell-bannerman', 'impreeloreesocoeselaal', 'ffassininsonsiwoloolgasers', 'unnnnnnunnmamnumununnmmunmniiimm', 'non-sunday-observing', 'piihnummuumbhimurunimenhomuummununimminhhohuminumuunummunnuhhhimminbui', 'mheminuffinfillffilimis', 'alliallallialliallaillassiiiiiiiiiiimiiiiiiiiiiiiim', 'mmmmeimmimmmmmmmmmmmmmmttmmmmmtim', 'inforfaisiomomincomocadoviemmigoimiwa', 'smossmunssunommummusnmussmssmissussmsmussmmssmissmossmussussummmmusstmosssmsmssmnnsmimmumsmimmwsrmossumms', 'sssssssssssssssssssss', 'mmiiiimumhimimmiiiiimm', 'statesman-preacher', 'inter-denominational', 'nosonmomorwemcwaint', 'reconstructionists', 'mmommmommommommmom', 'mnrummommommoncommommmown', 'nfiemmeemmeemmmeeeeeeeeeeeemeeem', 'ipuitnilinimilliiiinulillluunii', 'ehmmmmmmmmmmmmmmmmmmmmmmmmmmmmm', 'mmmmommmmmmmwmnirimemmmmmm', 'wamegkimnmrummmmesemvmmmrmk', 'nininimummujimininlini', 'lieutenantgovernor', 'self-glorification', 'jiuwuuwnnwumllonllllllllhihiiuiiuihul', 'ommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt', 'self-aggrandizement', 'mummmummimmimmimmimmiiiimmiiiimmiummimmiimimmimmimmiihimmiimmi', 'religious-sabbatic', 'iitoitllislossoliiosill', 'intheszealwarfejrrnicenathemoatiry', 'religio-constitutional', 'iiiiiiiiiiiiiiiiiiii', 'personal-heart-conversion', 'much-to-be-desired', 'netlftrrmmidhimizmmommommilvmm', 'curiosity-gratifying', 'ummuummutmummuummmiummummummummumumminummonummunummmummuummuuttimmumut', 'immuunumummummmtuummummiumunumtumffimmmutummunmuu', 'mconslfaitmeegtifo', 'monmmaimmenimmmmmmmemm', 'itmlinillitiniiimmullimitilittiminunitiffitiminimmituniumnitmitilistimmilimutiiiiiimitimitintiumnimmummitm', 'emsmwmmmwmmmnmhoneni', 'eimmiumiiiiiiimmumummiiimumillimimminimumaniumiffiffimmiummuumniimmommumiummlinmmiumullimmi', 'selfaggrandizement', 'flummimmumommifiumwmffimmumnimmimummlimmumimmmunlimmmummmumuummumummlimuummumumumung', 'faimmeigegrommegfa', 'satisfactostruction', 'miommooomoomsoicimuchmusuoihiuoimisiummicosississinasseeememeescs', 'monommomozragrammxragnm', 'pecsetemmeltigazolom', 'ssumsffismssumusummummtmussessumnsumussunstsmossmossmwsussmumunnmunsummossumsnwssumminimmsnintminimmusmussinissunues', 'trgatimedimegoovemotwo', 'iiimillilintirnimmimmiiiiiiiiiiiiiiitiiiiiiiiiiiiiffilii', 'animmiimmimmiimmiimimmiwimmumwhimommommiimmihmummimiummimmemiimmummummenumiiiimmuumminummis', "attorney-general's", 'rimmineiiiiiiiiimirre', 'sunday-enforcement', 'momeoecimmoimommomommoiximm', 'nitroenrtenaddlimeg', 'mititayerwiriiiinicrierier', 'twenty-four-hour-day', 'atssussusumoususissonclaciiimmiimmisiscommissi', 'ragmmmmmmmmmmmmmmmmmmmmimmm', 'mmmmmmmmmmmmmmmmme', 'xectimmecemommommiimommommomme', 'mmohcomemmaragraanilmmmohm', 'iiiiiiiinillitiiii', 'criiitriatoyearetriarmireirntrecltwieviretriarctieanyaremiractmiteetreowehatio', 'lllllllllllllllllllllllllll', 'state-and-religion', 'compulsory-sunday-law', 'iiiiiiiiiiiiiiiiiiiiiiiiiii', 'hmhimimmiiiiiimmihmiimumm', 'iiiiiiiiiiiiiiiiiiiii', 'mouaamaaammmaaaaaaaaamamanmmammmammimaaaaaamaaaaaammiaaaaaa', 'illlllulllllllllllllllllllllllllllll', 'iwiiiiiiiiiiiiiiiiii', 'associate-justices', 'mmumiimmumiummimmiimmiiimimmiwimmumbiumummuimmiimmwimummummirmiumie', 'consaalermtooldlny', 'counterdenunciations', 'wralrammimmrzrznomnommgmmonom', 'iiiiiiiimmiiiiiiiiiiiiimmium', 'iimmiimmummuimmimummwimummmimmimimmiummmummuminwimbhmmimmiliniffillinnuffiffill', 'obviouslyagreement', 'one-dayof-rest-in-seven', 'mezmommommonommomommommommommmom', 'rrigtreatiariiirriiriiiiriiiitrivittioriiilrrictiiilriiiitii', 'mmommommimommotimmotmm', 'history-confirming', 'semi-ecclesiastical', 'lffiffimffithimmouninoffimmommummuommunimmonwiniiiiminnumumminriumminlimminiiiiiiiiiiimmonimum', 'tomplonsesolomerol', 'mmmmmmnswmmesimmornmmmmmm', 'intelligent-looking', 'ivosengtoexirmemed', 'suspension-bridges', 'self-righteousness', 'miummuiiiiiiiiiimmummmiiiiihimmmummimmummimmimi', 'mmemmmmmmmmmmeemmmemmmmmmmmmm', 'mmmnwnsommmmmmmmrmmmolm', 'iumitimumummumunnumaintimmummumumiumummumummtunmitimuumminnimuummuummumminumismiumnimmuntimmmuthw', 'five-million-dollar', 'esnmemmmmmmrimmmmeenmmemmm', 'politico-ecclesiastical', 'counterallegations', 'meenmenmmmmmmmmmawknmmmmmeg', 'mmmmmmmmmmmmmmmmmmmx', 'alaska-yukon-pacific', 'ecemoictiemememoodemeeeme', 'commandment-keepers', 'trothofabusesandegurpatent', 'trading-with-the-enemy', 'go-to-churchor-stay-indoors', 'bureau-of-military-intelligence', 'mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', 'seventeenth-century', 'iitiiiitiwiiiititivaititignifittaiiiitqawilitilitit', 'mmniummiunrimiiiiiiiilfmfiinotoiiimun', 'ussosiiimiwohmiiiiiiiismisoisisiiiiminallioisoisoisoisosiososi', 'mrnmimmmm-rimmmmmmoamrimmmmm', 'prescott-wilson-tumulty', 'nifilnimifintilllillflillifilnifilmiummiiffillfill', 'inimiiiiiminunimmilimumusinominimuninimmilmr', 'emirmeilsaarsinemiliehmee', 'rograssmargmeermirl', 'ttimilimmumulminnittinitintinninitutimmi', 'frankfort-on-the-main', 'glilihiliiiiiiraiii', 'religious-legislation', 'intfilnilhimirimihimmihimiirminlnimimiriminiimium', 'minimmiiiiiiiiiimihwohimulla', 'one-day-of-rest-in-seven', 'uvrapsimisulswipampiampv', 'lieutenant-colonel', 'iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii', 'yhmommomownwmmmmmmm', 'half-pintof-claret', 'mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx', 'immegmmmmmmmmmmmmmmmmmr', 'mmmeammmmmmmmmmmmmmmmmmmemmmmm', 'parochial-school-system', 'ztkirmintzflrmerifranc', 'mramiluesimairrimamesiemiamemilie', 'emimmiummehmemeimmimminimmummminimmeminimmumummemminiummunieli', 'agretiitilitltitstriffigtisifitiveram', 'wommiumniffunivirlsoir', 'maher-shalal-hash-baz', 'xxxxxxxxxxxxxxxxxxxxxx', 'xxxxxxxxxxxxxxxxxxxxx', 'ossionosollsomasismisiiiiiisiiimissimisomallaallallaillaffluss', 'mommummmufflummunmuummmutommummmuummmumumummummumunummummuunmuumwo', 'governmentsupported', 'immmotzmotatmtmommzum', 'mmommomeommmmozmommotrmmgramopagr', 'heaven-enlightened', "postmaster-general's", 'mmgrommmmommgrmommmoromrmonorz', 'succeedinggenerations', 'imememememeinimeimii', 'siiiiiiilitaiiiiiiiiiaisill', 'self-contradictory', 'ostammosanosonsorr', 'rnitivittiltifirmi', 'muniummmitimlinini', 'ormucesemmommannumorammosimemaamoutammovomnumeammnommukumumonmustormmummunno', 'counterdemonstrations', 'i-ifidairicliiiriiirroi', 'thefactthattheyinvolvethevitalprinciple', 'inmpaiavimmipamipammmiximp', 'demonstrainfluence', 'nunnnunnuuuuuuuuuuunnuuuuuuuuuunnnuuunmuuuumusuuuuunuuuuuuuuuuuuuuuununnnnnunuuuuunuuuuuuuuunuwuum', 'immumimilimitmliminiiimiiiiiiiiiiiiiiiiiiiimmiiiiiiiiiiimmintmill', 'milliummiumunmionwimmimmiumr', 'weiverreitaararforreahaarivitoroyerriiivii', 'vriliriiifiertailitarectrinfeltriatiatictitlifie', 'iiimuumimiiiiiimhomidfinnlinlinnflunnhohhohimimhhommilinlinflo', 'maher-shalalhash-baz', 'jerusalem-to-jericho', 'church-and-state-union', 'mimmomiosomosoissoisioissossosivissossicsiiiss', 'hriimmiiiiimeimiiimihni'], 17)
Remove long tokens with long strings of "m", "i", "l", "x"
In [35]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = "correction7"
cycle = "correction8"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
replacements = []
replacements.append(clean.check_for_repeating_characters(tokens, "m"))
replacements.append(clean.check_for_repeating_characters(tokens, "M"))
replacements.append(clean.check_for_repeating_characters(tokens, "i"))
replacements.append(clean.check_for_repeating_characters(tokens, "I"))
replacements.append(clean.check_for_repeating_characters(tokens, "l"))
replacements.append(clean.check_for_repeating_characters(tokens, "x"))
replacements.append(clean.check_for_repeating_characters(tokens, "X"))
replacements.append(clean.check_for_repeating_characters(tokens, "u"))
replacements.append(clean.check_for_repeating_characters(tokens, "n"))
replacements = [item for sublist in replacements for item in sublist]
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19080101-V03-01-page1.txt: [('wimmummtmmuntifiummiummiummmommumwffimmiummummummulmtmminammmmunnummmumummummumummr', ' '), ('JiuWuuWnnWumllonllllllllhIHIIUIIUIHUL', ' ')]
LibM19080401-V03-02-page1.txt: [('ifaimitialiumuumnimimmtmimummuimmunimiummitinimminimmumminummumunnommumminumninummunim', ' '), ('mutummimmomminumummummumunimmiumummummlimummmumumunummtimummimintowitmmummrx', ' '), ('hummmtimmmummummummore', ' '), ('lllllllllllllllllllllllllll', ' '), ('lllllllllllllllll', ' ')]
LibM19080701-V03-03-page1.txt: [('muminatatimiumumuutumitimmittimmummminnumminumuffiummumummunnomiminummuummummimmumnini', ' '), ('Eimmiumiiiiiiimmumummiiimumillimimminimumaniumiffiffimmiummuumniimmommumiummlinmmiumullimmi', ' '), ('MMIMMIMMIMUMMIP', ' '), ('Eimmiumiiiiiiimmumummiiimumillimimminimumaniumiffiffimmiummuumniimmommumiummlinmmiumullimmi', ' ')]
LibM19081001-V03-04-page1.txt: [('flummimmumommifiumwmffimmumnimmimummlimmumimmmunlimmmummmumuummumummlimuummumumumung', ' '), ('MMUMIIMMUMIUMMIMMIIMMIIIMIMMIWIMMUMBIUMUMMUIMMIIMMWIMUMMUMMIRMIUMIE', ' '), ('HumilffilitiffinumMIffiiimminlIMMUMMIIIIIIIIIMM', ' '), ('MMUMIIMMUMIUMMIMMIIMMIIIMIMMIWIMMUMBIUMUMMUIMMIIMMWIMUMMUMMIRMIUMIE', ' '), ('UMMIIIIIIMIIIIIM', ' ')]
LibM19090101-V04-01-page1.txt: [('iumitimumummumunnumaintimmummumumiumummumummtunmitimuumminnimuummuummumminumismiumnimmuntimmmuthw.', ' '), ('iumitimumummumunnumaintimmummumumiumummumummtunmitimuumminnimuummuummumminumismiumnimmuntimmmuthw.', ' ')]
LibM19090401-V04-02-page1.txt: [('HMHIMIMMIIIIIIMMIHMIIMUMM', ' '), ('PIIHNUMMUUMBHIMURUNIMENHOMUUMMUNUNIMMINHHOHUMINUMUUNUMMUNNUHHHIMMINBUI', ' '), ('HMHIMIMMIIIIIIMMIHMIIMUMM', ' '), ('unnnnnnunnmamnumununnmMunmNIIIMM', ' ')]
LibM19090701-V04-03-page1.txt: [('ummuummutmummuummmiummummummummumumminummonummunummmummuummuuttimmumut', ' '), ('MIUMMUIIIIIIIIIIMMUMMMIIIIIHIMMMUMMIMMUMMIMMIMI', ' '), ('MIUMMUIIIIIIIIIIMMUMMMIIIIIHIMMMUMMIMMUMMIMMIMI', ' '), ('lllllllllllllllllllllllllllllllllllllllllllllllllllllllllll', ' '), ('ummuummutmummuummmiummummummummumumminummonummunummmummuummuuttimmumut', ' ')]
LibM19100101-V05-01-page1.txt: [('nmommumammammunnumumuuM', ' '), ('MUMMMUMMIMMIMMIMMIMMIIIIMMIIIIMMIUMMIMMIIMIMMIMMIMMIIHIMMIIMMI', ' '), ('rilifininniiMUMMAIMUMEIMiniiiiiiiniiiiiiiiiiiiiiiiiiMMPUMUMMHIMUMWOMMIIMMIIMMIIIIIMMIMMIMMIMIUMNIMIMMIM', ' '), ('rilifininniiMUMMAIMUMEIMiniiiiiiiniiiiiiiiiiiiiiiiiiMMPUMUMMHIMUMWOMMIIMMIIMMIIIIIMMIMMIMMIMIUMNIMIMMIM', ' '), ('MUMMMUMMIMMIMMIMMIMMIIIIMMIIIIMMIUMMIMMIIMIMMIMMIMMIIHIMMIIMMI', ' '), ('rilifininniiMUMMAIMUMEIMiniiiiiiiniiiiiiiiiiiiiiiiiiMMPUMUMMHIMUMWOMMIIMMIIMMIIIIIMMIMMIMMIMIUMNIMIMMIM', ' '), ('lliilligijnirMli"Illj', ' ')]
LibM19100401-V05-02-page1.txt: [('unummonummtummunialliimumiir', ' '), ('ommumniummunuimiumuutimutimmulummimmiummintomunmumumummumumumnomminuninumninummumumummtuntiummirt', ' '), ('WIMIIMilliiiiiiiiiiiiimmimmithiiiiimmumminunifiniiiii', ' '), ('mommummmufflummunmuummmutommummmuummmumumummummumunummummuunmuumwo', ' '), ('MMIIIIMUMHIMIMMIIIIIMM', ' '), ('iiiimmummiatUMBIIIIIMBIMUMMIIIIIIIMMIMM', ' '), ('WIMIIMilliiiiiiiiiiiiimmimmithiiiiimmumminunifiniiiii', ' '), ('iiiimmummiatUMBIIIIIMBIMUMMIIIIIIIMMIMM', ' '), ('mommummmufflummunmuummmutommummmuummmumumummummumunummummuunmuumwo', ' ')]
LibM19100701-V05-03-page1.txt: [('imummintommumnimminumummmummlimmunumummummmunumumminutimmummmitimumnimmm', ' '), ('immuunumummummmtuummummiumunumtumffimmmutummunmuu', ' '), ('immuunumummummmtuummummiumunumtumffimmmutummunmuu', ' ')]
LibM19101001-V05-04-page1.txt: [('MIMMIMMUMMIIIMMIMII', ' '), ('ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS', ' '), ('EMIMMIUMMEHMEMEIMMIMMINIMMUMMMINIMMEMINIMMUMUMMEMMINIUMMUNIEli', ' '), ('IIIMIUMIUMMUMMIMMOMINIMMIMMIMMIMMIHMIMMIIMMINIMUMMUMMIMMUMUMMEMIMMIMMIMM', ' '), ('ANIMMIIMMIMMIIMMIIMIMMIWIMMUMWHIMOMMOMMIIMMIHMUMMIMIUMMIMMEMIIMMUMMUMMENUMIIIIMMUUMMINUMMIS', ' '), ('MINIMMIIIIIIIIIIMIHWOHIMUlla', ' '), ('liMMIIIIIMIIIIIIIIIIIIIIIIIIIIIIIIIIIIIE', ' ')]
LibM19110101-V06-01-page1.txt: [('...ffiummummiummunnummumummmumumummummunamummunummuumummmunummunnummumummumnitumnims', ' '), ('muummmunnummonmummumuummmunimmuPm.n', ' '), ('IIMMIIMMUMMUIMMIMUMMWIMUMMMIMMIMIMMIUMMMUMMUMINWIMBHMMIMMIliniffillinnUffiffill', ' '), ('mmniummiunrimiiiiiiiilfmfiinotoiiimun', ' '), ('WMIWIMIIIIIIIIIIIIIIIMMIUMM', ' '), ('iullnnunulnmmuumnuluunnunuumlt', ' ')]
LibM19110401-V06-02-page48.txt: [('MMMMMIMIMMMMMMMMM', ' '), ('MMMMMMMMMMMMMMMMME', ' '), ('IMMEgMMMMMMMMMMMMMMMMMR', ' '), ('MMMMMMMMMIMMM', ' ')]
LibM19110401-V06-02-page49.txt: [('xxxxxxxxxxxxxxxxxxxxxx', ' '), ('XXXXXXXXXXXXXXXXXXXXX', ' ')]
LibM19110701-V06-03-page1.txt: [('pimumwmummuniumummtimmtunit', ' '), ('Milliummiumunmionwimmimmiumr', ' '), ('iiimillilintirniMMIMMIIIIIiiiiiiiiiitiiiiiiiiiiiiiffilii', ' '), ('IIIIIIIIMMIIIIIIIIIIIIIMMIUM', ' ')]
LibM19110701-V06-03-page48.txt: [('FAIPMKRIVMRIIYAMKRKILSRIIIRRRRIIIIRRRRISIKV', ' ')]
LibM19110701-V06-03-page49.txt: [('xxxxxxxxxxmocxxxxxxxx', ' '), ('XXXXXXXXXXXXXXXXX', ' '), ('XXXXXXXXXXXXXX', ' ')]
LibM19111001-V06-04-page49.txt: [('XXXXXXXXXXXXXXXX', ' '), ('XXXXXXXXXXXXXXXXXXXXXX', ' '), ('XXXXXXXXXXXXXXXXXX', ' ')]
LibM19120401-V07-02-page49.txt: [('mezmommommonommomommommommommmom', ' ')]
LibM19120401-V07-02-page50.txt: [('mnrummommommoncommommmown.mo', ' '), ('mmgrommmmommgrmommmoromrmonorz.', ' ')]
LibM19120401-V07-02-page51.txt: [('nrmomoommomomrsoommommokmagmkwon.', ' ')]
LibM19120701-V07-03-page49.txt: [('Emmonmenommomumommommommonotrnommirnmn', ' ')]
LibM19120701-V07-03-page50.txt: [('XraMOXMOZraMOMMOMMOCMOMMMOMMX', ' '), ('MMOMMMOMMOMMOMMMOM', ' ')]
LibM19120701-V07-03-page51.txt: [('rsomravramcmotrammragmonommxmommansom', ' '), ('mozmnmwommolzemrammonommommommommn', ' '), ('MOMa.netlftrrMMIDHIMIZMMOMMOMMILVMM', ' ')]
LibM19121001-V07-04-page3.txt: [('MMOMMOMMIMOMMOTIMMOTMM', ' ')]
LibM19130101-V08-01-page3.txt: [('MMOMMOMEOMMMMOZMOMMOtrMMgraMOPagr', ' ')]
LibM19130101-V08-01-page4.txt: [('ragMMMMMMMMMMMMMMMMMMMMIMMM', ' ')]
LibM19130401-V08-02-page3.txt: [('MOMEOECIMMOIMOMMOMOMMOIXIMM', ' ')]
LibM19130401-V08-02-page4.txt: [('wralrammimmrzrznomnommgmmonom', ' ')]
LibM19130401-V08-02-page52.txt: [('mmozmrommomommonorummanoz', ' ')]
LibM19130701-V08-03-page3.txt: [('MMOHCOMEMMAragraanilMMMOHM', ' ')]
LibM19140101-V09-01-page3.txt: [('XECTIMMECEMOMMOMMIIMOMMOMMOMME', ' ')]
LibM19140101-V09-01-page4.txt: [('IMMEMMOMMOMMEMN', ' ')]
LibM19140401-V09-02-page3.txt: [('MMMEAMMMMMMMMMMMMMMMMMMMEMMMMM', ' '), ('MMMMMMMMMMMMMMMMMnS', ' ')]
LibM19140401-V09-02-page4.txt: [('MEENMENMMMMMMMMMaWKNMMMMMEg', ' '), ('MRNMIMMMM-riMMMMMMOAMRIMMMMM', ' ')]
LibM19140701-V09-03-page4.txt: [('mmmmmmnswmmEsImmornmmmmmm', ' '), ('MMMMMMMMMMMMMMMMMMMMMM', ' '), ('EHMMMMMMMMMMMMMMMMMMMMMMMMMMMMM', ' ')]
LibM19140701-V09-03-page52.txt: [('.ormucesemmommannumorammosimemaamoutammovomnumEammnommuKumumonmustormmummunno', ' '), ('aommommemsatammogarmaxsorarmwelimMelinuilmenomPommixliniewtlominermiimmurpimumnuommurM', ' ')]
LibM19141001-V09-04-page4.txt: [('MMEMMMMMMMMMMEEMMMEMMMMMMMMMM', ' ')]
LibM19141001-V09-04-page49.txt: [('LMIIIIIIIIIIIIIIIIIII', ' ')]
LibM19141001-V09-04-page50.txt: [('MMMMMMMMMMMMM', ' ')]
LibM19150101-V10-01-page3.txt: [('mmmmommmmmmmwmnirimEmmmmmm', ' '), ('EMMMMMMMNiMMMMMM', ' ')]
LibM19150101-V10-01-page4.txt: [('Mmmnwnsommmmmmmmrmmmolm', ' '), ('MMMMMMMMMMMMMMMMMMMX', ' ')]
LibM19150101-V10-01-page52.txt: [('MMMMMMMMMUIMMMMMMRIMMMMMMMMMMMMMMM', ' ')]
LibM19150401-V10-02-page3.txt: [('MMMMMMMMERMMMMMMMMMMMMMMM', ' ')]
LibM19150401-V10-02-page4.txt: [('MMMMMNEMMOMMMNMMMME', ' '), ('ESNMEMMMMMMRiMMMMEENMMEMMM', ' ')]
LibM19150401-V10-02-page49.txt: [('Illlllulllllllllllllllllllllllllllll', ' ')]
LibM19150701-V10-03-page3.txt: [('MMMMMMffiRiMMMMMMMMMMMMEMMMMMMM', ' ')]
LibM19150701-V10-03-page4.txt: [('moNmmaimmEnimmmmmmmEmm', ' '), ('NFIEMMEEMMEEMMMEEEEEEEEEEEEMEEEM', ' ')]
LibM19151001-V10-04-page48.txt: [('MgiMMMMMMMMMMMMMMMMMMMMMMMMMMMM', ' '), ("MMMOSMERMSIMMMEMMNM'iligH", ' ')]
LibM19151001-V10-04-page49.txt: [('yffinsmEmmmmmmmwmswmmmmmmmnim', ' '), ('MMMMEiMMIMMMMMMMMMMMMMMTTMMMMMTIM', ' ')]
LibM19151001-V10-04-page50.txt: [('Yhmommomownwmmmmmmm', ' ')]
LibM19160101-V11-01-page4.txt: [('MMI.I.I.IIWIFINMOMM...MM.M', ' ')]
LibM19160101-V11-01-page51.txt: [('rnomommonoszuzummummanmmollommom', ' ')]
LibM19160101-V11-01e-page1.txt: [('immumnitommuummunitimmtwuntnimmummiona', ' ')]
LibM19160401-V11-02-page26.txt: [('t..glilihiliiiiiiraiii', ' ')]
LibM19160401-V11-02e-page1.txt: [('maimimiummaimmismilinuminutimmuminiumilmmitimmummumwmoimminummiumnimmititilowinitimiiiti', ' '), ('lllllllllllllll', ' ')]
LibM19170701-V12-03-page3.txt: [('IIIIIIIIIIIII', ' ')]
LibM19171001-V12-04-page19.txt: [('YIIIIIIIIIIIIIII', ' ')]
LibM19171001-V12-04-page3.txt: [('iiiiiiiiiii.c.ii', ' ')]
LibM19180101-V13-01-page18.txt: [('Lffiffimffithimmouninoffimmommummuommunimmonwiniiiiminnumumminriumminlimminiiiiiiiiiiimmonimum', ' '), ('Lffiffimffithimmouninoffimmommummuommunimmonwiniiiiminnumumminriumminlimminiiiiiiiiiiimmonimum', ' ')]
LibM19180101-V13-01-page19.txt: [('inimiiiiiminunimmilimumusinominimuninimmilmr.anumilimminiumminnimminumminnummiliniummiliml', ' ')]
LibM19180401-V13-02-page3.txt: [('ossionosollSoMaSISMISIIIIIISIIIMISSIMISOMallaallallaillafflUSS', ' '), ('USSOSIIIMIWOHMIIIIIIIISMISOISISIIIIMinallIOISOISOISoisosiososi.', ' '), ('ossionosollSoMaSISMISIIIIIISIIIMISSIMISOMallaallallaillafflUSS', ' ')]
LibM19180701-V13-03-page3.txt: [('smossmunssunommummusnmussmssmissussmsmussmmssmissmossmussussummmmusstmosssmsmssmnnsmimmumsmimmwsrmossumms', ' '), ('ssumsffismssumusummummtmussessumnsumussunstsmossmossmwsussmumunnmunsummossumsnwssumminimmsnintminimmusmussinissunues', ' ')]
LibM19181001-V13-04-page16.txt: [('iiiiiiiiiiiiiiiiiiiiiii', ' ')]
LibM19181001-V13-04-page3.txt: [('nifilnIMIfintilllillflillifilnifilmiummiiffillfill', ' ')]
LibM19190101-V15-01-page3.txt: [('siiiiiiilitaiiiiiiiiiaisill', ' '), ('alliallallialliallaillaSSIIIIIIIIIIIMIIIIIIIIIIiiim', ' '), ('alliallallialliallaillaSSIIIIIIIIIIIMIIIIIIIIIIiiim', ' ')]
LibM19190401-V15-02-page14.txt: [('IIIIIIIIIIIIIIII', ' '), ('HIIMIIIIIIIIIIIIIIIINIIUMIUMINIMUI', ' ')]
LibM19190401-V15-02-page15.txt: [('IWIIIIIIIIIIIIIIIIII', ' ')]
LibM19190401-V15-02-page18.txt: [('iiiiiiiinillitiiii', ' ')]
LibM19190401-V15-02-page3.txt: [('immumimilimitmliminiiimiiiiiiiiiiiiiiiiiiiimmiiiiiiiiiiimmintmill', ' '), ('atssussusumoususissonclaCIIIMMIIMMISISCOMMISSI', ' '), ('immumimilimitmliminiiimiiiiiiiiiiiiiiiiiiiimmiiiiiiiiiiimmintmill', ' '), ('.MMIMUMUMWWWIIIIIIIIIIIIIIIIIIIIIIIIIIIIIlleeleteeemememme', ' ')]
LibM19190701-V15-03-page3.txt: [('IIIIIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIIIIIIIII', ' ')]
LibM19190701-V15-03-page4.txt: [('IIIIIIIIIIIIIII', ' ')]
LibM19191001-V15-04-page14.txt: [('miiiiiniiiiiiimilimiiiiiiiiiiimiliiiiiimmionimmiumingiiiiiiiiiiiimmiliiiimmomiiiiminwiliiiiiiiiiiiiiiiiiinminsummuilimiliiinimonnimmiiiiiiiiiiiiiiiiiiimiimiq', ' '), ('miiiiiniiiiiiimilimiiiiiiiiiiimiliiiiiimmionimmiumingiiiiiiiiiiiimmiliiiimmomiiiiminwiliiiiiiiiiiiiiiiiiinminsummuilimiliiinimonnimmiiiiiiiiiiiiiiiiiiimiimiq', ' ')]
LibM19191001-V15-04-page17.txt: [('mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', ' '), ('.itmlinillitiniiimmullimitilittiminunitiffitiminimmituniumnitmitilistimmilimutiiiiiimitimitintiumnimmummitm', ' '), ('unimilismimitimittnismitimmimittlimummumitemitimmummmintimmimiumiumnitimllminiummuntiummilmi', ' '), ('mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', ' '), ('.itmlinillitiniiimmullimitilittiminunitiffitiminimmituniumnitmitilistimmilimutiiiiiimitimitintiumnimmummitm', ' '), ('mimmutinsimiunimminimmummusilinnimmimuminnumminnimmummilinuffisliinummimilmilimitimiumminniiniiitimitimmimmilimititinnum', ' ')]
LibM19191001-V15-04-page27.txt: [('IIIMUUMIMIIIIIIMHOMIDfinnlinlinnflUnNHOHHOHIMIMHHOMMIlinlinflO', ' ')]
LibM19191001-V15-04-page28.txt: [('HIIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIlIIIIIIII', ' '), ('IIIIIIIIIIIIIIIIIII', ' '), ('IIIIIIIIIIIIIIIIIIIII', ' ')]
LibM19200401-V14-02-page16.txt: [('MMIERRIOTITIMMIIEMBITIMIIMERIMIIM', ' '), ('HRIIMMIIIIIMEIMIIIMIHNI', ' ')]
LibM19200401-V14-02-page28.txt: [('IIIIIIIIIIIIIIIIIIIIIIIIIII', ' ')]
LibM19200401-V14-02-page3.txt: [('iiimmiiiiii..', ' ')]
LibM19200401-V14-02-page31.txt: [('.e-illmllommimilimmilummumenimilmnimuningumminumiiiiimilmimmunimifinnilionontimmigimiliiimiffiliffilimiliiiiiiiiiiiiimm', ' '), ('.e-illmllommimilimmilummumenimilmnimuningumminumiiiiimilmimmunimifinnilionontimmigimiliiimiffiliffilimiliiiiiiiiiiiiimm', ' ')]
LibM19200401-V14-02-page32.txt: [('smmusissommummusismussmimussissusissimmimmiiiiiiium', ' ')]
LibM19200401-V14-02-page36.txt: [('mcommommommuommommonomm', ' '), ('nunnnunnuuuuuuuuuuunnuuuuuuuuuunnnuuunmuuuumusuuuuunuuuuuuuuuuuuuuuununnnnnunuuuuunuuuuuuuuunuwuum', ' '), ('nunnnunnuuuuuuuuuuunnuuuuuuuuuunnnuuunmuuuumusuuuuunuuuuuuuuuuuuuuuununnnnnunuuuuunuuuuuuuuunuwuum', ' ')]
LibM19200401-V14-02-page5.txt: [('ENEIIIIIIIESEIIIE', ' ')]
LibM19200701-V14-03-page12.txt: [('rrigtreatiariiirriiriiiiriiiitriVittiOriiilrricTiiilriiiitii', ' '), ('NiViitiesiAliffiliifiiilrimlnii', ' ')]
LibM19200701-V14-03-page15.txt: [('racTIMIriiiiitiriiltililietcliteiViiVittiiiitiEVAlifittiA', ' ')]
LibM19200701-V14-03-page8.txt: [('iitiiiitiWiiiititiVaititigniFittaiiiitQaWilitilitit', ' ')]
LibM19201001-V14-04-page3.txt: [('iiiiiiilliniemniiiiii', ' ')]
LibM19201001-V14-04-page35.txt: [('mouaamaaammmaaaaaaaaamamanmmammmammimaaaaaamaaaaaammIaaaaaa', ' '), ('MOErlrlrEEMOINMEMMOMMMOMMIKUMOE', ' '), ('IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII', ' ')]
In [36]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction8 Average verified rate: 0.981664985503412 Average of error rates: 0.03307226705796038 Total token count: 1451376
In [37]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[37]:
[("'", 1499),
('m', 1313),
('d', 1253),
('e', 997),
('w', 951),
('t', 834),
('n', 773),
('r', 677),
('f', 631),
('g', 383),
('x', 271),
('u', 206),
('k', 192),
('tv', 150),
('th', 117),
('pa', 100),
('sunday-law', 92),
('z', 82),
('ex', 75),
('io', 71),
('id', 71),
('co', 64),
('postmaster-general', 62),
('re', 59),
('ga', 58),
('post-offices', 57),
('mo', 57),
('un-american', 57),
('statute-books', 56),
('va', 56),
('sunday-closing', 54),
('church-and-state', 49),
('mm', 45),
('q', 44),
('un', 43),
('mt', 42),
('attorney-general', 41),
('tion', 40),
('sunday-rest', 39),
('wm', 38),
('pp', 38),
('charta', 37),
('ro', 35),
('li', 35),
('neander', 31),
('-', 30),
('seventhday', 30),
('mi', 28),
('ky', 28),
('religio-political', 27)]
In [38]:
reports.long_errors(errors_summary, min_length=15)
Out[38]:
(['countermemorialists', 'theconstitutionof', 'well-intentioned', 'antiprohibitionists', 'immmotzmotatmtmommzum', 'sundayobservance', 'disease-resisting', 'less-enlightened', 'vuaziffiemunimeluitennotinutnnifin', 'cavendish-benand', 'comizairadtgicao', 'church-and-stateunion', 'constitutionalty', 'california-nevada', 'winnington-ingram', 'iiiirreriiitlhinifid', 'boarding-schools', 'simmmismwklaiigitil', 're-establishment', 'publishing-houses', 'rwiumwimmiiiiimimmumnii', 'pilurprmarasigimmt', 'preventivejurisdiction', "religio'political", 'miilmilliiimilliifilmidid', 'enosnantiemotainotientetiemtio', 'one-day-in-seven', 'elanornelkiiisre', 'relies-political', 'narrow-mindedness', 'harmless-looking', 'seventh-day-observing', 'jeradycerelsolid', 'ititeiltintonecfctration', 'non-commissioned', 'migininaugimmikimmu', 'latitteilommtwtfifolror', 'mvstimpsmgrecuttliv', 'mgraotrtraccommozraglgraccommicami', 'ex-vice-president', 'prcestantissinium', 'iiiiiiiiiiimmiumulinuilmilne', 'above-referred-to', 'fourteen-per-cent', 'better-established', 'nemmiwiiiimortrinl', 'counter-petitioners', 'inimlfilninninilli', 'non-intoxicating', 'gawavaiaaamminonwirit', 'blood-guiltiness', 'mememeeememememe', 'whowrotethefamous', 'penmenisrisdinaorabsesiceewer', 'affindlitilffilillikvillehd', 'life-disagreeable', 'twice-interrupted', 'antiecclesiastical', 'tinitoriinlintol', 'politico-religious', 'much-appealed-to', 'religiopolitical', 'glillilisibffille', 'unemeeeeeeneeleeneeetelli', 'omortioionososom', 'vaaffisl-co-pacific', 'statesprinciples', 'eimf-immmmnmnmlne', 'religion-and-state', 'rwraeadttchehman', 'long-established', 'msossgmaiaassmgeamakawmalnarlaa', 'lecosniiionpainoticsovicesfirde', "linunimmimrs'inumumu", 'litico-religious', 'rrrprrrrrrrritrrrf', 'lamjukgmdavagixiatm', 'toforeigncountries', 'tixtreciremyemiresnirtiortiorrioritortiorrii', 'democraticrepublican', 'medico-actuarial', 'controversialist', 'hihinhiniiiiiiirin', 'wind-instruments', 'twenty-four-hour', 'two-and-a-halfmile', 'exemption-clause', 'estateifpuprenle', 'commander-in-chief', 'self-determination', 'nmmmommrsonomrznemonmonomnrmotruomonom', 'feeble-mindedness', 'tsereanctosrothciertny', 'church-membership', 'snlrnuurinunuununa', 'rimareinsmiummisimememesiermem', 'tiarezemieeleismikiimeeemiewew', 'emerhilsamalsinalso', 'non-interference', 'burckhardt-schatzmann', 'constitution-makers', "the'constitution", 'feemowiwiedimeiersig', 'one-day-rest-inseven', 'postmaster-general', 'establishingreligious', 'one-day-rest-in-seven', 'shriveled-souled', 'emelieniwionsavibannotisloneemite', 'vice-president-elect', 'self-renunciation', 'self-disciplined', 'innocent-looking', 'fourteen-year-old', 'state-established', 'civilinstitution', 'quasi-conquerors', 'inter-brotherhood', 'anti-evangelical', 'ramtersimrammemarkirracarmermartm', 'mmipoinnonfoemnnioannim', 'nomenegvoicedienast', 'actof-parliament', 'inoomalloisossimis', 'religion-andstate', 'sixteenth-century', 'erimmuralcotemurc', 'uncommercialized', 'iillrieeiaiiirriardi', 'agaomoorwairalioigtiargial', 'ihilibillilltreterita', 'publishing-house', 'vagtookagtookaog', 'selfpreservation', 'secretary-ofthe-interior', 'conscience-fettered', 'maramommraosommu', 'campbell-bannerman', 'character-making', 'religio-political', 'impreeloreesocoeselaal', 'dyed-in-the-wool', 'attorney-general', 'cannikin-clinking', 'ffassininsonsiwoloolgasers', 'lieutenantgovernor', 'jtuemmmmmwinimnir', 'non-sunday-observing', 'mheminuffinfillffilimis', 'sabbath-breaking', 'inforfaisiomomincomocadoviemmigoimiwa', 'sssssssssssssssssssss', 'statesman-preacher', 'prcestantissimum', 'tully-wainwright', 'inter-denominational', 'assumedjimperial', 'statute-preserved', 'nosonmomorwemcwaint', 'reconstructionists', "representatives'", 'iiiwtierttititiiiit', 'no-religious-test', 'ipuitnilinimilliiiinulillluunii', 'friemoossmormior', 'wamegkimnmrummmmesemvmmmrmk', 'miraglia-gullotti', 'nininimummujimininlini', 'self-glorification', 'heaven-appointed', 'sunday-amusement', 'self-aggrandizement', 'avinavvswoirliag', 'word-controversy', 'religious-sabbatic', 'iitoitllislossoliiosill', 'money-worshiping', 'intheszealwarfejrrnicenathemoatiry', 'palace-befitting', 'religio-constitutional', 'personal-heart-conversion', 'american-mexican', 'state-controlled', 'personal-liberty', 'much-to-be-desired', 'curiosity-gratifying', 'jskadmemmomendim', 'mconslfaitmeegtifo', 'relpresentatives', 'non-sectarianism', 'emsmwmmmwmmmnmhoneni', 'governor-general', 'selfaggrandizement', 'religionaboveall', 'faimmeigegrommegfa', 'satisfactostruction', 'sunday-amendment', 'miommooomoomsoicimuchmusuoihiuoimisiummicosississinasseeememeescs', 'monommomozragrammxragnm', 'statuteintrenched', 'teiiiiriafinemie', 'pecsetemmeltigazolom', 'trgatimedimegoovemotwo', 'scandalousassault', "attorney-general's", 'rimmineiiiiiiiiimirre', 'ex-congregational', 'sunday-enforcement', 'beverage-factory', 'selfgratification', 'nitroenrtenaddlimeg', 'mititayerwiriiiinicrierier', 'sundaymailreports', 'liberty-imparting', 'twenty-four-hour-day', 'liberty-bestowing', 'special-delivery', 'misunderstanaing', 'mimmuiummommosowl', 'criiitriatoyearetriarmireirntrecltwieviretriarctieanyaremiractmiteetreowehatio', 'state-and-religion', 'compulsory-sunday-law', 'unconstitunation', 'gishrimmmmomnmon', 'associate-justices', 'secretary-of-war', 'consaalermtooldlny', 'unanswerableness', 'infludemonstrated', 'self-destructive', 'counterdenunciations', 'church-dominated', 'religio-politico', 'thanksgiving-day', 'countermemorials', 'windsor-on-hudson', 'obviouslyagreement', 'busideteriorating', 'one-dayof-rest-in-seven', 'state-intrenched', 'attendstipulating', 'entlimimmimiemil', 'warm-heartedness', 'illrilohlietflir', 'history-confirming', 'semi-ecclesiastical', 'secular-rest-day', 'tomplonsesolomerol', 'intelligent-looking', 'ivosengtoexirmemed', 'suspension-bridges', 'self-righteousness', 'near-prohibition', 'day-rest-in-seven', 'statute-enforced', 'weiverreitaararforreahaarivitoroyerriiivii', 'five-million-dollar', 'twentieth-century', 'politico-ecclesiastical', 'counterallegations', 'alaska-yukon-pacific', 'ecemoictiemememoodemeeeme', 'sabbath-breakers', 'commandment-keepers', 'trothofabusesandegurpatent', 'trading-with-the-enemy', 'go-to-churchor-stay-indoors', 'bureau-of-military-intelligence', 'seventeenth-century', 'self-destruction', 'kiderlen-waechter', 'betterthan-thous', 'antitrinitarians', 'act-of-parliament', 'self-preservation', 'prescott-wilson-tumulty', 'post-reformation', 'brigadier-general', 'government-makers', 'emirmeilsaarsinemiliehmee', 'rograssmargmeermirl', 'ttimilimmumulminnittinitintinninitutimmi', 'frankfort-on-the-main', 'consumption-cure', 'disestablishtvent', 'church-collective', 'religious-legislation', 'intfilnilhimirimihimmihimiirminlnimimiriminiimium', 'one-day-of-rest-in-seven', 'fillikifineffilia', 'parochial-school', 'over-encouraging', 'uvrapsimisulswipampiampv', 'eadergettlevaaled', 'lieutenant-colonel', 'all-comprehensive', 'double-mindedness', 'man-administered', 'counter-movement', 'half-pintof-claret', 'counter-argument', 'world-conscience', 'parochial-school-system', 'ztkirmintzflrmerifranc', 'mramiluesimairrimamesiemiamemilie', 'agretiitilitltitstriffigtisifitiveram', 'microbedestroying', 'wommiumniffunivirlsoir', 'maher-shalal-hash-baz', 'governmentsupported', 'misrepreapproved', 'heaven-enlightened', 'five-hundred-word', 'incomparabiabove', "postmaster-general's", 'succeedinggenerations', 'imememememeinimeimii', 'sunday-observance', 'restaurant-keeper', 'generous-hearted', 'self-contradictory', 'seven-daysa-week', 'ostammosanosonsorr', 'rnitivittiltifirmi', 'muniummmitimlinini', "will-o'-the-wisp", 'counterdemonstrations', 'i-ifidairicliiiriiirroi', 'church-and-state', 'thirty-five-gram', 'thefactthattheyinvolvethevitalprinciple', 'imeemiumeemeemene', 'inmpaiavimmipamipammmiximp', 'demonstrainfluence', 'all-day-everyday', 'self-constituted', 'notpersonalities', 'antichristianism', 'self-condemnatory', 'trust-and-combine', 'separationwhichis', 'director-general', 'vriliriiifiertailitarectrinfeltriatiatictitlifie', 'super-government', 'maher-shalalhash-baz', 'jerusalem-to-jericho', 'nolimmowiftwommr', 'copy-thirty-five', "school-teachers'", 'church-and-state-union', 'mimmomiosomosoissoisioissossosivissossicsiiiss'], 15)
Correction 9 -- Split Squashed Words¶
In [39]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log
prev = cycle
cycle = "correction9"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
verified_tokens = []
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)
tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]
sorted_list_of_words = list(words_sorted_short['token'])
wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
maxword = max(len(x) for x in sorted_list_of_words)
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = utilities.strip_punct(content)
tokens = utilities.tokenize_text(text)
replacements = []
for token in tokens:
if not token.lower() in spelling_dictionary:
if len(token) > 17:
if re.search(r"[\-\-\'\"]", token):
pass
else:
split_string = clean.infer_spaces(token, wordcost, maxword)
list_split_string = split_string.split()
if clean.verify_split_string(list_split_string, spelling_dictionary):
replacements.append((token, split_string))
else:
pass
else:
pass
else:
pass
if len(replacements) > 0:
print("{}: {}".format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
LibM19080101-V03-01-page22.txt: [('thefactthattheyinvolvethevitalprinciple', 'the fact that they involve the vital principle')]
LibM19080401-V03-02-page22.txt: [('countermemorialists', 'counter memorialists')]
LibM19090101-V04-01-page10.txt: [('satisfactostruction', 'sat is fact o st r u c t i o n')]
LibM19090701-V04-03-page34.txt: [('ffaSSININSONSIWOloolgasers', 'f f a S S I N I N S O N S I W O l o o l g a s e r s')]
LibM19101001-V05-04-page1.txt: [('AffindlitilffilillikVillehd', 'A f f i n d l i t i l f f i l i l l i k V i l l e h d')]
LibM19110401-V06-02-page49.txt: [('msossgmAIAASSMgEAMAKAWMALNARLAA', 'ms o s s g m A I A A S S M g E A M A K A W M A L N A R L A A')]
LibM19111001-V06-04-page18.txt: [('obviouslyagreement', 'obviously agreement')]
LibM19120101-V07-01-page50.txt: [('Toforeigncountries', 'To foreign countries')]
LibM19121001-V07-04-page6.txt: [('tomplonsesolomerol', 'tom p l on s e s o l o m e r o l'), ('IMpreeloreesocoeselaal', 'IM p reel ore e s o c o e s e l a a l'), ('emerhilsamalsinalso', 'e m e r h i l s a m a l s i n a l s o'), ('consaalermtooldlny', 'con s a a l er m t o o l d l n y')]
LibM19130101-V08-01-page27.txt: [('counterdemonstrations', 'counter demonstrations')]
LibM19130101-V08-01-page49.txt: [('agaomooRWairaliOigTiargial', 'a g a o m o o R W a i r a l i O i g T i a r g i a l')]
LibM19130401-V08-02-page52.txt: [('Ostammosanosonsorr', 'O st am m o s a n o s o n s o r r')]
LibM19130701-V08-03-page10.txt: [('lieutenantgovernor', 'lieutenant governor')]
LibM19140101-V09-01-page27.txt: [('establishingreligious', 'establishing religious')]
LibM19140101-V09-01-page4.txt: [('infOrfaiSIOMOMINCOMOCADOVIEMMIGOIMIWA', 'in f O r f a i S I O M O M I N C O M O C A D O V I E M M I G O I M I W A')]
LibM19140701-V09-03-page49.txt: [('SIMMMISMWKlaiigitil', 'S IM M M I S M W K l a i i g i t i l')]
LibM19141001-V09-04-page10.txt: [('counterdenunciations', 'counter denunciations')]
LibM19150101-V10-01-page53.txt: [('nosonmomorwemcwaint', 'no son mom or we m c w a i n t')]
LibM19150701-V10-03-page18.txt: [('governmentsupported', 'government supported')]
LibM19160101-V11-01e-page1.txt: [('sssssssssssssssssssss', 's s s s s s s s s s s s s s s s s s s s s')]
LibM19161001-V11-04-page22.txt: [('counterallegations', 'counter allegations')]
LibM19170101-V12-01-page13.txt: [('democraticrepublican', 'democratic republican')]
LibM19180101-V13-01-page5.txt: [('reconstructionists', 'reconstruction i sts')]
LibM19180101-V13-01-page9.txt: [('antiprohibitionists', 'anti prohibitionists')]
LibM19180401-V13-02-page3.txt: [('inoomalloISOSSIMIS', 'in o o m a l l o I S O S S I M I S'), ('MIMMOMIOSOMOSOISSOISIOISSOSSOSIVISSOSSICSIIISS', 'MIM MOM I O SO M O S O I S S O I S I O I S S O S S O S I V I S S O S S I C S I I I S S')]
LibM19180701-V13-03-page12.txt: [('selfaggrandizement', 'self aggrandizement')]
LibM19180701-V13-03-page27.txt: [('antiecclesiastical', 'anti ecclesiastical')]
LibM19180701-V13-03-page3.txt: [('inimlfilninninilli', 'in im l f i l n i n n i n i l l i')]
LibM19181001-V13-04-page13.txt: [('HIHINHINIIIIIIIRIN', 'HI H IN H IN III III IR IN')]
LibM19190101-V15-01-page3.txt: [('iitoitllislossoliiosill', 'ii to it l l is loss o l ii o s ill')]
LibM19190401-V15-02-page3.txt: [('MIOMMOOOMOOMSOICIMUCHMUSUOIHIUOIMISIUMMICOSISSISSInasseeememeescs', 'M I O M M O O O M O O M S O I C I M U C H M U S U O I H I U O I M I S I U M M I C O S I S S I S S I n a s s e e e m e m e e s c s')]
LibM19190701-V15-03-page36.txt: [('lecosniiionpainOticsovicesfirde', 'le c o s n i i i o n p a i n O t i c s o v i c e s f i r d e')]
LibM19191001-V15-04-page27.txt: [('MIilmilliiimilliifilMIDID', 'M I i l m i l l i i i m i l l i i f i l M I D I D')]
LibM19200401-V14-02-page13.txt: [('IMEMEMEMEMEINIMEIMII', 'I ME ME ME ME ME IN I ME IM II')]
LibM19200401-V14-02-page31.txt: [('RIMMINEIIIIIIIIIMIRRE', 'RIM MIN E I I I I I I I I I M I R R E')]
LibM19200701-V14-03-page12.txt: [('INMPAIAVIMMIPAMIPAMMMIXIMP', 'IN M P A I A V I M M I P A M I P A M M M I X I M P'), ('iillrieeiaiiirriardi', 'i ill r i e e i a i i i r r i a r d i')]
LibM19200701-V14-03-page20.txt: [('UVRAPSIMISULSWIPAMPIAMPV', 'U V R A P S I M I S U L S W I P A M P I A M P V'), ('weiverreitaararforreahaarivitoroyerriiivii', 'we iv err e i t a a r a r f o r r e a h a a r i v i t o r o y e r r i i i v i i')]
LibM19200701-V14-03-page3.txt: [('ipuitnIlinimilliiiinulillluunii', 'i p u i t n I l i n i m i l l i i i i n u l i l l l u u n i i')]
In [40]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/LibM/correction9 Average verified rate: 0.9815728089947997 Average of error rates: 0.0331019809244314 Total token count: 1452039
In [41]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[41]:
[("'", 1499),
('m', 1365),
('d', 1261),
('e', 1023),
('w', 958),
('t', 844),
('n', 797),
('r', 704),
('f', 643),
('g', 391),
('x', 272),
('u', 218),
('k', 195),
('tv', 150),
('th', 117),
('pa', 100),
('sunday-law', 92),
('z', 82),
('ex', 75),
('io', 71),
('id', 71),
('co', 64),
('postmaster-general', 62),
('re', 59),
('ga', 58),
('post-offices', 57),
('mo', 57),
('un-american', 57),
('statute-books', 56),
('va', 56),
('sunday-closing', 54),
('church-and-state', 49),
('mm', 45),
('q', 44),
('un', 43),
('mt', 42),
('attorney-general', 41),
('tion', 40),
('sunday-rest', 39),
('wm', 38),
('pp', 38),
('charta', 37),
('ro', 35),
('li', 35),
('neander', 31),
('-', 30),
('seventhday', 30),
('mi', 28),
('ky', 28),
('religio-political', 27)]
In [42]:
reports.docs_with_high_error_rate(summary)
Out[42]:
[('LibM19200401-V14-02-page4.txt', 1.0),
('LibM19060401-V01-01-page2.txt', 1.0),
('LibM19140701-V09-03-page52.txt', 0.857),
('LibM19110701-V06-03-page1.txt', 0.824),
('LibM19080101-V03-01-page1.txt', 0.812),
('LibM19090401-V04-02-page33.txt', 0.778),
('LibM19110101-V06-01-page1.txt', 0.769),
('LibM19191001-V15-04-page28.txt', 0.75),
('LibM19110701-V06-03-page4.txt', 0.729),
('LibM19080401-V03-02-page19.txt', 0.714),
('LibM19080701-V03-03-page1.txt', 0.687),
('LibM19100101-V05-01-page1.txt', 0.676),
('LibM19111001-V06-04-page1.txt', 0.667),
('LibM19090401-V04-02-page1.txt', 0.667),
('LibM19110401-V06-02-page1.txt', 0.662),
('LibM19081001-V03-04-page1.txt', 0.66),
('LibM19080401-V03-02-page1.txt', 0.659),
('LibM19130701-V08-03-page2.txt', 0.657),
('LibM19100401-V05-02-page1.txt', 0.645),
('LibM19080701-V03-03-page41.txt', 0.619),
('LibM19100701-V05-03-page1.txt', 0.615),
('LibM19140401-V09-02-page1.txt', 0.611),
('LibM19120701-V07-03-page4.txt', 0.605),
('LibM19090401-V04-02-page2.txt', 0.6),
('LibM19170401-V12-02-page1.txt', 0.583),
('LibM19150401-V10-02-page1.txt', 0.577),
('LibM19170701-V12-03-page1.txt', 0.566),
('LibM19180101-V13-01-page4.txt', 0.558),
('LibM19090101-V04-01-page1.txt', 0.543),
('LibM19060401-V01-01-page35.txt', 0.529),
('LibM19191001-V15-04-page1.txt', 0.524),
('LibM19170701-V12-03-page4.txt', 0.5),
('LibM19120401-V07-02-page4.txt', 0.5),
('LibM19101001-V05-04-page1.txt', 0.478),
('LibM19180101-V13-01-page1.txt', 0.471),
('LibM19121001-V07-04-page1.txt', 0.471),
('LibM19190701-V15-03-page36.txt', 0.456),
('LibM19200101-V14-01-page1.txt', 0.455),
('LibM19121001-V07-04-page6.txt', 0.45),
('LibM19070401-V02-02-page36.txt', 0.444),
('LibM19090701-V04-03-page1.txt', 0.419),
('LibM19111001-V06-04-page52.txt', 0.407),
('LibM19151001-V10-04-page7.txt', 0.406),
('LibM19190401-V15-02-page1.txt', 0.4),
('LibM19200401-V14-02-page35.txt', 0.393),
('LibM19071001-V02-04-page18.txt', 0.389),
('LibM19130701-V08-03-page26.txt', 0.387),
('LibM19140101-V09-01-page56.txt', 0.383),
('LibM19141001-V09-04-page52.txt', 0.363),
('LibM19071001-V02-04-page51.txt', 0.36),
('LibM19140701-V09-03-page1.txt', 0.333),
('LibM19200701-V14-03-page1.txt', 0.333),
('LibM19080101-V03-01-page2.txt', 0.333),
('LibM19201001-V14-04-page1.txt', 0.333),
('LibM19120401-V07-02-page40.txt', 0.32),
('LibM19090401-V04-02-page51.txt', 0.312),
('LibM19160701-V11-03-page1.txt', 0.312),
('LibM19140701-V09-03-page4.txt', 0.307),
('LibM19090701-V04-03-page51.txt', 0.294),
('LibM19180701-V13-03-page4.txt', 0.294),
('LibM19140701-V09-03-page49.txt', 0.288),
('LibM19150101-V10-01-page1.txt', 0.286),
('LibM19190101-V15-01-page4.txt', 0.273),
('LibM19130701-V08-03-page27.txt', 0.273),
('LibM19090101-V04-01-page21.txt', 0.262),
('LibM19060401-V01-01-page36.txt', 0.25),
('LibM19200401-V14-02-page1.txt', 0.25),
('LibM19120401-V07-02-page38.txt', 0.245),
('LibM19151001-V10-04-page1.txt', 0.24),
('LibM19090101-V04-01-page52.txt', 0.239),
('LibM19130401-V08-02-page1.txt', 0.235),
('LibM19121001-V07-04-page4.txt', 0.233),
('LibM19120701-V07-03-page1.txt', 0.231),
('LibM19141001-V09-04-page1.txt', 0.222),
('LibM19070401-V02-02-page35.txt', 0.222),
('LibM19130101-V08-01-page1.txt', 0.214),
('LibM19170401-V12-02-page4.txt', 0.214),
('LibM19131001-V08-04-page1.txt', 0.211),
('LibM19080701-V03-03-page52.txt', 0.21),
('LibM19080101-V03-01-page51.txt', 0.208),
('LibM19130701-V08-03-page1.txt', 0.208),
('LibM19100101-V05-01-page31.txt', 0.206),
('LibM19090701-V04-03-page42.txt', 0.206),
('LibM19150101-V10-01-page12.txt', 0.202)]
In [43]:
# %load shared_elements/high_error_rates.py
doc_keys = [x[0] for x in reports.docs_with_high_error_rate(summary) if x[1] > 0.5]
# utilities.open_original_docs(doc_keys, directories['cycle'])
In [ ]: