WHM-OCR-Evaluation-and-Correction

In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt", 
             "2016-12-07-SDA-place-names.txt", 
             "2016-12-08-SDA-Vocabulary.txt", 
             "2017-01-03-place-names.txt", 
             "2017-02-14-Base-Word-List-SCOWL&KJV.txt",
             "2017-02-14-Roman-Numerals.txt",
             "2017-03-01-Additional-Approved-Words.txt"
            ]
In [6]:
spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)
In [7]:
title = "WMH"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)

Baseline

In [9]:
cycle = 'baseline'
In [10]:
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/baseline

Average verified rate: 0.9316708765632767

Average of error rates: 0.07061265580057527

Total token count: 939453

In [11]:
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 500 )
Out[11]:
[('-', 1687),
 ('m', 1646),
 ('w', 1492),
 ('g', 1421),
 ('d', 1246),
 ('e', 1229),
 ('¥', 881),
 ('re-', 816),
 ('con-', 748),
 ('tion', 679),
 ('r', 664),
 ('n', 633),
 ('in-', 539)]

Review Special Character Use

In [12]:
reports.tokens_with_special_characters(errors_summary)
Out[12]:
[('¥', 881),
 (')', 432),
 ('(', 368),
 ('ñ', 248),
 ('/', 171),
 ('ñthe', 100),
 ('_', 81),
 ('presidentña', 75),
 ('numbess)in', 70),
 ('¡', 65),
 ('educationñprof', 62),
 ('numbers)in', 62),
 ('%', 62),
 ('treasurerñe', 61),
 ('secretaryñm', 61),
 ('ñmargaret', 58),
 ('(to', 52),
 ('\\', 41),
 ('presidentñs', 38),
 ('ñelder', 37),
 ('treasñjennie', 35),
 ('ñselected', 35),
 ('presidentñm', 32),
 ('ña', 32),
 ('*', 31),
 ('//', 25),
 ('numbers)', 25),
 ('ã', 24),
 ('ñwe', 23),
 ('¢', 22),
 ('ô', 21),
 ('(the', 20),
 ('ñi', 20),
 ('numbeps)in', 19),
 ('numbees)in', 17),
 ('(b)', 16),
 ('ñthat', 16),
 ('(concluded', 16),
 ('¥the', 16),
 ('ñdied', 16),
 ('secretaryñs', 15),
 ('(a)', 15),
 ('ñhattie', 15),
 ('treasurerñd', 15),
 ('ñbrother', 13),
 ('(and', 13),
 ('ñand', 12),
 ('i)', 12),
 ('(ps', 12),
 ('(see', 12),
 ('(c)', 11),
 ('ñin', 11),
 ('(tithe)', 11),
 ('in¥', 11),
 ('ñmiss', 11),
 ('(john', 10),
 ('¥of', 9),
 ('quartetñ', 9),
 ('ñno', 9),
 ('ñdr', 8),
 ('ñfrom', 8),
 ('ñmrs', 8),
 ('ñas', 8),
 ('///', 8),
 ('•', 8),
 ('songñ', 8),
 ('(rev', 8),
 ('numbevs)in', 8),
 ('(heb', 7),
 ('numbews)in', 7),
 ('ñour', 7),
 ('(not', 7),
 ('numbeas)in', 7),
 ('(a', 7),
 ('continued)', 7),
 ('`', 7),
 ('ñw', 7),
 ('\\vest', 7),
 ('(isa', 6),
 ('\\\\', 6),
 ('nña', 6),
 ('ñthis', 6),
 ('ñan', 6),
 ('ñat', 6),
 ('ñsuccess', 6),
 ('(acts', 6),
 ('(d)', 6),
 ('ñit', 6),
 ('ñs', 6),
 ('(continued', 6),
 ('¦', 6),
 ('ó', 6),
 (']', 6),
 ('(matt', 5),
 ('given)', 5),
 ('(sunday)', 5),
 ('ñone', 5),
 ('ñministry', 5),
 ('homeñthe', 5),
 ('(job', 5),
 ('sabbath¥school', 5),
 ('ññ', 5),
 ('(field', 5),
 ('ñnot', 5),
 ('(mrs', 5),
 ('¥we', 5),
 ('ñmen', 5),
 ('¥¥', 5),
 ('=', 5),
 ('educationña', 5),
 ('ñall', 5),
 ('michã', 5),
 ('()reek', 5),
 ('numbess)ln', 5),
 ('the¥', 5),
 ('ñprof', 5),
 ('ñella', 5),
 ('(i)', 4),
 ('(or', 4),
 ('ñthey', 4),
 ('ñsome', 4),
 ('ñgospel', 4),
 ('ñsister', 4),
 ('\ufeff', 4),
 ('\\\\\\\\', 4),
 ('(ex', 4),
 ('\\v', 4),
 ('numbcps)in', 4),
 ('ñj', 4),
 ('[', 4),
 ('wantedña', 4),
 ('ñfor', 4),
 ('i/', 4),
 ('(we', 4),
 ('numbecs)in', 4),
 ('(paper)', 4),
 ('ãã', 4),
 ('numbens)in', 4),
 ('(which', 4),
 ('the_', 4),
 ('ñm', 4),
 ('(luke', 4),
 ('¥in', 4),
 ('(i', 4),
 ('¥¥¥', 4),
 ('ñeld', 4),
 ('ñrev', 3),
 ('(this', 3),
 ('purposeñto', 3),
 ('/-', 3),
 ('(there', 3),
 ('(read', 3),
 ('he¥', 3),
 ('#', 3),
 ('be¥', 3),
 ('ñgeorge', 3),
 ('ob¥', 3),
 ('io¢', 3),
 ('(deut', 3),
 ('and¥', 3),
 ('[john', 3),
 ('(g)', 3),
 ('termñbible', 3),
 ('ñremember', 3),
 ('(so', 3),
 ('hymnñ', 3),
 ('ñtestimonies', 3),
 ('(cloth', 3),
 ('(f)', 3),
 ('(mal', 3),
 ('ñof', 3),
 ('¥to', 3),
 ('¥do', 3),
 ('(margin', 3),
 ('(in', 3),
 ('ñsimply', 3),
 ('(e)', 3),
 ('to¥', 3),
 ('saleña', 3),
 ('~', 3),
 ('¥and', 3),
 ('conference(tithe)', 3),
 ('¥been', 3),
 ('°', 3),
 ('tion)', 3),
 ('¡-', 3),
 ('(he', 3),
 ('o%', 3),
 ('ç', 3),
 ('ñlast', 3),
 ('ñyes', 3),
 ('}', 3),
 ('ñif', 3),
 ('ñis', 3),
 ('`great', 3),
 ('allñthe', 3),
 ('michiganñ', 3),
 ('ñmarianne', 3),
 ('byñ', 3),
 ('`the', 3),
 ('ñbible', 3),
 ('ñto', 3),
 ('(prov', 3),
 ('~~', 3),
 ('/i', 3),
 ('`object', 3),
 ('not¥', 3),
 ('(for', 3),
 ('christñ', 2),
 ('each)', 2),
 ('was¥', 2),
 ('¥a', 2),
 ('¥-', 2),
 ('igo+', 2),
 ('educationñits', 2),
 ('numbests)in', 2),
 ('i~n-', 2),
 ('with¥', 2),
 ('comfortñ', 2),
 ('/(', 2),
 ('can_', 2),
 ('*read', 2),
 ('(july', 2),
 ('¥ñ', 2),
 ('reading)', 2),
 ('ñh', 2),
 ('ñwhether', 2),
 ('ñc', 2),
 ('restñ', 2),
 ('/e', 2),
 ('ñthere', 2),
 ('%%', 2),
 ('(dt', 2),
 ('ñso', 2),
 ('ñf', 2),
 ('(new', 2),
 ('(as', 2),
 (')))', 2),
 ('((that', 2),
 ('young*', 2),
 ('these¥', 2),
 ('(christ)', 2),
 ('carñoh', 2),
 ('ñeven', 2),
 ('_the', 2),
 ('work¥', 2),
 ('ñread', 2),
 ('ex¥', 2),
 ('wig)', 2),
 ('workñnot', 2),
 ('ñwith', 2),
 ('(vs', 2),
 ('(without', 2),
 ('[should]', 2),
 ('¥who', 2),
 ('on¥', 2),
 ('them)', 2),
 ('-ô', 2),
 ('ñhealth', 2),
 ('ñever', 2),
 ('grammarñcomplete', 2),
 ('usedñthe', 2),
 ('+', 2),
 ('to¢', 2),
 ('ant)', 2),
 (')ñ', 2),
 ('back)', 2),
 ('[for', 2),
 ('>', 2),
 ('christ)', 2),
 ('this¥', 2),
 ('ñjames', 2),
 ('beñ', 2),
 ('(vol', 2),
 ("'¥", 2),
 ('church)', 2),
 ('ñhad', 2),
 ('(h)', 2),
 ('ñtwo', 2),
 ('ñare', 2),
 ('guidanceñ', 2),
 ("['sego", 2),
 ('paperñduties', 2),
 ('foodñ', 2),
 ('/a', 2),
 ('o¢', 2),
 ('ñreports', 2),
 ('r/', 2),
 ('\\k', 2),
 ('(april', 2),
 ('tions)', 2),
 ('ñwhen', 2),
 ('soloñ', 2),
 ('n¢', 2),
 ('quartetteñ', 2),
 ('childñhis', 2),
 ('ñfebruary', 2),
 ('in*', 2),
 ('(verse', 2),
 ('¥for', 2),
 ("///'", 2),
 ('numbers)ln', 2),
 ('ñsel', 2),
 ('camp¥meeting', 2),
 ('lord)', 2),
 ('ñseveral', 2),
 ('%two', 2),
 ('-¥', 2),
 ('drinkñ', 2),
 ('eternityñ', 2),
 ('¥c', 2),
 ('(those', 2),
 ('ñherrick', 2),
 ('sec¥', 2),
 ('fearsñhe', 2),
 ('(concluded)', 2),
 ('ñu', 2),
 ('ñsir', 2),
 ('(front', 2),
 ('subscriptions)', 2),
 ('page)', 2),
 ("'/", 2),
 ('father)', 2),
 ('greek]', 2),
 ('sabbath¥', 2),
 ('(iii', 2),
 ('franciscoñfell', 2),
 ('ñmembers', 2),
 ('(nov', 2),
 ('(isaiah', 2),
 ('(all', 2),
 ('(minister)', 2),
 ('they_', 2),
 ('smileñ', 2),
 ('ñjohn', 2),
 ('itñ', 2),
 ('his¥', 2),
 ('/#', 2),
 ('ñwill', 2),
 ('`we', 2),
 ('ñlittle', 2),
 ('presidentñ', 2),
 ('(-', 2),
 (')))))', 2),
 ('ñd', 2),
 ('camp¥', 2),
 ('it¥', 2),
 ('ñsabbath', 2),
 ('_in', 2),
 ('trueñ', 2),
 ('(money', 2),
 ('ñwas', 2),
 ('saleñforty-acre', 2),
 ('%v', 2),
 ('(paper', 2),
 ('bibleñold', 2),
 ('ñby', 2),
 ('(poetry', 2),
 ('ñprofessor', 2),
 (')(', 2),
 ('(even', 2),
 ('ñbut', 2),
 ('god)', 2),
 ('(ga', 2),
 ('(note', 2),
 ('(ecc', 2),
 ('paperñhow', 2),
 ('to¥show', 2),
 ('¥they', 2),
 ('(swedish)', 2),
 ('is¥', 2),
 ('to-day)', 2),
 ('ñthose', 2),
 ('paperñthe', 2),
 ('prayerñ', 2),
 ('♦', 2),
 ('saysñ', 2),
 ('(jno', 2),
 ('bookñthe', 2),
 ('¥be', 2),
 ('ñwhat', 2),
 ('re¥', 2),
 ('(life', 2),
 ('__', 2),
 ('(col', 2),
 ('_this', 2),
 ('heartñ', 2),
 ("¥'", 2),
 ('numbers)i', 2),
 ('[tight', 2),
 ('joyñ', 2),
 ('more¥', 2),
 ('ñabraham', 2),
 ('(psalms', 2),
 ('a¥', 2),
 ('(with', 2),
 ('`it', 2),
 ('ñwhich', 2),
 ('and_', 2),
 ('anythingñ', 2),
 ('is_', 2),
 ('that¥', 2),
 ('ñ-', 2),
 ('_have', 2),
 ('ñcamp-meetings', 2),
 ("(god's)", 1),
 ('`ye', 1),
 ('¥ence', 1),
 ('health]', 1),
 ('_lessons', 1),
 ('ñcollege', 1),
 ('countryñmussoorie', 1),
 ('ñhistorical', 1),
 ('(tile', 1),
 ('margaret¥ilaughey', 1),
 ('(twins', 1),
 ('gui)', 1),
 ('criti¥', 1),
 ('ques¥', 1),
 ('heartsñto', 1),
 ("'ô\\", 1),
 ('\\\\e', 1),
 ('_materials', 1),
 ('twoñgeneral', 1),
 ('[or', 1),
 ('to¥your', 1),
 ('burnhamñallegan', 1),
 ('baffledñdestroyed', 1),
 ('a\\mir', 1),
 ('was]', 1),
 ('`prepare', 1),
 ('(bishop)', 1),
 ('=pill/irk', 1),
 ('deredñ', 1),
 ('r)r', 1),
 ('//ii/', 1),
 ('before)', 1),
 ('michôan', 1),
 ('^', 1),
 ('(actions', 1),
 ('whileñis', 1),
 ('purposeñabraham', 1),
 ('(adopted', 1),
 ('%%us', 1),
 ('numbers)in-advance', 1),
 ('mentionedñconducting', 1),
 ('¥=', 1),
 ('_here', 1),
 ('and¥sisters', 1),
 ('every-__', 1),
 ('(margin)', 1),
 ('previous¥', 1),
 ('veas(', 1),
 ('possible¡', 1),
 ('illgami/', 1),
 ('causeñan', 1),
 ('(german', 1),
 ('ii)', 1),
 ('of¥', 1),
 ('stateñsome', 1),
 ('merriamñlowell', 1),
 ('(adv', 1),
 ('ined)', 1),
 ('shawñdied', 1),
 ('be=', 1),
 ('lettersñthe', 1),
 ('adam¥transgressed', 1),
 ('(forty', 1),
 ('wic/', 1),
 ('\\j', 1),
 ('the`lord', 1),
 ('out¥', 1),
 ('ñeugene', 1),
 ('pm/', 1),
 ('trioñ', 1),
 ('m(', 1),
 ('deposits)', 1),
 ('ñordis', 1),
 (')}', 1),
 ('¨f', 1),
 ('center¥ñ', 1),
 ('convenñ', 1),
 ('scherzoñ', 1),
 ('exerciseñthe', 1),
 ('peo¥', 1),
 ('says)', 1),
 ('cx)', 1),
 ('or¥', 1),
 ('ñ=', 1),
 ('trainingñthe', 1),
 ('et*', 1),
 ("stringsñsailor'", 1),
 ('¥edward', 1),
 ('drillsñreading', 1),
 ('ñyouth', 1),
 ('(about', 1),
 ('v/', 1),
 ('_consideration', 1),
 ('lord¥', 1),
 ('atedña', 1),
 ('ñought', 1),
 ('mornñso', 1),
 ('ñoh', 1),
 ('prayers_', 1),
 ('mer_', 1),
 ('godñsome', 1),
 ('p/a', 1),
 ('dieñas', 1),
 ('%ell', 1),
 ('statementñ', 1),
 ('me)', 1),
 ('publishing_', 1),
 ("worldã'¥", 1),
 ("'ñand", 1),
 ('bodyña', 1),
 ('(that', 1),
 ('praise¥god', 1),
 ('wilburñportland', 1),
 ('preparationñits', 1),
 ('(ii', 1),
 ('meansñmen', 1),
 ("(/'", 1),
 ('ãg', 1),
 ('(vest', 1),
 ('compassionñ', 1),
 ('_lumber', 1),
 ('ñon', 1),
 ('/efr/', 1),
 ('willing¥', 1),
 ('ñeben', 1),
 ('thoughtñdivine', 1),
 ('pro¥', 1),
 ('ñworld', 1),
 ('necessaryñ', 1),
 ('exercisedñthe', 1),
 ('(excluding', 1),
 ('not¥in', 1),
 ('yardñwinifred', 1),
 ('oc)', 1),
 ("wr'%ô", 1),
 ('baptizedñthis', 1),
 ('the¥formation', 1),
 ('zw/i/', 1),
 ('arithmeticñcomplete', 1),
 ('overlookedñthe', 1),
 ('heartñgrowing', 1),
 ('ver)', 1),
 ('artñall', 1),
 ('each¥way', 1),
 ('bibleñchurch', 1),
 ('wordsñand', 1),
 ('some)', 1),
 ('morningñwhen', 1),
 ('accomplishñ', 1),
 ('tionñfurnishes', 1),
 ('s\\', 1),
 ('yô', 1),
 ('it/', 1),
 ('known)', 1),
 ('ñsabbath-', 1),
 ('¥from', 1),
 ('wa¤', 1),
 ('tory)', 1),
 ('downñthe', 1),
 ('ñfell', 1),
 ('valie¥', 1),
 ('a%\\', 1),
 ('cottñan', 1),
 ('a*', 1),
 ('standsñis', 1),
 ('friendñ', 1),
 ('ñper-', 1),
 ('goñall', 1),
 ('years)', 1),
 ('nersñand', 1),
 ('lith**', 1),
 ('ñarticles', 1),
 ("ô'd", 1),
 ('baptistñmillie', 1),
 ('membersñtwo', 1),
 ('ôi', 1),
 ('distantña', 1),
 ('stormôso', 1),
 ('lostñat', 1),
 ('(ger-', 1),
 ('trial¥', 1),
 ('pesveas(', 1),
 ('made¥', 1),
 ('%or', 1),
 ('here¥and', 1),
 ('numbcps)', 1),
 ('(absolute)', 1),
 ("ã'", 1),
 ('*have', 1),
 ('(mar-', 1),
 ('(d', 1),
 ('beastñthe', 1),
 ('(broth-', 1),
 ('inheritedñ', 1),
 ('a)', 1),
 ('teachersñ(', 1),
 ('ñforty', 1),
 ('((armed¥', 1),
 ('(virginia)', 1),
 ('pierceñmrs', 1),
 ('all¥the', 1),
 ('[not', 1),
 ('firga/', 1),
 ('emptyñcontribute', 1),
 ('thingñonly', 1),
 ('(log', 1),
 ('infancy)', 1),
 ('_---', 1),
 ("¥'we", 1),
 ('holding_', 1),
 ('lôilorning', 1),
 ('mountainsñwas', 1),
 ('institu¥', 1),
 ('then_', 1),
 ('ñtwenty-two', 1),
 ('does¥', 1),
 ('wrong¥', 1),
 ('meñif', 1),
 ('primaryãand', 1),
 ('èè', 1),
 ('\\varner', 1),
 ('rhetoricñkellogg', 1),
 ('ninthñnever', 1),
 ('translation)', 1),
 ('ñatlantic', 1),
 ('when_', 1),
 ("'illl~l", 1),
 ('ãmin', 1),
 ('itumegoc(', 1),
 ('¥incomparable', 1),
 ('appoint=', 1),
 ('ãli', 1),
 ('secondñdrink', 1),
 ('feelñwell', 1),
 ('comeñlet', 1),
 ('ñeating', 1),
 ('_sister', 1),
 ('iff(iii', 1),
 ('¥life', 1),
 ("'¥'", 1),
 ('pesyeas(', 1),
 ('withrowñdied', 1),
 ('recitationñ', 1),
 ('ñsaving', 1),
 ('(retail', 1),
 ('fit*takki', 1),
 ("curse')", 1),
 ('diedñin', 1),
 ('ñr', 1),
 ('(ise', 1),
 ('jam(', 1),
 ('egypt)', 1),
 ('a/', 1),
 ('(ind', 1),
 ('placesñthirty-three', 1),
 ("botanyñleavitt's", 1),
 ('ringsñby', 1),
 ('continued¥)', 1),
 ('z/g¥', 1),
 ('(board', 1),
 ('vs¥m', 1),
 ('godña', 1),
 ('cal*', 1),
 ('ñbrethren', 1),
 ('scaledñgod', 1),
 ('(denomi-', 1),
 ('(africa)', 1),
 ('ñhalf', 1),
 ('t*', 1),
 ('ammo(', 1),
 ('of`the', 1),
 ('property_', 1),
 ('use¥of', 1),
 ('nexus¥', 1),
 ('new¥', 1),
 ('(apostolic', 1),
 ('(march', 1),
 ('tistiofflau_j', 1),
 ('¥ations', 1),
 ('ñliquor', 1),
 ('(no', 1),
 ('minutes)', 1),
 ('crafts)', 1),
 ('ñstephen', 1),
 ('awayñtheir', 1),
 ('fieldñ', 1),
 ('ñwell', 1),
 ('ñor', 1),
 ('christñwho', 1),
 ('workñbeing', 1),
 ('paperñhave', 1),
 ('ñhe', 1),
 ('%%mo', 1),
 ('ñmain-', 1),
 ('(saturday)', 1),
 ('(danish-', 1),
 ('prophetically)', 1),
 ('deliver¥', 1),
 ('çflaiii', 1),
 ('\\\\ittuto', 1),
 ('¥ten', 1),
 ('edu¥', 1),
 ('riversñfifteen', 1),
 ('haugheyñotsego', 1),
 ('knowñi', 1),
 ('numbems)in', 1),
 ('/tioheagigt', 1),
 ('flowñ', 1),
 ('ñready', 1),
 ('~niiii', 1),
 ('*you', 1),
 ('letterñfrom', 1),
 ('jo¡', 1),
 ('corm)', 1),
 ('land)', 1),
 ('we¥are', 1),
 ('ñgeneral', 1),
 ('*two', 1),
 ('(should', 1),
 ("\\ctrir''", 1),
 ('tenthñ', 1),
 ('areña', 1),
 ('loi#d', 1),
 ('(under', 1),
 ('(especially', 1),
 ('two¥', 1),
 ('meri¥', 1),
 ('¥corliss', 1),
 ('are¥', 1),
 ('ñtogether', 1),
 ('thousand¥', 1),
 ('%vest', 1),
 ('t-}', 1),
 ('sorrowñ', 1),
 ('aboveñcause', 1),
 ('butterfieldñbuchanan', 1),
 ('spearñfell', 1),
 ('appe¥', 1),
 ('christñhe', 1),
 ('(except', 1),
 ('agesñ', 1),
 ('w//', 1),
 ('`m~d', 1),
 ('[the', 1),
 ('bath¥keepers', 1),
 ('¡heaven', 1),
 ('ñtuesday', 1),
 ('distanceñthe', 1),
 ('¥kalama', 1),
 ('c)', 1),
 ('(v', 1),
 ('_read', 1),
 ('tentsñone', 1),
 ('_e__zeo', 1),
 ('shriekñ', 1),
 ('ñenough', 1),
 ('tentsñthe', 1),
 ('(they', 1),
 ('homeñ', 1),
 ('fb/', 1),
 ('copyñthe', 1),
 ('smithñgrandville', 1),
 ('plifitt/', 1),
 ('satan¥', 1),
 ('departmentñtwo', 1),
 ('bandñ', 1),
 ('ñtemporal', 1),
 ('grandville_', 1),
 ('privilege/to', 1),
 ('faultsñshould', 1),
 ('answerñ', 1),
 ('(whatsoever', 1),
 ('body)', 1),
 ('[c]', 1),
 ('ac}', 1),
 ('¥usñthe', 1),
 ('portunity¥', 1),
 ('o/', 1),
 ('ñreasons', 1),
 ('r¢', 1),
 ('year(', 1),
 ('some`consideration', 1),
 ('ñmeetings', 1),
 ('()rues¡', 1),
 ('il/', 1),
 ('standpointña', 1),
 ('ñhand', 1),
 ('unionñfrank', 1),
 ('¡()', 1),
 ('wantedñto', 1),
 ("'%\\%", 1),
 ('scienceñelementary', 1),
 ('orphanñit', 1),
 ('¥planting', 1),
 ('(bo', 1),
 ('ñspeaking', 1),
 ('found¥the', 1),
 ('a/pfi', 1),
 ('¥ed', 1),
 ('`\\_\\_', 1),
 ('(two', 1),
 ('meet=', 1),
 ('\\\\ô\\', 1),
 ('aon¥', 1),
 ('%moo', 1),
 ('areñthe', 1),
 ('a(a', 1),
 ('thatñ', 1),
 ("under'compulsion)", 1),
 ('-_', 1),
 ('ãt', 1),
 ('ho\\tever', 1),
 ('hillginc/', 1),
 ('discus-¥', 1),
 (')im', 1),
 ('(his', 1),
 ('go%', 1),
 ('ingsña', 1),
 ('pennsyl-(', 1),
 ('*heaven', 1),
 ("tm'\\", 1),
 ('reveals¥', 1),
 ('vvr/rip', 1),
 ('worldñthey', 1),
 ('*out', 1),
 ('(forces)', 1),
 ('laterñ', 1),
 ('is`situated', 1),
 ('graceñlove', 1),
 ('¥rela-', 1),
 ('ñmay', 1),
 ('(illus', 1),
 ('ñlet', 1),
 ("'wm*", 1),
 ('at¥corn', 1),
 ('itñthat', 1),
 ('up¥and', 1),
 ('holidayñdied', 1),
 ('yearñ', 1),
 ('shoulc_lopot', 1),
 ('there¥', 1),
 ('oneñto', 1),
 ('(symbolically', 1),
 ('prospectñwe', 1),
 ('turn_pale', 1),
 ("ãa'", 1),
 ('g¥', 1),
 ('(pest', 1),
 ('questionñis', 1),
 ("'ñone", 1),
 ('/inj', 1),
 ('_effect', 1),
 ('society(', 1),
 ('collardñdied', 1),
 ('ft/', 1),
 ('¥secretary', 1),
 ("\\n\\'", 1),
 ('(twelve', 1),
 ('bornñon', 1),
 ('may¥', 1),
 ('ñrepairs', 1),
 ('fãigr', 1),
 ("~iqiiiidiiiniinii(i'''", 1),
 ("/'i", 1),
 ('fridayñprepared', 1),
 ('_-', 1),
 ('governorñduties', 1),
 ('extras)', 1),
 ('history¥', 1),
 ('ere/', 1),
 ('*is%', 1),
 ('often*', 1),
 ('cudneyñdied', 1),
 ('(they)', 1),
 ('youñyou', 1),
 ('con_erning', 1),
 ('classesñintermediate', 1),
 ('roil/', 1),
 ('ii¥', 1),
 ('of/', 1),
 ('ñeach', 1),
 ("ñ'", 1),
 ('\\yam', 1),
 ('_apply', 1),
 ('`illessed', 1),
 ('*and', 1),
 ('(ilatchman', 1),
 ('resum¥', 1),
 ('ñlegislative', 1),
 ('peap(', 1),
 ('<', 1),
 ('/cartoinmtza', 1),
 ('fie/l(', 1),
 ('messageñto', 1),
 (')he', 1),
 ('_god', 1),
 ('smithyñlesson', 1),
 ('whiteñthe', 1),
 ('re/', 1),
 ('sugar)', 1),
 ("('", 1),
 ('ñhave', 1),
 ('_much', 1),
 ('\\ô\\', 1),
 ('c_aivy', 1),
 ('for_room', 1),
 ('largeñone', 1),
 ('*licentiates', 1),
 ('influenceñin', 1),
 ('sutherland)', 1),
 ('k/aw', 1),
 ('ñdo', 1),
 ('ããilicom', 1),
 ('treasurerñ', 1),
 ('eoñternal', 1),
 ('tells_', 1),
 ('jill)', 1),
 ('before¥', 1),
 ('¥v', 1),
 ('`cast', 1),
 ('wedgeñkindness', 1),
 ('s¥ix', 1),
 ('illl~', 1),
 ('//mi', 1),
 ('kind)', 1),
 ('bless¥', 1),
 ("/g'", 1),
 ('poundñhath', 1),
 ('general_', 1),
 ('israelñ', 1),
 ('-•', 1),
 ('(such', 1),
 ('ureclly_il', 1),
 ('affliction_', 1),
 ('thisñthere', 1),
 ('ñnow', 1),
 ('solveñ', 1),
 ('*in', 1),
 ('house)', 1),
 ('ñduring', 1),
 ('freedomñ', 1),
 ('g\\ta', 1),
 ('¥bible', 1),
 ('other_', 1),
 ('(t', 1),
 ('aroundñ', 1),
 ('sundayñin', 1),
 ('athafitov/iati', 1),
 ('produce_a', 1),
 ('the¥thanksgiving', 1),
 ('/z//', 1),
 ('(story)', 1),
 ('myself)', 1),
 ('an¥', 1),
 ('*conference', 1),
 ('after¥', 1),
 ('ñsigns', 1),
 ('mcmorran*', 1),
 ('¥training', 1),
 ('ñen', 1),
 ('(george', 1),
 ('ordersñin', 1),
 ('seventhñbe', 1),
 ('edñdr', 1),
 ('ñ`a', 1),
 ('%ago', 1),
 ('vegetarianismñits', 1),
 ('conventionsñin', 1),
 ('%*', 1),
 ('(lowelf)', 1),
 ('_someone', 1),
 ('(let', 1),
 ('very¥', 1),
 ('siteaga_', 1),
 ('`value', 1),
 ('ñdwelling', 1),
 ('ñhattiee', 1),
 ('speakñmen', 1),
 ('pleasures)', 1),
 ('ò', 1),
 ('snunbers)in', 1),
 ('crossñ', 1),
 ('ñthese', 1),
 ('ro%', 1),
 ('mel/', 1),
 ('ñsunday-closing', 1),
 ('cheapñrubber-tired', 1),
 ('`permit', 1),
 ('(church', 1),
 ('stateñpublishes', 1),
 ("salvation'of¥", 1),
 ('bodiesñlet', 1),
 ('my_', 1),
 ('yeas(', 1),
 ("volun¥teers'", 1),
 ...]

Correction 1 -- Normalize Characters

In [14]:
# %load shared_elements/normalize_characters.py
prev = "baseline"
cycle = "correction1"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    # Substitute for all other dashes
    content = re.sub(r"—-—–‑", r"-", content)

    # Substitute formatted apostrophe
    content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
    
    # Replace all special characters with a space (as these tend to occur at the end of lines)
    content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [17]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction1

Average verified rate: 0.9377967276021958

Average of error rates: 0.0643058485139022

Total token count: 938150

In [18]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[18]:
[('m', 1749),
 ('-', 1720),
 ('w', 1503),
 ('g', 1434),
 ('e', 1307),
 ('d', 1276),
 ('re-', 816),
 ('con-', 750),
 ('tion', 684),
 ('r', 681),
 ('n', 644),
 ('in-', 539),
 ("'", 507),
 ('be-', 471),
 ('f', 444),
 ('t', 381),
 ('de-', 377),
 ('com-', 339),
 ('ex-', 332),
 ('michi-', 328),
 ('th', 289),
 ('sab-', 285),
 ('ment', 283),
 ('ence', 267),
 ('en-', 233),
 ('peo-', 226),
 ('sabbath-', 223),
 ('ly', 220),
 ('ference', 212),
 ('ple', 207),
 ('confer-', 207),
 ('pre-', 203),
 ('tions', 189),
 ('ad-', 186),
 ('dis-', 178),
 ('at-', 173),
 ('oo', 172),
 ('im-', 167),
 ('mis-', 164),
 ('un-', 163),
 ('meet-', 162),
 ('ers', 162),
 ('ac-', 161),
 ('pro-', 153),
 ('per-', 146),
 ('ber', 137),
 ('io', 117),
 ('ap-', 116),
 ('ren', 114),
 ('ary', 113)]

Correction 2 -- Connect Line Endings

In [20]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
In [23]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction2

Average verified rate: 0.9726863553068523

Average of error rates: 0.029485139022051778

Total token count: 915147

In [24]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[('m', 1747),
 ('-', 1700),
 ('w', 1503),
 ('g', 1434),
 ('e', 1307),
 ('d', 1275),
 ('r', 680),
 ('n', 644),
 ("'", 507),
 ('f', 443),
 ('t', 377),
 ('th', 283),
 ('oo', 171),
 ('sabbathschool', 163),
 ('io', 117),
 ('mt', 108),
 ('k', 106),
 ('co', 102),
 ('ro', 94),
 ('wm', 82),
 ('numbess', 75),
 ('u', 69),
 ("'field", 67),
 ("canvassers'", 58),
 ('--', 50),
 ('x', 46),
 ("'the", 44),
 ('horr', 39),
 ("the'", 38),
 ('rd', 33),
 ('blendon', 32),
 ('mid-summer', 32),
 ('brower', 31),
 ("f'd", 30),
 ('-the', 29),
 ('harnden', 29),
 ('mchugh', 29),
 ('nd', 28),
 ('seventhday', 28),
 ('cleora', 27),
 ('ex', 26),
 ('tion', 25),
 ('sabbathschools', 23),
 ('q', 23),
 ('nunica', 23),
 ('con-', 22),
 ("'to", 22),
 ('vowyla', 21),
 ('-and', 21),
 ('loth', 20)]

Correction 3 -- Remove extra dashes

In [26]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    for token in tokens:
        if token[0] is "-":
            replacements.append((token, token[1:]))
            
        elif token[-1] is "-":
            replacements.append((token, token[:-1]))
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
WMH19030128-V01-04-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('labor-', 'labor'), ('-', '')]
WMH19030128-V01-04-page2.txt: [('-Prpartittrrit.', 'Prpartittrrit.')]
WMH19030128-V01-04-page3.txt: [('-', ''), ('HER-', 'HER')]
WMH19030128-V01-04-page4.txt: [('-', ''), ('Mich-', 'Mich')]
WMH19030204-V01-05-page1.txt: [('-', ''), ('fin-', 'fin'), ('-', ''), ('-', '')]
WMH19030204-V01-05-page2.txt: [('purit-', 'purit')]
WMH19030211-V01-06-page1.txt: [('-Is', 'Is'), ('-', ''), ('-', '')]
WMH19030211-V01-06-page3.txt: [('partic-', 'partic')]
WMH19030311-V01-10-page2.txt: [('ambush-', 'ambush')]
WMH19030311-V01-10-page3.txt: [('morn-', 'morn')]
WMH19030311-V01-10-page4.txt: [('en-', 'en'), ('TRAVIS.-', 'TRAVIS.'), ('WILBUR.-', 'WILBUR.')]
WMH19030415-V01-15-page2.txt: [('IMPRES-', 'IMPRES'), ('corn-', 'corn')]
WMH19030415-V01-15-page3.txt: [('-', ''), ('-air', 'air')]
WMH19030415-V01-15-page4.txt: [('HER-', 'HER')]
WMH19030506-V01-18-page1.txt: [('COM-', 'COM')]
WMH19030506-V01-18-page2.txt: [('cul-', 'cul')]
WMH19030506-V01-18-page4.txt: [('-', ''), ('meet-', 'meet'), ('Les-', 'Les'), ('conver-', 'conver'), ('forgive-', 'forgive'), ('sub-', 'sub'), ('HER-', 'HER')]
WMH19030513-V01-19-page2.txt: [('-formidable', 'formidable')]
WMH19030520-V01-20-page1.txt: [('-', '')]
WMH19030520-V01-20-page2.txt: [('-Drpartment', 'Drpartment'), ('--No.', '-No.')]
WMH19030520-V01-20-page3.txt: [('temperature-', 'temperature'), ('-', ''), ('temperature-', 'temperature'), ('-', ''), ('applications-', 'applications')]
WMH19030520-V01-20-page4.txt: [('Sand-', 'Sand')]
WMH19030527-V01-21-page1.txt: [('-', ''), ('-', ''), ('Heb-', 'Heb'), ('with-', 'with')]
WMH19030527-V01-21-page2.txt: [('--such', '-such'), ('faith-', 'faith')]
WMH19030527-V01-21-page3.txt: [('BAND-', 'BAND'), ('pun-', 'pun'), ('-', ''), ('ali-', 'ali'), ('rep-', 'rep')]
WMH19030603-V01-22-page1.txt: [('TES-', 'TES')]
WMH19030603-V01-22-page2.txt: [('SAB-', 'SAB'), ('CON-', 'CON')]
WMH19030603-V01-22-page3.txt: [('----', '---'), ('AB-', 'AB'), ('-', ''), ('physi-', 'physi')]
WMH19030603-V01-22-page4.txt: [('-', '')]
WMH19030610-V01-23-page1.txt: [('-', ''), ('-', '')]
WMH19030610-V01-23-page3.txt: [('-', ''), ('-', ''), ('reason-', 'reason')]
WMH19030624-V01-25-page1.txt: [('DEpART-', 'DEpART'), ('-', '')]
WMH19030624-V01-25-page3.txt: [('DETERIORA-', 'DETERIORA')]
WMH19030624-V01-25-page4.txt: [('Endeavor.-', 'Endeavor.'), ('Mc-', 'Mc')]
WMH19030701-V01-26-page1.txt: [('-', ''), ('any.-', 'any.'), ('DEPART-', 'DEPART')]
WMH19030701-V01-26-page3.txt: [('-', ''), ('-', ''), ('distribu-', 'distribu'), ('per-', 'per')]
WMH19030701-V01-26-page4.txt: [('-', '')]
WMH19030708-V01-27-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19030708-V01-27-page2.txt: [('-', ''), ('-', '')]
WMH19030708-V01-27-page3.txt: [('pro-', 'pro')]
WMH19030708-V01-27-page4.txt: [('Le-', 'Le'), ('-', '')]
WMH19030715-V01-28-page1.txt: [('-', '')]
WMH19030715-V01-28-page2.txt: [('-chool', 'chool')]
WMH19030715-V01-28-page3.txt: [('-IN', 'IN')]
WMH19030715-V01-28-page4.txt: [('HER-', 'HER'), ('non-un-', 'non-un')]
WMH19030722-V01-29-page1.txt: [('-', '')]
WMH19030722-V01-29-page2.txt: [('assem-', 'assem')]
WMH19030722-V01-29-page3.txt: [('-', ''), ('-', '')]
WMH19030722-V01-29-page4.txt: [('cents-', 'cents'), ('-Elder', 'Elder')]
WMH19030930-V01-39-page3.txt: [('in-', 'in'), ('-to', 'to')]
WMH19030930-V01-39-page4.txt: [('-', ''), ('-', ''), ('-revived.', 'revived.')]
WMH19031028-V01-43-page1.txt: [('-the', 'the'), ('corn-', 'corn'), ('-and', 'and'), ('be-', 'be'), ('-into', 'into')]
WMH19031028-V01-43-page4.txt: [('-', ''), ('-judgment', 'judgment'), ('-', '')]
WMH19031118-V01-46-page1.txt: [('-', ''), ('-', ''), ('mission--', 'mission-')]
WMH19031118-V01-46-page4.txt: [('-the', 'the'), ('-', '')]
WMH19040106-V02-02-page1.txt: [('every-', 'every'), ('peo-', 'peo'), ('-', ''), ('cor-', 'cor')]
WMH19040106-V02-02-page2.txt: [('indi-', 'indi')]
WMH19040106-V02-02-page3.txt: [('-such', 'such')]
WMH19040106-V02-02-page4.txt: [('--Prof.', '-Prof.'), ('Wag-', 'Wag'), ('"Work-', '"Work')]
WMH19040113-V02-03-page1.txt: [('-', ''), ('Van-', 'Van'), ('camp-meet-', 'camp-meet'), ('now-', 'now'), ('conven-', 'conven')]
WMH19040113-V02-03-page2.txt: [('God--', 'God-')]
WMH19040113-V02-03-page3.txt: [('AC-', 'AC'), ('-', '')]
WMH19040113-V02-03-page4.txt: [('--A', '-A')]
WMH19040127-V02-04-page1.txt: [('PRO-', 'PRO'), ('continu-', 'continu')]
WMH19040127-V02-04-page2.txt: [('as-', 'as')]
WMH19040127-V02-04-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19040127-V02-04-page4.txt: [('church.-', 'church.'), ('improve-', 'improve'), ('-A', 'A')]
WMH19040203-V02-05-page3.txt: [('GENER-', 'GENER')]
WMH19040203-V02-05-page4.txt: [('INSTRUCT-', 'INSTRUCT'), ('INSTRUCT-', 'INSTRUCT'), ('-', ''), ('at-', 'at'), ('--"We', '-"We')]
WMH19040210-V02-06-page1.txt: [('-', ''), ('-', ''), ('en-', 'en')]
WMH19040210-V02-06-page2.txt: [('educa-', 'educa')]
WMH19040210-V02-06-page3.txt: [('-', ''), ('Seventh-', 'Seventh'), ('-', '')]
WMH19040210-V02-06-page4.txt: [('--In', '-In'), ('San-', 'San'), ('--The', '-The')]
WMH19040217-V02-07-page1.txt: [('--sacred', '-sacred'), ('Assn.-', 'Assn.'), ('-', ''), ('President-', 'President')]
WMH19040217-V02-07-page3.txt: [('-', ''), ('-', '')]
WMH19040217-V02-07-page4.txt: [('--Elder', '-Elder'), ('-will', 'will')]
WMH19040224-V02-08-page1.txt: [('President-', 'President'), ('Assn.-', 'Assn.'), ('con-', 'con'), ('priv-', 'priv')]
WMH19040224-V02-08-page2.txt: [('-', ''), ('na-', 'na')]
WMH19040224-V02-08-page4.txt: [('-', ''), ('--Elder', '-Elder'), ('--The', '-The'), ('-will', 'will'), ('o--', 'o-')]
WMH19040302-V02-09-page1.txt: [('-to', 'to')]
WMH19040302-V02-09-page3.txt: [('--"the', '-"the'), ('-Dissipating', 'Dissipating')]
WMH19040302-V02-09-page4.txt: [('-', ''), ('-an', 'an'), ('-', '')]
WMH19040309-V02-10-page1.txt: [('Bat-', 'Bat'), ('-', '')]
WMH19040309-V02-10-page2.txt: [('AD-', 'AD'), ('-subscriptions', 'subscriptions')]
WMH19040309-V02-10-page3.txt: [('-This', 'This'), ('-', '')]
WMH19040309-V02-10-page4.txt: [('--"We', '-"We')]
WMH19040316-V02-11-page1.txt: [('righteous-', 'righteous'), ('Savioin-', 'Savioin'), ('Christ.--', 'Christ.-')]
WMH19040316-V02-11-page2.txt: [('Berrien-', 'Berrien'), ('be-', 'be')]
WMH19040316-V02-11-page3.txt: [('--.', '-.')]
WMH19040316-V02-11-page4.txt: [('-', ''), ('-At', 'At'), ('-to', 'to')]
WMH19040323-V02-12-page1.txt: [('-F.', 'F.'), ('-the', 'the'), ('-', '')]
WMH19040323-V02-12-page2.txt: [('and-', 'and')]
WMH19040323-V02-12-page3.txt: [('-taken', 'taken'), ('-', '')]
WMH19040323-V02-12-page4.txt: [('Mc-', 'Mc')]
WMH19040330-V02-13-page1.txt: [('--', '-'), ('RE-', 'RE'), ('PER-', 'PER')]
WMH19040330-V02-13-page2.txt: [('-have', 'have'), ('REC-', 'REC')]
WMH19040330-V02-13-page3.txt: [('bili-', 'bili'), ('biliousness.-', 'biliousness.')]
WMH19040330-V02-13-page4.txt: [('-be', 'be')]
WMH19040406-V02-14-page3.txt: [('In-', 'In'), ('abdom-', 'abdom'), ('-', '')]
WMH19040406-V02-14-page4.txt: [('-', '')]
WMH19040413-V02-15-page3.txt: [('an-', 'an'), ('-', '')]
WMH19040413-V02-15-page4.txt: [('SOUTH-', 'SOUTH'), ('-illustrated.', 'illustrated.'), ('-disposed', 'disposed')]
WMH19040420-V02-16-page1.txt: [('Zi-', 'Zi'), ('for-', 'for')]
WMH19040420-V02-16-page3.txt: [('-the', 'the'), ('-', ''), ('-DR.', 'DR.'), ('Three-', 'Three')]
WMH19040420-V02-16-page4.txt: [('-', ''), ('-', '')]
WMH19040427-V02-17-page2.txt: [('The-', 'The'), ('-', '')]
WMH19040427-V02-17-page3.txt: [('-', '')]
WMH19040427-V02-17-page4.txt: [('Swed-', 'Swed'), ('-', '')]
WMH19040504-V02-18-page2.txt: [('-', '')]
WMH19040504-V02-18-page3.txt: [('-its', 'its'), ('-of', 'of')]
WMH19040504-V02-18-page4.txt: [('-', '')]
WMH19040511-V02-19-page2.txt: [('-', ''), ('---of', '--of')]
WMH19040511-V02-19-page3.txt: [('-', ''), ('-', ''), ('mail-', 'mail'), ('-', ''), ('-', ''), ('-truths', 'truths'), ('-', ''), ('-Allegan', 'Allegan')]
WMH19040511-V02-19-page4.txt: [('-Remember', 'Remember'), ('-', ''), ('be-', 'be')]
WMH19040518-V02-20-page2.txt: [('-be', 'be')]
WMH19040518-V02-20-page3.txt: [('-note', 'note'), ('-mentioned', 'mentioned')]
WMH19040518-V02-20-page4.txt: [('inter.-', 'inter.'), ('-', ''), ('-blessed', 'blessed'), ('-', '')]
WMH19040601-V02-22-page1.txt: [('Ohio--', 'Ohio-'), ('-', ''), ('-Irwin', 'Irwin'), ('Andrea-', 'Andrea')]
WMH19040601-V02-22-page2.txt: [('-the', 'the')]
WMH19040608-V02-23-page1.txt: [('-A.', 'A.'), ('Treasurer-D.-', 'Treasurer-D.'), ('-', ''), ('-', ''), ("-urged'", "urged'")]
WMH19040608-V02-23-page2.txt: [('-', ''), ('dis-', 'dis'), ('-', ''), ('-', ''), ('-', ''), ('in-', 'in')]
WMH19040608-V02-23-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-God', 'God'), ('-to', 'to'), ('en-', 'en')]
WMH19040608-V02-23-page4.txt: [('De-', 'De'), ('-', '')]
WMH19040622-V02-24-page3.txt: [('-', ''), ('for-', 'for'), ('-', '')]
WMH19040622-V02-24-page4.txt: [('-', ''), ('Some-', 'Some'), ('-', '')]
WMH19040629-V02-25-page1.txt: [('-cultivate', 'cultivate'), ('-', ''), ('-', ''), ('or-', 'or')]
WMH19040629-V02-25-page2.txt: [('-this', 'this'), ('-be', 'be')]
WMH19040629-V02-25-page3.txt: [('-', ''), ('-five.', 'five.')]
WMH19040629-V02-25-page4.txt: [('-', '')]
WMH19040706-V02-26-page2.txt: [('con-', 'con'), ('attend-', 'attend'), ('Mission--', 'Mission-')]
WMH19040706-V02-26-page3.txt: [('--', '-'), ('-', ''), ('-', ''), ('-and', 'and'), ('world-', 'world')]
WMH19040713-V02-27-page1.txt: [('-', ''), ('CIRCUM-', 'CIRCUM'), ('ELECT-', 'ELECT'), ('DEFI-', 'DEFI')]
WMH19040713-V02-27-page2.txt: [('-ALL', 'ALL')]
WMH19040713-V02-27-page3.txt: [('HERALD.-', 'HERALD.'), ('-mee', 'mee'), ('-people', 'people'), ('-', ''), ('-whom', 'whom'), ('-', ''), ('par-', 'par')]
WMH19040720-V02-28-page1.txt: [('Vox-', 'Vox'), ('-a', 'a'), ('-principles', 'principles')]
WMH19040720-V02-28-page3.txt: [('-in', 'in')]
WMH19040720-V02-28-page4.txt: [('-', ''), ('Na-', 'Na'), ('-The', 'The'), ('-', ''), ('-', ''), ('announ-', 'announ'), ('---"Our', '--"Our')]
WMH19040727-V02-29-page1.txt: [('-', ''), ('straw-', 'straw'), ('-', ''), ('lights-', 'lights'), ('-Righteousness', 'Righteousness')]
WMH19040727-V02-29-page2.txt: [('--That', '-That'), ('-A', 'A')]
WMH19040727-V02-29-page3.txt: [('-may', 'may'), ('-small', 'small'), ('-', ''), ('--helpful', '-helpful')]
WMH19040803-V02-30-page1.txt: [('-the', 'the'), ('-', ''), ('round-', 'round'), ('-On', 'On'), ('-ether', 'ether'), ('purchas-', 'purchas'), ('.-', '.'), ('-not', 'not')]
WMH19040803-V02-30-page2.txt: [('-', ''), ('-', ''), ('-of', 'of')]
WMH19040803-V02-30-page3.txt: [('ex-', 'ex'), ('-church', 'church'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19040803-V02-30-page4.txt: [('-', ''), ('Van-', 'Van'), ('Van-', 'Van')]
WMH19040810-V02-31-page1.txt: [('-finding', 'finding'), ('-great', 'great'), ('South-', 'South'), ('-', ''), ('giv-', 'giv'), ('-', ''), ('peo-', 'peo')]
WMH19040810-V02-31-page3.txt: [('-', '')]
WMH19040810-V02-31-page4.txt: [('la-', 'la'), ('Sabbath-', 'Sabbath'), ('-', '')]
WMH19040817-V02-32-page1.txt: [('-', ''), ('-', ''), ('con-', 'con')]
WMH19040817-V02-32-page2.txt: [('-to', 'to'), ('-Dr.', 'Dr.')]
WMH19040817-V02-32-page3.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19040817-V02-32-page4.txt: [('benefit.-', 'benefit.')]
WMH19040831-V02-33-page1.txt: [('THEM-', 'THEM')]
WMH19040831-V02-33-page3.txt: [('repair-', 'repair')]
WMH19040831-V02-33-page4.txt: [('Howe-', 'Howe'), ('-', '')]
WMH19040914-V02-34-page1.txt: [('--', '-'), ('-', ''), ('-', '')]
WMH19040914-V02-34-page2.txt: [('--', '-')]
WMH19040914-V02-34-page3.txt: [('-and', 'and'), ('-', '')]
WMH19040921-V02-34a-page3.txt: [('con-', 'con'), ('-', ''), ('-', '')]
WMH19040921-V02-34a-page4.txt: [('II-', 'II')]
WMH19040928-V02-35-page1.txt: [('CAMP-', 'CAMP'), ('of-', 'of')]
WMH19040928-V02-35-page2.txt: [('-', '')]
WMH19040928-V02-35-page3.txt: [('pray-', 'pray')]
WMH19040928-V02-35-page4.txt: [('-', ''), ('-Miss', 'Miss'), ('-', ''), ('--', '-')]
WMH19041005-V02-36-page1.txt: [('House-to-', 'House-to'), ('librari-', 'librari'), ('"-', '"')]
WMH19041005-V02-36-page2.txt: [('-I', 'I'), ('-', '')]
WMH19041005-V02-36-page3.txt: [('indications-', 'indications'), ('-', ''), ('THANK-', 'THANK'), ('-have', 'have'), ('-breads', 'breads'), ('-local', 'local')]
WMH19041005-V02-36-page4.txt: [('-', '')]
WMH19041012-V02-37-page2.txt: [('Ad-', 'Ad'), ('-', ''), ('-', ''), ('at-', 'at')]
WMH19041012-V02-37-page4.txt: [('-conference', 'conference')]
WMH19041019-V02-38-page1.txt: [('done--', 'done-')]
WMH19041019-V02-38-page3.txt: [('ap-', 'ap'), ('perform-', 'perform'), ('-', ''), ('-tends', 'tends')]
WMH19041019-V02-38-page4.txt: [('Sabbath--', 'Sabbath-'), ('-Nashville', 'Nashville')]
WMH19041026-V02-39-page1.txt: [('disci-', 'disci')]
WMH19041026-V02-39-page2.txt: [('EN-', 'EN'), ('PEO-', 'PEO'), ('DISAP-', 'DISAP')]
WMH19041026-V02-39-page3.txt: [('PRE-', 'PRE'), ('Me-', 'Me'), ('at-', 'at')]
WMH19041026-V02-39-page4.txt: [('in-', 'in'), ('-', ''), ('Haughey-', 'Haughey')]
WMH19041102-V02-40-page1.txt: [('Lga-', 'Lga'), ('-West', 'West'), ('En-', 'En'), ('-themselves', 'themselves'), ('-', '')]
WMH19041102-V02-40-page2.txt: [('--about', '-about'), ('-this', 'this'), ('-hoped', 'hoped')]
WMH19041102-V02-40-page3.txt: [('-', ''), ('-asked', 'asked'), ('connec-', 'connec')]
WMH19041102-V02-40-page4.txt: [('Sabbath-', 'Sabbath'), ('-some', 'some'), ('-', '')]
WMH19041109-V02-41-page1.txt: [('San-', 'San')]
WMH19041109-V02-41-page2.txt: [('non-', 'non'), ('ever-', 'ever')]
WMH19041109-V02-41-page4.txt: [('in-', 'in')]
WMH19041116-V02-42-page1.txt: [('-', '')]
WMH19041116-V02-42-page4.txt: [('-', ''), ('Mc-', 'Mc'), ('Health-', 'Health')]
WMH19041123-V02-43-page1.txt: [('connected--', 'connected-'), ('-', ''), ('-is', 'is'), ('-and', 'and'), ('-', '')]
WMH19041123-V02-43-page2.txt: [('hun-', 'hun')]
WMH19041123-V02-43-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Sabbath-', 'Sabbath'), ('-could', 'could')]
WMH19041123-V02-43-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19041130-V02-44-page1.txt: [('Mc-', 'Mc'), ('-West', 'West')]
WMH19041130-V02-44-page3.txt: [('-', ''), ('discour-', 'discour'), ('NEAT-', 'NEAT'), ('-', ''), ('-', '')]
WMH19041130-V02-44-page4.txt: [('io-', 'io'), ('-', '')]
WMH19041207-V02-45-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('Ave-', 'Ave')]
WMH19041207-V02-45-page2.txt: [('-and', 'and'), ('--No.', '-No.'), ('CHRIST-', 'CHRIST'), ('To-', 'To'), ('Paw-', 'Paw'), ('confer-', 'confer')]
WMH19041207-V02-45-page3.txt: [('-', ''), ('-', '')]
WMH19041207-V02-45-page4.txt: [('-aged', 'aged'), ('resurrection.-', 'resurrection.')]
WMH19041214-V02-46-page1.txt: [('o-', 'o'), ('-', '')]
WMH19041214-V02-46-page2.txt: [('-courage', 'courage')]
WMH19041214-V02-46-page3.txt: [('-through', 'through'), ('at-', 'at'), ('-', '')]
WMH19041214-V02-46-page4.txt: [('-to', 'to'), ('Sab-', 'Sab'), ('an-', 'an'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19041221-V02-47-page1.txt: [('Education-', 'Education'), ('-to', 'to'), ('and-', 'and'), ('-', ''), ('-and', 'and'), ('-grateful', 'grateful')]
WMH19041221-V02-47-page2.txt: [('-', ''), ('teach-', 'teach')]
WMH19041221-V02-47-page3.txt: [('-church', 'church'), ('WATCH-', 'WATCH'), ('-work', 'work'), ('NEC-', 'NEC'), ('DE-', 'DE'), ('EDI-', 'EDI'), ('REG-', 'REG'), ('-', ''), ('De-', 'De')]
WMH19041221-V02-47-page4.txt: [('-', ''), ('-', ''), ('be-', 'be'), ('-', ''), ('WATCH-', 'WATCH'), ('-of', 'of')]
WMH19041228-V02-48-page1.txt: [('con-', 'con'), ('-', ''), ('-', ''), ('-during', 'during')]
WMH19041228-V02-48-page2.txt: [('-', '')]
WMH19041228-V02-48-page3.txt: [('-or', 'or')]
WMH19041228-V02-48-page4.txt: [('Sabbath-', 'Sabbath'), ('-', ''), ('-', ''), ('Sabbath-', 'Sabbath')]
WMH19050104-V03-01-page1.txt: [('-', ''), ('Con-', 'Con'), ('-', '')]
WMH19050104-V03-01-page2.txt: [('Orange-', 'Orange'), ('-', ''), ('-previous', 'previous')]
WMH19050104-V03-01-page3.txt: [('--but', '-but'), ('-not', 'not')]
WMH19050104-V03-01-page4.txt: [('-', '')]
WMH19050111-V03-02-page1.txt: [('o-', 'o')]
WMH19050111-V03-02-page2.txt: [('bap-', 'bap'), ('-', ''), ('-', ''), ('-', '')]
WMH19050111-V03-02-page3.txt: [('-', ''), ('-', ''), ('-difficult', 'difficult'), ('-and', 'and'), ('-new', 'new')]
WMH19050111-V03-02-page4.txt: [('-have', 'have'), ('faith-', 'faith'), ('-ful', 'ful'), ('zo-', 'zo'), ('Sabbath-', 'Sabbath')]
WMH19050118-V03-03-page1.txt: [('V-', 'V')]
WMH19050118-V03-03-page2.txt: [('-', '')]
WMH19050118-V03-03-page4.txt: [('-G.', 'G.'), ('-', '')]
WMH19050201-V03-04-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19050201-V03-04-page2.txt: [('-', ''), ('-', '')]
WMH19050201-V03-04-page4.txt: [("-YOUTH'S", "YOUTH'S"), ('-page', 'page'), ('Mich-', 'Mich'), ('-', ''), ('-', ''), ('-', '')]
WMH19050208-V03-05-page1.txt: [('-', ''), ('Cre-', 'Cre')]
WMH19050208-V03-05-page2.txt: [('-over', 'over'), ('corn-', 'corn')]
WMH19050208-V03-05-page3.txt: [('-to', 'to')]
WMH19050208-V03-05-page4.txt: [('-some', 'some')]
WMH19050215-V03-06-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050215-V03-06-page3.txt: [('-', '')]
WMH19050215-V03-06-page4.txt: [('-for', 'for'), ('-', '')]
WMH19050222-V03-07-page1.txt: [('-', ''), ('.-', '.'), ('reports-', 'reports'), ('-', ''), ('-', '')]
WMH19050222-V03-07-page2.txt: [('-', ''), ('-', '')]
WMH19050222-V03-07-page3.txt: [('-', ''), ('-', ''), ('-We', 'We'), ('CAN-', 'CAN')]
WMH19050222-V03-07-page4.txt: [('con-', 'con')]
WMH19050301-V03-08-page1.txt: [('W.-', 'W.'), ('Mc-', 'Mc')]
WMH19050301-V03-08-page4.txt: [('-', '')]
WMH19050315-V03-10-page1.txt: [('-', ''), ('-', ''), ('-sending', 'sending'), ('De-', 'De')]
WMH19050315-V03-10-page3.txt: [('-proclaim', 'proclaim'), ('-and', 'and')]
WMH19050315-V03-10-page4.txt: [('agnos-', 'agnos'), ('ordi-', 'ordi')]
WMH19050322-V03-11-page1.txt: [('-HERALD.', 'HERALD.')]
WMH19050322-V03-11-page2.txt: [('-the', 'the'), ('-it', 'it'), ('-incident', 'incident'), ('at-', 'at')]
WMH19050322-V03-11-page3.txt: [('-oldest', 'oldest'), ('-became', 'became'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('hall-', 'hall'), ('neces-', 'neces')]
WMH19050322-V03-11-page5.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('C-', 'C'), ('G-', 'G'), ('-', '')]
WMH19050322-V03-11-page6.txt: [('-', ''), ('-', '')]
WMH19050329-V03-12-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Confer-', 'Confer'), ('-', ''), ('Depart-', 'Depart')]
WMH19050329-V03-12-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050329-V03-12-page4.txt: [('-', ''), ('-"The', '"The')]
WMH19050405-V03-13-page1.txt: [('I--', 'I-'), ('-', ''), ('CON-', 'CON')]
WMH19050405-V03-13-page4.txt: [('-', '')]
WMH19050413-V03-14-page1.txt: [('-', '')]
WMH19050413-V03-14-page2.txt: [('-', '')]
WMH19050413-V03-14-page3.txt: [('GIV-', 'GIV'), ('-disciplined', 'disciplined')]
WMH19050419-V03-15-page1.txt: [('-', ''), ('-', ''), ('confer-', 'confer')]
WMH19050419-V03-15-page3.txt: [('So-', 'So')]
WMH19050419-V03-15-page4.txt: [('-Dr.', 'Dr.')]
WMH19050426-V03-16-page1.txt: [('-', ''), ('Roth-', 'Roth')]
WMH19050426-V03-16-page2.txt: [('-', ''), ('-', ''), ('of-', 'of')]
WMH19050426-V03-16-page3.txt: [('-knees', 'knees')]
WMH19050426-V03-16-page4.txt: [('cur-', 'cur'), ('-', ''), ('type-', 'type')]
WMH19050503-V03-17-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('UTTER-', 'UTTER'), ('Ob-', 'Ob')]
WMH19050503-V03-17-page2.txt: [('activity-', 'activity'), ('in-', 'in')]
WMH19050503-V03-17-page3.txt: [('-THE', 'THE'), ('-well-officered', 'well-officered'), ('-and', 'and'), ('Pennsyl-', 'Pennsyl')]
WMH19050503-V03-17-page4.txt: [('-', '')]
WMH19050510-V03-18-page1.txt: [('di-', 'di'), ('-of', 'of'), ('teach-', 'teach')]
WMH19050510-V03-18-page2.txt: [('-', '')]
WMH19050510-V03-18-page4.txt: [('Seventh-', 'Seventh')]
WMH19050517-V03-19-page1.txt: [('lead-', 'lead'), ('-', '')]
WMH19050517-V03-19-page3.txt: [('-of', 'of')]
WMH19050517-V03-19-page4.txt: [('Confer-', 'Confer'), ('-ence', 'ence')]
WMH19050524-V03-20-page2.txt: [('call-', 'call')]
WMH19050524-V03-20-page3.txt: [('-possession.', 'possession.')]
WMH19050524-V03-20-page4.txt: [('-', '')]
WMH19050531-V03-21-page1.txt: [('-', '')]
WMH19050531-V03-21-page3.txt: [('MICH-', 'MICH'), ('-"Missionary', '"Missionary'), ('-be', 'be')]
WMH19050531-V03-21-page4.txt: [('-', '')]
WMH19050607-V03-22-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--live', '-live')]
WMH19050607-V03-22-page3.txt: [('-and', 'and'), ('-Encyclopedia', 'Encyclopedia'), ('-Luke', 'Luke'), ('-Isa.', 'Isa.'), ('-', ''), ('-', ''), ('-Matt.', 'Matt.'), ('-Jews', 'Jews'), ('--Heb.', '-Heb.'), ('-Rev.', 'Rev.'), ('-Isa.', 'Isa.'), ('-Neh.', 'Neh.'), ('-Ex.', 'Ex.'), ('-', ''), ('---Gen.', '--Gen.'), ('-', ''), ('--Gen.', '-Gen.'), ('-', ''), ('-', ''), ('-', ''), ('-Isa.', 'Isa.'), ('-', '')]
WMH19050607-V03-22-page4.txt: [('-', '')]
WMH19050614-V03-23-page2.txt: [('---"I', '--"I'), ('-', '')]
WMH19050614-V03-23-page3.txt: [('les-', 'les')]
WMH19050614-V03-23-page4.txt: [('-', ''), ('quar-', 'quar')]
WMH19050621-V03-24-page1.txt: [('-', ''), ('edu-', 'edu')]
WMH19050621-V03-24-page2.txt: [('-may', 'may'), ('-', ''), ('-', '')]
WMH19050621-V03-24-page3.txt: [('-', '')]
WMH19050621-V03-24-page4.txt: [('-', ''), ('-', '')]
WMH19050628-V03-25-page1.txt: [('-', ''), ('con-', 'con'), ('-', '')]
WMH19050705-V03-26-page1.txt: [('-s', 's'), ('a-', 'a'), ('--Selected.', '-Selected.'), ('Un-', 'Un')]
WMH19050705-V03-26-page2.txt: [('-by', 'by'), ('-', '')]
WMH19050705-V03-26-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-in', 'in'), ('gain-', 'gain'), ('-', '')]
WMH19050712-V03-27-page1.txt: [('re-', 're')]
WMH19050712-V03-27-page2.txt: [('Self-', 'Self'), ('defi-', 'defi')]
WMH19050712-V03-27-page3.txt: [('month-', 'month'), ('superintend-', 'superintend'), ('--Selected.', '-Selected.'), ('-', ''), ('-', ''), ('Sabbath-', 'Sabbath'), ('ef-', 'ef')]
WMH19050712-V03-27-page4.txt: [('-pain', 'pain')]
WMH19050719-V03-28-page1.txt: [('-', ''), ('arrang-', 'arrang')]
WMH19050719-V03-28-page3.txt: [('-We', 'We')]
WMH19050719-V03-28-page4.txt: [('-', '')]
WMH19050726-V03-29-page1.txt: [('-with', 'with'), ('CAMP-', 'CAMP'), ('-', ''), ('-', ''), ('-', '')]
WMH19050726-V03-29-page3.txt: [('being--', 'being-'), ('-to', 'to'), ('-', ''), ('-', '')]
WMH19050726-V03-29-page4.txt: [('-', ''), ('-', '')]
WMH19050802-V03-30-page1.txt: [('Camp-', 'Camp'), ('---health', '--health'), ('--for', '-for'), ('Camp-', 'Camp'), ('-', '')]
WMH19050802-V03-30-page2.txt: [('asked-', 'asked')]
WMH19050802-V03-30-page3.txt: [('Sabbath-', 'Sabbath'), ('--Selected.', '-Selected.')]
WMH19050802-V03-30-page4.txt: [('corn-', 'corn'), ('-', ''), ('Organiza-', 'Organiza')]
WMH19050809-V03-31-page1.txt: [('-', ''), ('--', '-'), ('Camp-', 'Camp'), ('-', '')]
WMH19050809-V03-31-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050809-V03-31-page3.txt: [('-', ''), ('neces-', 'neces'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050816-V03-32-page1.txt: [('inter-', 'inter'), ('-', '')]
WMH19050816-V03-32-page2.txt: [('fol-', 'fol'), ('them-', 'them')]
WMH19050816-V03-32-page3.txt: [('-Foster', 'Foster')]
WMH19050816-V03-32-page4.txt: [('-EZRA', 'EZRA')]
WMH19050830-V03-33-page2.txt: [('-', '')]
WMH19050830-V03-33-page3.txt: [('-', ''), ('-cents', 'cents'), ('-', ''), ('-', '')]
WMH19050830-V03-33-page4.txt: [('-to', 'to'), ('-book', 'book'), ('HER-', 'HER')]
WMH19050906-V03-34-page1.txt: [('Sabbath-', 'Sabbath')]
WMH19050906-V03-34-page2.txt: [('DEPART-', 'DEPART')]
WMH19050906-V03-34-page3.txt: [('--Selected.', '-Selected.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Camp-meet-', 'Camp-meet')]
WMH19050906-V03-34-page4.txt: [('-', ''), ('-at', 'at'), ('-', ''), ('work.-', 'work.'), ('-', ''), ('-', '')]
WMH19050913-V03-35-page1.txt: [('what-', 'what')]
WMH19050913-V03-35-page3.txt: [('--', '-'), ('under-', 'under')]
WMH19050913-V03-35-page4.txt: [('-', ''), ('-', '')]
WMH19050920-V03-36-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050920-V03-36-page2.txt: [('Sabbath-', 'Sabbath')]
WMH19050920-V03-36-page3.txt: [('-purpose', 'purpose')]
WMH19050920-V03-36-page4.txt: [('-', ''), ('-son', 'son'), ('REVIEW--', 'REVIEW-'), ('SIGNS-', 'SIGNS'), ('HEALTH-', 'HEALTH')]
WMH19050927-V03-37-page1.txt: [('-', ''), ('-I', 'I')]
WMH19050927-V03-37-page2.txt: [('-', ''), ('-low', 'low'), ('im-', 'im')]
WMH19050927-V03-37-page3.txt: [('cher-', 'cher')]
WMH19051004-V03-38-page1.txt: [('ad-', 'ad'), ('-', ''), ('-', ''), ('-', ''), ('Ad-', 'Ad')]
WMH19051004-V03-38-page2.txt: [('-', '')]
WMH19051004-V03-38-page3.txt: [('--Selected.', '-Selected.'), ('coun-', 'coun'), ('Yose-', 'Yose')]
WMH19051004-V03-38-page4.txt: [('and-', 'and'), ('thepro-', 'thepro')]
WMH19051011-V03-39-page1.txt: [('-', '')]
WMH19051011-V03-39-page2.txt: [('Sab-', 'Sab')]
WMH19051018-V03-40-page1.txt: [('corn-', 'corn')]
WMH19051018-V03-40-page2.txt: [('commandments-', 'commandments'), ('-much', 'much')]
WMH19051018-V03-40-page3.txt: [('-', ''), ('-', '')]
WMH19051018-V03-40-page4.txt: [('--Tarry', '-Tarry'), ('Bourdeau-', 'Bourdeau')]
WMH19051025-V03-41-page1.txt: [('-', '')]
WMH19051025-V03-41-page3.txt: [('--Selected.', '-Selected.'), ('pro-', 'pro')]
WMH19051025-V03-41-page4.txt: [('--also', '-also')]
WMH19051101-V03-42-page1.txt: [('future--', 'future-'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19051101-V03-42-page2.txt: [('Ad-', 'Ad')]
WMH19051101-V03-42-page3.txt: [('essential-', 'essential'), ('suf-', 'suf')]
WMH19051101-V03-42-page4.txt: [('-will', 'will'), ('MESSEN-', 'MESSEN'), ('-and', 'and'), ('-', '')]
WMH19051108-V03-43-page1.txt: [('domi-', 'domi'), ('-', ''), ('-', '')]
WMH19051108-V03-43-page3.txt: [('Mc-', 'Mc'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Maple', 'Maple')]
WMH19051108-V03-43-page4.txt: [('-of', 'of'), ('-', ''), ('-Editor', 'Editor')]
WMH19051122-V03-45-page1.txt: [('-', ''), ('-', ''), ('-"I\'ll', '"I\'ll'), ('-', ''), ('-', ''), ('A-', 'A'), ('-lambs', 'lambs'), ('Advent-', 'Advent')]
WMH19051122-V03-45-page2.txt: [('-', '')]
WMH19051122-V03-45-page4.txt: [('-', ''), ('-', ''), ('Broth-', 'Broth')]
WMH19051129-V03-46-page1.txt: [('-weary', 'weary'), ('sys-', 'sys')]
WMH19051129-V03-46-page2.txt: [('-', ''), ('in-', 'in')]
WMH19051129-V03-46-page3.txt: [('-this', 'this'), ('-', ''), ('-', '')]
WMH19051129-V03-46-page4.txt: [('LIT-', 'LIT'), ('LIT-', 'LIT')]
WMH19051206-V03-47-page1.txt: [('-our', 'our'), ('Cedar-', 'Cedar'), ('De-', 'De'), ('-', ''), ('--', '-'), ('interest-', 'interest')]
WMH19051206-V03-47-page2.txt: [('-', '')]
WMH19051206-V03-47-page3.txt: [('-fruit', 'fruit')]
WMH19051206-V03-47-page4.txt: [('ad-', 'ad'), ('-', '')]
WMH19051213-V03-48-page1.txt: [('recitation.-', 'recitation.')]
WMH19051213-V03-48-page2.txt: [('can-', 'can'), ('POT-', 'POT')]
WMH19051213-V03-48-page3.txt: [('-', ''), ('-', '')]
WMH19051213-V03-48-page4.txt: [('copies.-', 'copies.'), ('-Creek', 'Creek'), ('var-', 'var')]
WMH19051220-V03-49-page1.txt: [('right-', 'right'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19051220-V03-49-page2.txt: [('ADVENTIST-', 'ADVENTIST'), ('CO-', 'CO'), ('-', ''), ('Ix-', 'Ix')]
WMH19051220-V03-49-page3.txt: [('-', ''), ('-', '')]
WMH19051227-V03-50-page2.txt: [('-', ''), ("People's-", "People's"), ('-given', 'given'), ('-etc.', 'etc.'), ('-', '')]
WMH19051227-V03-50-page3.txt: [('-', ''), ('-Although', 'Although'), ('o-', 'o'), ('-', ''), ('Her-', 'Her')]
WMH19060103-V04-01-page1.txt: [('consider-', 'consider'), ('ask-', 'ask'), ('-.', '.')]
WMH19060103-V04-01-page2.txt: [('years..-', 'years..'), ('sin-', 'sin')]
WMH19060103-V04-01-page3.txt: [('Self-', 'Self'), ('birth-', 'birth')]
WMH19060103-V04-01-page4.txt: [('-', ''), ('-', '')]
WMH19060110-V04-02-page1.txt: [('-fifteen', 'fifteen'), ('-', '')]
WMH19060110-V04-02-page2.txt: [('-', ''), ('-Paw', 'Paw'), ('-', ''), ('-', ''), ('-', '')]
WMH19060110-V04-02-page4.txt: [('-', '')]
WMH19060117-V04-03-page1.txt: [('Secretary--', 'Secretary-'), ('-', '')]
WMH19060117-V04-03-page2.txt: [('CONFER-', 'CONFER')]
WMH19060117-V04-03-page3.txt: [('-loans', 'loans'), ('-', ''), ('R-', 'R'), ('Mich-', 'Mich'), ('-', '')]
WMH19060117-V04-03-page4.txt: [('-', ''), ('Offer-', 'Offer'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060124-V04-04-page1.txt: [('--jr', '-jr'), ('--', '-'), ('-', ''), ('-', ''), ('SABBATH-', 'SABBATH'), ('-', '')]
WMH19060124-V04-04-page2.txt: [('INCORPO-', 'INCORPO'), ('aggre-', 'aggre')]
WMH19060124-V04-04-page3.txt: [('Mc-', 'Mc'), ('-', ''), ('-', ''), ('-', '')]
WMH19060124-V04-04-page4.txt: [('-', ''), ('-', '')]
WMH19060131-V04-05-page1.txt: [('------', '-----'), ('-', ''), ('-luessiorpi', 'luessiorpi')]
WMH19060131-V04-05-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('Mis-', 'Mis')]
WMH19060131-V04-05-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sub-', 'sub'), ('-', '')]
WMH19060207-V04-06-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060207-V04-06-page3.txt: [('Spirit-', 'Spirit'), ('-', ''), ('place.-', 'place.')]
WMH19060214-V04-07-page1.txt: [('t-', 't'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060214-V04-07-page2.txt: [('-', ''), ('-Lessons', 'Lessons')]
WMH19060214-V04-07-page3.txt: [('-all', 'all'), ('-the', 'the'), ('Danish-', 'Danish')]
WMH19060214-V04-07-page4.txt: [('BOOK.-', 'BOOK.'), ('fitthem-', 'fitthem')]
WMH19060221-V04-08-page1.txt: [('--', '-'), ("''-is'-", "''-is'"), ('develop-', 'develop'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060221-V04-08-page2.txt: [('-', ''), ('adopt-', 'adopt')]
WMH19060221-V04-08-page3.txt: [('themselves-', 'themselves')]
WMH19060221-V04-08-page4.txt: [('-were', 'were')]
WMH19060228-V04-09-page1.txt: [('in-', 'in'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060228-V04-09-page2.txt: [('-', '')]
WMH19060228-V04-09-page3.txt: [('-', ''), ('me-', 'me')]
WMH19060307-V04-10-page1.txt: [('-', ''), ('Vice-', 'Vice'), ('-', ''), ('-', '')]
WMH19060307-V04-10-page2.txt: [('--the', '-the'), ('-', '')]
WMH19060307-V04-10-page3.txt: [('-', ''), ('-theory', 'theory')]
WMH19060307-V04-10-page4.txt: [('in-', 'in'), ('-', ''), ('-', ''), ('-', '')]
WMH19060314-V04-11-page1.txt: [('-', ''), ('con-', 'con'), ('-', ''), ('-', '')]
WMH19060314-V04-11-page2.txt: [('-', '')]
WMH19060314-V04-11-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sug-', 'sug')]
WMH19060314-V04-11-page4.txt: [('way.-', 'way.'), ('MICHI-', 'MICHI'), ('-', '')]
WMH19060321-V04-12-page1.txt: [('con-', 'con'), ('-', '')]
WMH19060321-V04-12-page2.txt: [('HER-', 'HER')]
WMH19060321-V04-12-page3.txt: [('spelling--', 'spelling-'), ('-future', 'future'), ('San-', 'San')]
WMH19060321-V04-12-page4.txt: [('-Will', 'Will')]
WMH19060328-V04-13-page1.txt: [('--', '-'), ('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sup-', 'sup')]
WMH19060328-V04-13-page2.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19060328-V04-13-page3.txt: [('-', ''), ('under-', 'under'), ('receiver-', 'receiver')]
WMH19060328-V04-13-page4.txt: [('--', '-'), ('-', ''), ('-', ''), ('-', '')]
WMH19060404-V04-14-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060404-V04-14-page3.txt: [('-Uttered', 'Uttered'), ('--', '-')]
WMH19060404-V04-14-page4.txt: [('-', '')]
WMH19060411-V04-15-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Conven-', 'Conven')]
WMH19060411-V04-15-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Offer-', 'Offer'), ('-', '')]
WMH19060411-V04-15-page3.txt: [('-', ''), ('-ro', 'ro')]
WMH19060411-V04-15-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060418-V04-16-page1.txt: [('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('---the', '--the'), ('-', ''), ('-', ''), ('work-', 'work')]
WMH19060418-V04-16-page3.txt: [('-', ''), ('-it', 'it')]
WMH19060418-V04-16-page4.txt: [('Hunts-', 'Hunts'), ('-', '')]
WMH19060425-V04-17-page1.txt: [('-in', 'in'), ('build-', 'build'), ('-', '')]
WMH19060425-V04-17-page2.txt: [('--FLORENCE', '-FLORENCE'), ('right-', 'right')]
WMH19060425-V04-17-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('SABBATH-', 'SABBATH')]
WMH19060425-V04-17-page4.txt: [('AD-', 'AD'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060502-V04-18-page1.txt: [('-', ''), ('suf-', 'suf'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-')]
WMH19060502-V04-18-page3.txt: [('excel-', 'excel'), ('-lent', 'lent'), ('ARBEI-', 'ARBEI'), ('arrange-', 'arrange'), ('in-', 'in')]
WMH19060502-V04-18-page4.txt: [('-to', 'to'), ('-church', 'church')]
WMH19060509-V04-19-page1.txt: [('-', ''), ('GATHERED-', 'GATHERED'), ('"---', '"--')]
WMH19060509-V04-19-page2.txt: [('-I', 'I'), ('-', ''), ('-', ''), ('-.God.', '.God.'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('PROPH-', 'PROPH'), ('PROPH-', 'PROPH'), ('PROPH-', 'PROPH'), ('Lakeview-', 'Lakeview')]
WMH19060509-V04-19-page3.txt: [('-', ''), ('-', ''), ('Rogers-', 'Rogers'), ('-', ''), ('-ho', 'ho'), ('near-', 'near'), ('be-', 'be'), ('mat-', 'mat'), ('-', '')]
WMH19060523-V04-20-page2.txt: [('-', ''), ('-man', 'man')]
WMH19060523-V04-20-page3.txt: [('-', ''), ('-', '')]
WMH19060530-V04-21-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-school', 'school'), ('-', ''), ('-', ''), ('-', '')]
WMH19060530-V04-21-page2.txt: [('-', ''), ('-', ''), ('denomi-', 'denomi')]
WMH19060530-V04-21-page3.txt: [('im-', 'im')]
WMH19060530-V04-21-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060606-V04-22-page1.txt: [('..-', '..')]
WMH19060606-V04-22-page2.txt: [('-', '')]
WMH19060606-V04-22-page3.txt: [('-Sec.', 'Sec.'), ('.-', '.'), ('SUP-', 'SUP'), ('publishers.compliment-', 'publishers.compliment')]
WMH19060606-V04-22-page4.txt: [('-Literary', 'Literary'), ('Michi-', 'Michi'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060613-V04-23-page1.txt: [('-Sabbath-school', 'Sabbath-school'), ('-Kalamazoo', 'Kalamazoo')]
WMH19060613-V04-23-page3.txt: [('-', ''), ('-', ''), ('mail-', 'mail'), ('-', '')]
WMH19060613-V04-23-page4.txt: [('ES-', 'ES'), ('Missis.-', 'Missis.'), ('-', '')]
WMH19060620-V04-24-page1.txt: [('-', ''), ('-', ''), ('-utmost', 'utmost')]
WMH19060620-V04-24-page2.txt: [('REPENT-', 'REPENT'), ('RE-', 'RE'), ('REPENT-', 'REPENT'), ('-', '')]
WMH19060620-V04-24-page3.txt: [('-obedience', 'obedience'), ('-', '')]
WMH19060620-V04-24-page4.txt: [('returning-', 'returning'), ('ut-', 'ut')]
WMH19060627-V04-25-page1.txt: [('viz.--', 'viz.-'), ('-four-page', 'four-page'), ('-', ''), ('Im-', 'Im')]
WMH19060627-V04-25-page2.txt: [('-RST', 'RST'), ('sympaths-', 'sympaths')]
WMH19060627-V04-25-page4.txt: [('-', '')]
WMH19060704-V04-26-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('nee-', 'nee')]
WMH19060704-V04-26-page2.txt: [('-the', 'the'), ('some-', 'some')]
WMH19060704-V04-26-page3.txt: [('per-', 'per'), ('--Success.', '-Success.')]
WMH19060704-V04-26-page4.txt: [('-', '')]
WMH19060711-V04-27-page1.txt: [('in-', 'in')]
WMH19060711-V04-27-page2.txt: [('-', ''), ('Offerings-', 'Offerings'), ('-', '')]
WMH19060711-V04-27-page3.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19060711-V04-27-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('employ-', 'employ'), ('Contro-', 'Contro')]
WMH19060718-V04-28-page1.txt: [('-', ''), ('lir-', 'lir'), ('-', ''), ('-school', 'school'), ('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', '')]
WMH19060718-V04-28-page2.txt: [('-', ''), ('ends--', 'ends-'), ('sup-', 'sup')]
WMH19060718-V04-28-page3.txt: [('Con-', 'Con')]
WMH19060718-V04-28-page4.txt: [('--', '-'), ('Pennsyl-', 'Pennsyl')]
WMH19060725-V04-29-page1.txt: [('-', ''), ('REAPETHGATHRETI-', 'REAPETHGATHRETI'), ('\'"--', '\'"-')]
WMH19060725-V04-29-page2.txt: [('es-', 'es'), ('re-', 're')]
WMH19060725-V04-29-page3.txt: [('COL-', 'COL')]
WMH19060725-V04-29-page4.txt: [('-', ''), ('Ed-', 'Ed'), ('Healing--', 'Healing-'), ('"Left-', '"Left'), ('Safe-', 'Safe')]
WMH19060801-V04-30-page1.txt: [('-', '')]
WMH19060801-V04-30-page2.txt: [('us-', 'us')]
WMH19060801-V04-30-page3.txt: [('"A"-', '"A"')]
WMH19060808-V04-31-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('APPRO-', 'APPRO'), ('CAN-', 'CAN'), ('REA-', 'REA')]
WMH19060808-V04-31-page3.txt: [('I-', 'I'), ('-', ''), ('re-', 're')]
WMH19060808-V04-31-page4.txt: [('.-', '.'), ('con-', 'con'), ('-followers', 'followers'), ('-', '')]
WMH19060822-V04-32-page1.txt: [('a-', 'a'), ('r-', 'r'), ('-', ''), ('-study', 'study'), ('-', '')]
WMH19060822-V04-32-page2.txt: [('-', '')]
WMH19060822-V04-32-page3.txt: [('-', ''), ('-', ''), ('to-day--', 'to-day-'), ('political--', 'political-')]
WMH19060822-V04-32-page4.txt: [('-had', 'had')]
WMH19060829-V04-33-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060829-V04-33-page2.txt: [('-the', 'the'), ('-one', 'one'), ('-', '')]
WMH19060829-V04-33-page3.txt: [('-', ''), ('-', '')]
WMH19060829-V04-33-page4.txt: [('-', '')]
WMH19060905-V04-34-page1.txt: [('-', ''), ('--Isaac', '-Isaac'), ('-', ''), ('so.-', 'so.')]
WMH19060905-V04-34-page2.txt: [('SPE-', 'SPE'), ('-', '')]
WMH19060905-V04-34-page3.txt: [('lle-', 'lle'), ('Confer-', 'Confer')]
WMH19060912-V04-35-page1.txt: [('pro-', 'pro'), ('-', '')]
WMH19060912-V04-35-page2.txt: [('-', ''), ('-six', 'six')]
WMH19060912-V04-35-page3.txt: [('re-', 're'), ('-Children', 'Children')]
WMH19060919-V04-36-page1.txt: [('-', ''), ('-by', 'by'), ('-', ''), ('-heart', 'heart'), ('-Exclaims', 'Exclaims'), ('uncorrupti-', 'uncorrupti')]
WMH19060919-V04-36-page2.txt: [('-', ''), ('-', ''), ('mem-', 'mem')]
WMH19060919-V04-36-page3.txt: [('-breaking', 'breaking')]
WMH19060919-V04-36-page4.txt: [('-and', 'and'), ('-', '')]
WMH19060926-V04-37-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('o-', 'o'), ('-', ''), ('-', ''), ('Mes-', 'Mes'), ('-', ''), ('-', ''), ('-', '')]
WMH19060926-V04-37-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060926-V04-37-page3.txt: [('schools.--', 'schools.-')]
WMH19060926-V04-37-page4.txt: [('"sulpherbag-', '"sulpherbag'), ('exalt-', 'exalt')]
WMH19061003-V04-38-page1.txt: [('-', ''), ('----', '---'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.')]
WMH19061003-V04-38-page4.txt: [('-', ''), ('-Sam', 'Sam'), ('-', '')]
WMH19061010-V04-39-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061010-V04-39-page2.txt: [('-', ''), ('-', '')]
WMH19061010-V04-39-page3.txt: [('possible-', 'possible')]
WMH19061017-V04-40-page1.txt: [('EAST-', 'EAST'), ('SEND-', 'SEND'), ('TAK-', 'TAK'), ('-', ''), ('--', '-'), ('GATf.-', 'GATf.'), ('-', ''), ('-', ''), ('-', ''), ('con-', 'con')]
WMH19061017-V04-40-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Pub-', 'Pub'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('im-', 'im'), ('every-', 'every'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-variety', 'variety'), ('ad-', 'ad'), ('how-', 'how'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061017-V04-40-page3.txt: [('intelligently-', 'intelligently'), ('es-', 'es'), ('SWAHN-', 'SWAHN'), ('sec-', 'sec')]
WMH19061017-V04-40-page4.txt: [('RyDER-', 'RyDER'), ('-with', 'with')]
WMH19061024-V04-41-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19061024-V04-41-page3.txt: [('--Education.', '-Education.')]
WMH19061024-V04-41-page4.txt: [('-the', 'the')]
WMH19061031-V04-42-page1.txt: [('-wholly', 'wholly'), ('pur-', 'pur'), ('-', ''), ('-', ''), ('-', ''), ('INTERNA-', 'INTERNA'), ('INTER-', 'INTER')]
WMH19061031-V04-42-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('P-', 'P'), ('-', '')]
WMH19061031-V04-42-page3.txt: [('de-', 'de'), ('--', '-'), ('-----', '----'), ('hav-', 'hav')]
WMH19061107-V04-43-page1.txt: [('-', ''), ('-', ''), ('--This', '-This'), ('--', '-')]
WMH19061107-V04-43-page2.txt: [('---I', '--I')]
WMH19061107-V04-43-page3.txt: [('Gener-', 'Gener')]
WMH19061107-V04-43-page4.txt: [('-', '')]
WMH19061114-V04-44-page1.txt: [('-', ''), ('-', ''), ('--upmuscle', '-upmuscle'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--I', '-I')]
WMH19061114-V04-44-page2.txt: [('--I', '-I'), ('child-', 'child')]
WMH19061114-V04-44-page3.txt: [('submerg-', 'submerg')]
WMH19061114-V04-44-page4.txt: [('-the', 'the'), ('De-', 'De'), ('-', ''), ('-', '')]
WMH19061121-V04-45-page1.txt: [('-', ''), ('-that', 'that'), ('--I', '-I'), ('--labored', '-labored'), ('--Well', '-Well'), ('-', '')]
WMH19061121-V04-45-page2.txt: [('--Chr', '-Chr')]
WMH19061121-V04-45-page3.txt: [('-to', 'to'), ('doubt-', 'doubt'), ('in-', 'in')]
WMH19061121-V04-45-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061128-V04-46-page1.txt: [('qmir-', 'qmir'), ('-', ''), ('work.-', 'work.')]
WMH19061128-V04-46-page2.txt: [('min-', 'min'), ('dis-', 'dis')]
WMH19061128-V04-46-page3.txt: [('re-', 're'), ('-', ''), ('let-', 'let')]
WMH19061128-V04-46-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061205-V04-47-page1.txt: [('--Selected.', '-Selected.'), ('Sunday-', 'Sunday'), ('moun-', 'moun'), ('-', ''), ('-', '')]
WMH19061205-V04-47-page2.txt: [('-', '')]
WMH19061205-V04-47-page3.txt: [('-', ''), ('-', '')]
WMH19061205-V04-47-page4.txt: [('occasion.-', 'occasion.')]
WMH19061212-V04-48-page1.txt: [('-', ''), ('-Let', 'Let'), ('-the', 'the'), ('Wednes-', 'Wednes'), ('-', ''), ('-', ''), ('-', ''), ('work-', 'work'), ('-', ''), ('-', '')]
WMH19061212-V04-48-page2.txt: [('-', ''), ('AD-', 'AD')]
WMH19061212-V04-48-page3.txt: [('mani-', 'mani')]
WMH19061212-V04-48-page4.txt: [('-', ''), ('-', '')]
WMH19061219-V04-49-page1.txt: [('-', ''), ('-not', 'not'), ('--', '-')]
WMH19061219-V04-49-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061226-V04-50-page1.txt: [('-', ''), ('--', '-'), ('-', ''), ('Sunday-', 'Sunday'), ('-', '')]
WMH19061226-V04-50-page3.txt: [('..-', '..'), ('-paper', 'paper'), ('-"Jesus', '"Jesus')]
WMH19061226-V04-50-page4.txt: [('--toe', '-toe')]
WMH19070102-V05-01-page1.txt: [('-that', 'that'), ('-on', 'on'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070102-V05-01-page2.txt: [('educa-', 'educa'), ('-', ''), ('-and', 'and'), ('Sunday-', 'Sunday')]
WMH19070102-V05-01-page4.txt: [('-of', 'of'), ('I-', 'I'), ('-', '')]
WMH19070109-V05-02-page1.txt: [('glar--', 'glar-'), ('ATI-', 'ATI'), ('-', '')]
WMH19070109-V05-02-page2.txt: [('the-', 'the'), ('Pres.-', 'Pres.')]
WMH19070109-V05-02-page3.txt: [('outpeo-', 'outpeo')]
WMH19070109-V05-02-page4.txt: [('-', ''), ('-', ''), ('De-', 'De'), ('-', '')]
WMH19070116-V05-03-page1.txt: [('-.', '.'), ('-', ''), ('-', ''), ('-take', 'take'), ('-the', 'the'), ('-come', 'come')]
WMH19070116-V05-03-page2.txt: [('or-', 'or'), ('-der', 'der')]
WMH19070116-V05-03-page3.txt: [('Fiske-', 'Fiske')]
WMH19070123-V05-04-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('W.J-', 'W.J'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070123-V05-04-page2.txt: [('-simple', 'simple'), ('ex-', 'ex'), ('-', '')]
WMH19070123-V05-04-page3.txt: [('-', ''), ('con-', 'con'), ('-of', 'of')]
WMH19070123-V05-04-page4.txt: [('opposi-', 'opposi'), ('-shed', 'shed')]
WMH19070130-V05-05-page1.txt: [('accord-', 'accord'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070130-V05-05-page4.txt: [('-', ''), ('RE-', 'RE')]
WMH19070206-V05-06-page1.txt: [('-', ''), ('----', '---'), ('-would', 'would'), ('camp-meet-', 'camp-meet'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070206-V05-06-page2.txt: [('Mt.-', 'Mt.'), ('-A', 'A'), ('-', ''), ('Barretr"-', 'Barretr"'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070206-V05-06-page3.txt: [('-denomination', 'denomination'), ('-', '')]
WMH19070206-V05-06-page4.txt: [('-is', 'is'), ('-similar', 'similar'), ('-', '')]
WMH19070213-V05-07-page1.txt: [('ac-', 'ac'), ('Depart-', 'Depart')]
WMH19070213-V05-07-page2.txt: [('-', '')]
WMH19070213-V05-07-page4.txt: [('.seal-', '.seal'), ('-and', 'and')]
WMH19070220-V05-08-page1.txt: [('-to', 'to')]
WMH19070220-V05-08-page2.txt: [('OFFER-', 'OFFER'), ('-', '')]
WMH19070220-V05-08-page3.txt: [('-', ''), ('-', '')]
WMH19070220-V05-08-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070227-V05-09-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Confer-', 'Confer'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070227-V05-09-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070227-V05-09-page3.txt: [('Dis-', 'Dis'), ('-to', 'to'), ('-colporter', 'colporter'), ('truth.-', 'truth.')]
WMH19070227-V05-09-page4.txt: [('HERALD.-', 'HERALD.'), ('-', ''), ('-i', 'i'), ('--', '-'), ('-', ''), ('-', ''), ('-than', 'than'), ('Re-', 'Re')]
WMH19070306-V05-10-page1.txt: [('-stand', 'stand')]
WMH19070306-V05-10-page2.txt: [('SOLD-', 'SOLD'), ('-', '')]
WMH19070306-V05-10-page3.txt: [('LIBER-', 'LIBER')]
WMH19070306-V05-10-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19070313-V05-11-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-plain', 'plain'), ('--"And', '-"And'), ('--met', '-met')]
WMH19070313-V05-11-page2.txt: [('-', '')]
WMH19070313-V05-11-page3.txt: [('--practical', '-practical')]
WMH19070313-V05-11-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('or-', 'or'), ('Consti-', 'Consti'), ('-', '')]
WMH19070320-V05-12-page2.txt: [('-', '')]
WMH19070320-V05-12-page3.txt: [('-r', 'r'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070327-V05-13-page3.txt: [('associat-', 'associat'), ('consci-', 'consci'), ('--', '-')]
WMH19070327-V05-13-page4.txt: [('--', '-'), ('RE-', 'RE'), ('de-', 'de'), ('-', '')]
WMH19070403-V05-14-page1.txt: [('-', ''), ('II-', 'II')]
WMH19070403-V05-14-page2.txt: [('-', ''), ('con-', 'con')]
WMH19070403-V05-14-page3.txt: [('-know', 'know')]
WMH19070403-V05-14-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070410-V05-15-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070410-V05-15-page2.txt: [('--', '-'), ('-', '')]
WMH19070410-V05-15-page4.txt: [('-', '')]
WMH19070417-V05-16-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('cor-', 'cor'), ('-', ''), ('-', ''), ('REAPETI-', 'REAPETI'), ('be-', 'be')]
WMH19070417-V05-16-page2.txt: [('Danish-', 'Danish')]
WMH19070417-V05-16-page3.txt: [('-', ''), ('--a', '-a')]
WMH19070417-V05-16-page4.txt: [('Pil-', 'Pil')]
WMH19070424-V05-17-page1.txt: [('GATHERED-', 'GATHERED'), ('-', ''), ('-', ''), ('corn-', 'corn'), ('-', ''), ('-', ''), ('-', '')]
WMH19070424-V05-17-page2.txt: [('the-', 'the'), ('IN-', 'IN'), ('IN-', 'IN'), ('-', '')]
WMH19070424-V05-17-page3.txt: [('oforthog-', 'oforthog')]
WMH19070424-V05-17-page4.txt: [('-', '')]
WMH19070501-V05-18-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('wasevery-', 'wasevery'), ('chap-', 'chap'), ('them-', 'them'), ('-', ''), ('-', ''), ('-', '')]
WMH19070501-V05-18-page2.txt: [('confede-', 'confede')]
WMH19070501-V05-18-page3.txt: [('devotedcanvas-', 'devotedcanvas')]
WMH19070501-V05-18-page4.txt: [('interested-', 'interested'), ('-', '')]
WMH19070508-V05-19-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('NOW.-', 'NOW.'), ('plain-', 'plain')]
WMH19070508-V05-19-page2.txt: [('-', ''), ('pur-', 'pur')]
WMH19070508-V05-19-page3.txt: [('-', ''), ('understand.--', 'understand.-')]
WMH19070508-V05-19-page4.txt: [('-', '')]
WMH19070515-V05-20-page1.txt: [('-', ''), ('-', ''), ('ac-', 'ac'), ('--', '-')]
WMH19070515-V05-20-page3.txt: [('Attor-', 'Attor'), ('-', ''), ('-', ''), ('Church-', 'Church'), ('-----', '----'), ('-', ''), ('-', '')]
WMH19070515-V05-20-page4.txt: [('--he', '-he'), ('Conference-', 'Conference'), ('WATCH-', 'WATCH'), ('-', '')]
WMH19070522-V05-21-page1.txt: [('-es', 'es'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070522-V05-21-page3.txt: [('-', '')]
WMH19070522-V05-21-page4.txt: [('Seventh-', 'Seventh'), ('--praise', '-praise'), ('-', ''), ('-', ''), ('-', ''), ('-more', 'more')]
WMH19070529-V05-22-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070529-V05-22-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('af-', 'af')]
WMH19070529-V05-22-page3.txt: [('--', '-')]
WMH19070529-V05-22-page4.txt: [('-', ''), ('-A.', 'A.'), ('-', ''), ('-', ''), ('-', ''), ('refresh-', 'refresh')]
WMH19070605-V05-23-page1.txt: [('--', '-'), ('--every', '-every'), ('-', ''), ('-', ''), ('-', '')]
WMH19070605-V05-23-page2.txt: [('waiting--', 'waiting-')]
WMH19070605-V05-23-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('pub-', 'pub'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070612-V05-24-page1.txt: [('-intercessor.', 'intercessor.'), ('-', '')]
WMH19070612-V05-24-page3.txt: [('-', ''), ('experience--', 'experience-'), ('Sabbath-', 'Sabbath')]
WMH19070619-V05-25-page1.txt: [('con-', 'con'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070619-V05-25-page2.txt: [('-', ''), ('--In', '-In'), ('-', ''), ('-', ''), ('-', '')]
WMH19070619-V05-25-page3.txt: [('-', ''), ('Sabbath-', 'Sabbath'), ('Sabbath-', 'Sabbath'), ('-of', 'of')]
WMH19070619-V05-25-page4.txt: [('-', '')]
WMH19070626-V05-26-page1.txt: [('-', ''), ('adorn-', 'adorn'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070626-V05-26-page2.txt: [('--instructs', '-instructs')]
WMH19070626-V05-26-page3.txt: [('HER-', 'HER')]
WMH19070626-V05-26-page4.txt: [('them-', 'them'), ('--affirmed.', '-affirmed.')]
WMH19070703-V05-27-page1.txt: [('-', '')]
WMH19070703-V05-27-page4.txt: [('-', ''), ('-', '')]
WMH19070710-V05-28-page1.txt: [('GATHERETI-', 'GATHERETI'), ('--', '-'), ('Sabbath-', 'Sabbath'), ('-training.', 'training.'), ('--not', '-not')]
WMH19070710-V05-28-page2.txt: [('oc-', 'oc')]
WMH19070710-V05-28-page4.txt: [('-', ''), ('Camp-', 'Camp'), ('ques-', 'ques')]
WMH19070717-V05-29-page1.txt: [('-', ''), ('-', ''), ('de-', 'de'), ('-----', '----'), ('-school', 'school'), ('-', ''), ('-', ''), ('-', '')]
WMH19070717-V05-29-page2.txt: [('FATH-', 'FATH'), ('righteous-', 'righteous')]
WMH19070724-V05-30-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('E-', 'E'), ('-.', '.'), ('-', '')]
WMH19070724-V05-30-page2.txt: [('MORN-', 'MORN')]
WMH19070731-V05-31-page1.txt: [('re-', 're')]
WMH19070731-V05-31-page2.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19070731-V05-31-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070807-V05-32-page1.txt: [('--', '-'), ('-', ''), ('-"', '"'), ('-notify', 'notify')]
WMH19070807-V05-32-page2.txt: [('Camp--', 'Camp-'), ('-', '')]
WMH19070807-V05-32-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('--health', '-health'), ('Seventh-', 'Seventh'), ('-', '')]
WMH19070807-V05-32-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19070814-V05-33-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-dispensation.', 'dispensation.')]
WMH19070814-V05-33-page2.txt: [('-', '')]
WMH19070814-V05-33-page3.txt: [('mis-', 'mis')]
WMH19070814-V05-33-page4.txt: [('-', ''), ('per-', 'per')]
WMH19070828-V05-34-page1.txt: [('-', '')]
WMH19070828-V05-34-page2.txt: [('-a', 'a'), ('-', ''), ('-', ''), ('prov-', 'prov')]
WMH19070828-V05-34-page3.txt: [('-', '')]
WMH19070828-V05-34-page4.txt: [('-', ''), ('.ASSOCIA-', '.ASSOCIA'), ('-page', 'page')]
WMH19070904-V05-35-page1.txt: [('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070904-V05-35-page2.txt: [('de-', 'de')]
WMH19070904-V05-35-page3.txt: [('-', '')]
WMH19070911-V05-36-page1.txt: [('-', ''), ('GATHRETI-', 'GATHRETI'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070911-V05-36-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070918-V05-37-page1.txt: [('Dr.-', 'Dr.'), ("'-", "'"), ('GATLiERETI-', 'GATLiERETI')]
WMH19070918-V05-37-page2.txt: [('-coming', 'coming'), ('SERIES-', 'SERIES'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070918-V05-37-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070918-V05-37-page4.txt: [('LAN-', 'LAN'), ('-', ''), ('es-', 'es'), ('-', ''), ('-', ''), ('-', '')]
WMH19070925-V05-38-page1.txt: [('re-', 're'), ('hear-', 'hear'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('GATHERETI-', 'GATHERETI')]
WMH19070925-V05-38-page3.txt: [('-', ''), ('-', ''), ('---', '--'), ('-Covert', 'Covert')]
WMH19070925-V05-38-page4.txt: [('beau-', 'beau')]
WMH19071002-V05-39-page1.txt: [('-', ''), ('insti-', 'insti'), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', '')]
WMH19071002-V05-39-page2.txt: [('-', ''), ('-', '')]
WMH19071002-V05-39-page3.txt: [('-on', 'on')]
WMH19071009-V05-40-page1.txt: [('-reined', 'reined'), ('mot-', 'mot'), ('-', ''), ('-This', 'This'), ('begin-', 'begin'), ('"-----', '"----'), ('-', ''), ('-', '')]
WMH19071009-V05-40-page2.txt: [('par-', 'par')]
WMH19071009-V05-40-page3.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-', '')]
WMH19071009-V05-40-page4.txt: [('-', ''), ('-Doctor', 'Doctor'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071016-V05-41-page1.txt: [('-Last', 'Last'), ('con-', 'con'), ('-', ''), ('-', ''), ('-', ''), ('-HE', 'HE'), ('-', ''), ('al-', 'al'), ('para-', 'para'), ('un-', 'un'), ('self-', 'self')]
WMH19071016-V05-41-page2.txt: [('en-', 'en'), ('-joy', 'joy'), ('per-', 'per')]
WMH19071016-V05-41-page3.txt: [('-a', 'a'), ('GENERA-', 'GENERA')]
WMH19071016-V05-41-page4.txt: [('-', ''), ('WORK-', 'WORK')]
WMH19071023-V05-42-page1.txt: [('com-', 'com'), ('-', ''), ('-vil', 'vil'), ('-', ''), ('confirm-', 'confirm'), ('question-', 'question')]
WMH19071023-V05-42-page2.txt: [('-', ''), ('re-', 're'), ('-', '')]
WMH19071023-V05-42-page3.txt: [('-field.', 'field.'), ('-cometogether.', 'cometogether.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071023-V05-42-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071030-V05-43-page1.txt: [('-', '')]
WMH19071030-V05-43-page2.txt: [('Mich-', 'Mich')]
WMH19071030-V05-43-page3.txt: [('.-', '.'), ('-', ''), ('-', ''), ('-Ps.', 'Ps.')]
WMH19071030-V05-43-page4.txt: [('-both', 'both'), ('-', '')]
WMH19071106-V05-44-page1.txt: [("'--", "'-"), ('GOV-', 'GOV'), ('SUPER-', 'SUPER'), ('SAB-', 'SAB')]
WMH19071106-V05-44-page2.txt: [('DISESTAB-', 'DISESTAB'), ('--', '-'), ('-', '')]
WMH19071106-V05-44-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('w.-', 'w.'), ('-bara', 'bara'), ('-', '')]
WMH19071106-V05-44-page4.txt: [('-Note', 'Note'), ('-', ''), ('-The', 'The')]
WMH19071113-V05-45-page1.txt: [('instruct-', 'instruct'), ('GATOERETI-', 'GATOERETI')]
WMH19071113-V05-45-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071113-V05-45-page4.txt: [('-', '')]
WMH19071120-V05-46-page1.txt: [('eIRV-', 'eIRV'), ('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071120-V05-46-page2.txt: [('-with', 'with'), ('--aneyear', '-aneyear'), ('-members', 'members'), ('SERIES-', 'SERIES')]
WMH19071120-V05-46-page3.txt: [('--', '-'), ('-', ''), ('Ending-', 'Ending'), ('-', '')]
WMH19071127-V05-47-page1.txt: [('-', ''), ('-', ''), ('Vr-', 'Vr'), ('ans-', 'ans'), ('pre-', 'pre'), ('-and', 'and'), ('-nothingness', 'nothingness'), ('im-', 'im'), ('be-', 'be')]
WMH19071127-V05-47-page2.txt: [("-widow's", "widow's"), ('-a', 'a'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-i', 'i'), ('-', ''), ('-', ''), ('-ix.', 'ix.'), ('-', '')]
WMH19071127-V05-47-page3.txt: [('-', ''), ('RE-', 'RE'), ('DE-', 'DE'), ('"sur-', '"sur')]
WMH19071127-V05-47-page4.txt: [('-', '')]
WMH19071204-V05-48-page1.txt: [('-', ''), ('faith-', 'faith')]
WMH19071204-V05-48-page2.txt: [('-', '')]
WMH19071204-V05-48-page3.txt: [('-', ''), ('-Their', 'Their')]
WMH19071204-V05-48-page4.txt: [('-', ''), ('reg-', 'reg')]
WMH19071211-V05-49-page1.txt: [('-', ''), ('-lad', 'lad'), ('-disease', 'disease'), ('-', '')]
WMH19071211-V05-49-page2.txt: [('-', ''), ('-', ''), ('the-', 'the')]
WMH19071211-V05-49-page3.txt: [('-', ''), ('Carr-', 'Carr'), ('Le-', 'Le')]
WMH19071211-V05-49-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071218-V05-50-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('temp-', 'temp'), ('-', ''), ('ser-', 'ser')]
WMH19071218-V05-50-page2.txt: [('-Lord.', 'Lord.'), ('in-', 'in')]
WMH19071218-V05-50-page3.txt: [('-', ''), ('criti-', 'criti'), ('-', '')]
WMH19071218-V05-50-page4.txt: [('-', ''), ('Ed-', 'Ed'), ('-', ''), ('--', '-'), ('-', ''), ('-', '')]
WMH19080101-V06-01-page2.txt: [('r-', 'r'), ('WATCH-', 'WATCH'), ('--', '-'), ('-ceed.', 'ceed.'), ('success."-', 'success."'), ('-', ''), ('-', '')]
WMH19080101-V06-01-page3.txt: [('-', ''), ('sub-', 'sub'), ('-been', 'been')]
WMH19080101-V06-01-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-the', 'the'), ('consecrat-', 'consecrat'), ("-'Our", "'Our"), ('Con-', 'Con'), ('-from', 'from')]
WMH19080108-V06-02-page1.txt: [('mic.-', 'mic.'), ('--Right', '-Right'), ('-', ''), ('-exciteme', 'exciteme'), ('-The', 'The'), ('gath-', 'gath'), ('-public', 'public'), ('informa-', 'informa')]
WMH19080108-V06-02-page2.txt: [('-done', 'done'), ('Grand-', 'Grand'), ('-', '')]
WMH19080108-V06-02-page3.txt: [('-', ''), ('-II.', 'II.'), ('-man.', 'man.'), ('-', '')]
WMH19080108-V06-02-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080115-V06-03-page1.txt: [('WATCH-', 'WATCH')]
WMH19080115-V06-03-page2.txt: [('-Our', 'Our'), ('WATCH-', 'WATCH'), ('-', '')]
WMH19080115-V06-03-page3.txt: [('distri-', 'distri'), ('-company', 'company'), ('worle-', 'worle')]
WMH19080115-V06-03-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080122-V06-04-page1.txt: [('-the', 'the'), ('pic-', 'pic'), ('faith-', 'faith')]
WMH19080122-V06-04-page2.txt: [('-and', 'and'), ('fashion-', 'fashion')]
WMH19080122-V06-04-page3.txt: [('-', '')]
WMH19080122-V06-04-page4.txt: [('--Wellspri', '-Wellspri'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080129-V06-05-page1.txt: [('fte-', 'fte'), ('GATHERETI-', 'GATHERETI')]
WMH19080129-V06-05-page2.txt: [('-', '')]
WMH19080129-V06-05-page3.txt: [('And-', 'And'), ('-This', 'This'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080129-V06-05-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('BET-', 'BET'), ('-', ''), ('-', ''), ('-', '')]
WMH19080205-V06-06-page1.txt: [('-..t..Pft', '..t..Pft'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-is', 'is')]
WMH19080205-V06-06-page2.txt: [('church-', 'church')]
WMH19080205-V06-06-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080212-V06-07-page1.txt: [('Thous-', 'Thous'), ('-', ''), ('-', ''), ('-', ''), ('Mission-', 'Mission'), ('-', ''), ('-', ''), ('Michigan-', 'Michigan'), ('Ex-', 'Ex'), ('-', ''), ('Nash-', 'Nash'), ('-', '')]
WMH19080212-V06-07-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Conf.-', 'Conf.'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080212-V06-07-page3.txt: [('-present', 'present'), ('-loss', 'loss'), ('.-', '.'), ('-', ''), ('--', '-'), ('-', ''), ('-new', 'new'), ('-', '')]
WMH19080212-V06-07-page4.txt: [('-', ''), ('Se-', 'Se'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080219-V06-08-page1.txt: [('-', ''), ('-', ''), ('Permits-', 'Permits'), ('Credentials-', 'Credentials'), ('Total-', 'Total'), ('employed--', 'employed-'), ('Educa-', 'Educa')]
WMH19080219-V06-08-page3.txt: [('-our', 'our'), ('-', ''), ('-', '')]
WMH19080219-V06-08-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080226-V06-09-page1.txt: [('-', ''), ('-', ''), ('February-', 'February'), ('-', ''), ('-', ''), ('min-', 'min')]
WMH19080226-V06-09-page2.txt: [('De-', 'De'), ('peo-', 'peo')]
WMH19080226-V06-09-page3.txt: [('-', ''), ('plan-', 'plan')]
WMH19080226-V06-09-page4.txt: [('-', '')]
WMH19080304-V06-10-page1.txt: [('-', ''), ('-Manager', 'Manager')]
WMH19080304-V06-10-page2.txt: [('year-', 'year')]
WMH19080304-V06-10-page4.txt: [('.-', '.')]
WMH19080311-V06-11-page1.txt: [('-z', 'z'), ('-second', 'second'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('famil-', 'famil'), ('GATHERETI-', 'GATHERETI'), ('--', '-')]
WMH19080311-V06-11-page3.txt: [('-and', 'and'), ('con-', 'con')]
WMH19080311-V06-11-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19080318-V06-12-page1.txt: [('-.', '.'), ('ERETI-', 'ERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080318-V06-12-page3.txt: [('-', ''), ('-R.', 'R.'), ('-', '')]
WMH19080318-V06-12-page4.txt: [('-', '')]
WMH19080325-V06-13-page1.txt: [('Treas-', 'Treas'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-delegates', 'delegates'), ('the-', 'the'), ('-', ''), ('-', ''), ('GATMERETI-', 'GATMERETI')]
WMH19080325-V06-13-page2.txt: [('-John', 'John'), ('-so', 'so'), ('-', ''), ('March-', 'March')]
WMH19080325-V06-13-page4.txt: [("-cardinal'", "cardinal'"), ('-Nebr.', 'Nebr.'), ('WATCHMAN.-', 'WATCHMAN.')]
WMH19080401-V06-14-page1.txt: [('unad-', 'unad'), ('-', ''), ('-', ''), ('GATRERETI-', 'GATRERETI'), ('-', '')]
WMH19080401-V06-14-page2.txt: [('super-', 'super'), ('-', '')]
WMH19080401-V06-14-page4.txt: [('Years-', 'Years'), ('-June', 'June'), ('-', '')]
WMH19080408-V06-15-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080408-V06-15-page4.txt: [('-We', 'We')]
WMH19080415-V06-16-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Mes-', 'Mes'), ('-', ''), ('-', ''), ('-', ''), ('Mc-', 'Mc'), ('-', '')]
WMH19080415-V06-16-page2.txt: [('re-', 're')]
WMH19080415-V06-16-page3.txt: [('-', ''), ('-Fitch', 'Fitch'), ('-the', 'the'), ('-we', 'we'), ('Seventh-', 'Seventh'), ('-our', 'our'), ('-', '')]
WMH19080422-V06-17-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19080422-V06-17-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080422-V06-17-page4.txt: [('diet--', 'diet-')]
WMH19080429-V06-18-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', ''), ('Seventh-', 'Seventh')]
WMH19080429-V06-18-page2.txt: [('-', ''), ('-', '')]
WMH19080429-V06-18-page3.txt: [('busi-', 'busi'), ('-', ''), ('weep-', 'weep'), ('Seventh-', 'Seventh'), ('-reading', 'reading'), ('inform-', 'inform')]
WMH19080429-V06-18-page4.txt: [('-', '')]
WMH19080506-V06-19-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Ir-', 'Ir')]
WMH19080506-V06-19-page2.txt: [('peo.-', 'peo.'), ('-c.', 'c.')]
WMH19080506-V06-19-page3.txt: [('-', ''), ('-', ''), ('child-', 'child'), ('du-', 'du')]
WMH19080506-V06-19-page4.txt: [('---', '--'), ('-', ''), ('-', ''), ('-', '')]
WMH19080513-V06-20-page1.txt: [('laborer-', 'laborer'), ('uper-', 'uper'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('GATHERETI-', 'GATHERETI'), ('z-', 'z')]
WMH19080513-V06-20-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080513-V06-20-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-on', 'on'), ('-', '')]
WMH19080520-V06-21-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Hof-', 'Hof'), ('-', ''), ('-', ''), ('-', ''), ('-they', 'they'), ('-', ''), ('sifsisev-', 'sifsisev')]
WMH19080520-V06-21-page2.txt: [('Center--', 'Center-'), ('-that', 'that'), ('-', ''), ('Ed-', 'Ed')]
WMH19080520-V06-21-page3.txt: [('-will', 'will'), ('-', ''), ('-', ''), ('-', '')]
WMH19080520-V06-21-page4.txt: [('-', '')]
WMH19080527-V06-22-page1.txt: [('-', ''), ('-', ''), ('rush-', 'rush'), ('-', ''), ('-', ''), ('to-', 'to')]
WMH19080527-V06-22-page2.txt: [('-flavors', 'flavors'), ('-', ''), ('-Two', 'Two')]
WMH19080527-V06-22-page4.txt: [('-', ''), ('PRO-', 'PRO')]
WMH19080603-V06-23-page1.txt: [('it-', 'it'), ('-', ''), ('mechani-', 'mechani'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080603-V06-23-page2.txt: [('-minds', 'minds')]
WMH19080603-V06-23-page3.txt: [('-', '')]
WMH19080603-V06-23-page4.txt: [('-', ''), ('hold-', 'hold')]
WMH19080610-V06-24-page1.txt: [('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('FIELDSECRETARYr-', 'FIELDSECRETARYr')]
WMH19080610-V06-24-page2.txt: [('-', ''), ('corn-', 'corn'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080610-V06-24-page3.txt: [('-we', 'we')]
WMH19080610-V06-24-page4.txt: [('-', ''), ('-James', 'James'), ('HER-', 'HER')]
WMH19080617-V06-25-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('flush-', 'flush')]
WMH19080617-V06-25-page2.txt: [('praise."--', 'praise."-'), ('-', '')]
WMH19080617-V06-25-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------'), ('-', '')]
WMH19080617-V06-25-page4.txt: [('-', ''), ('-to', 'to'), ('ap-', 'ap'), ('-', '')]
WMH19080624-V06-26-page1.txt: [('"Bless-', '"Bless'), ('-', ''), ('-', ''), ('I-', 'I')]
WMH19080624-V06-26-page2.txt: [('-', '')]
WMH19080624-V06-26-page3.txt: [('-to', 'to'), ('Frank-', 'Frank')]
WMH19080624-V06-26-page4.txt: [('-', ''), ('ap-', 'ap'), ('-', ''), ('-the', 'the')]
WMH19080701-V06-27-page1.txt: [('-', ''), ('-', ''), ('Lexi-', 'Lexi'), ('-', ''), ('-', '')]
WMH19080701-V06-27-page2.txt: [('camp-', 'camp'), ('-', ''), ('be-', 'be'), ('-', '')]
WMH19080701-V06-27-page3.txt: [('-know', 'know')]
WMH19080701-V06-27-page4.txt: [('-the', 'the'), ('WATCH-', 'WATCH'), ('-', '')]
WMH19080708-V06-28-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19080708-V06-28-page3.txt: [('-', ''), ('suffering-', 'suffering'), ('-', '')]
WMH19080708-V06-28-page4.txt: [('-', ''), ('-the', 'the'), ('-', ''), ('-', ''), ('as-', 'as')]
WMH19080715-V06-29-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-and', 'and'), ('-', '')]
WMH19080715-V06-29-page2.txt: [('-ddaayy', 'ddaayy'), ('-', ''), ('-and', 'and'), ('-', '')]
WMH19080715-V06-29-page3.txt: [('-church', 'church'), ('-', '')]
WMH19080715-V06-29-page4.txt: [('-', ''), ('-', '')]
WMH19080722-V06-30-page1.txt: [('Seventh-', 'Seventh'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080722-V06-30-page2.txt: [('-issue', 'issue'), ('LIB-', 'LIB'), ('the-', 'the'), ('-', '')]
WMH19080722-V06-30-page4.txt: [('-convinced', 'convinced'), ('-great', 'great'), ('-', ''), ('-church', 'church'), ('--James', '-James')]
WMH19080729-V06-31-page1.txt: [('-', ''), ('-', ''), ('Hof.-', 'Hof.')]
WMH19080729-V06-31-page2.txt: [('-', '')]
WMH19080729-V06-31-page3.txt: [('depart-', 'depart'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Park', 'Park'), ('-', ''), ('let-', 'let')]
WMH19080729-V06-31-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080805-V06-32-page1.txt: [('be-', 'be'), ('-', ''), ('-', ''), ('-', ''), ('COM-', 'COM'), ('sub-', 'sub')]
WMH19080805-V06-32-page2.txt: [('ox-', 'ox'), ('-', '')]
WMH19080805-V06-32-page3.txt: [('-to', 'to'), ('na-', 'na')]
WMH19080805-V06-32-page4.txt: [('-', ''), ('-the', 'the')]
WMH19080812-V06-33-page1.txt: [('-', ''), ('-', ''), ('af-', 'af'), ('-', ''), ('-', ''), ('-', '')]
WMH19080812-V06-33-page2.txt: [('-', '')]
WMH19080812-V06-33-page3.txt: [('GENER-', 'GENER'), ('ex-', 'ex')]
WMH19080812-V06-33-page4.txt: [('Publish-', 'Publish')]
WMH19080826-V06-34-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080826-V06-34-page2.txt: [('Anti-', 'Anti')]
WMH19080826-V06-34-page3.txt: [('class-', 'class'), ('-', ''), ('-made', 'made')]
WMH19080826-V06-34-page4.txt: [('-', '')]
WMH19080902-V06-35-page1.txt: [('-our', 'our'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--SELECTED.', '-SELECTED.'), ('-', '')]
WMH19080902-V06-35-page2.txt: [('-opportune', 'opportune'), ('followed-', 'followed')]
WMH19080902-V06-35-page3.txt: [('-', '')]
WMH19080902-V06-35-page4.txt: [('-', '')]
WMH19080909-V06-36-page1.txt: [('-I', 'I'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('night--', 'night-'), ('-', '')]
WMH19080909-V06-36-page2.txt: [('auspi-', 'auspi')]
WMH19080909-V06-36-page3.txt: [('-', ''), ('-', '')]
WMH19080909-V06-36-page4.txt: [('-', '')]
WMH19080916-V06-37-page1.txt: [('-', ''), ('Michigan.-', 'Michigan.'), ('r-', 'r'), ('-', ''), ('-', ''), ('-', ''), ('-arise', 'arise'), ('-holy', 'holy'), ('con-', 'con')]
WMH19080916-V06-37-page2.txt: [('-', ''), ('-', ''), ('-neither', 'neither'), ('the-', 'the'), ('-for', 'for'), ('-pay', 'pay')]
WMH19080916-V06-37-page3.txt: [('shad-', 'shad'), ('-consummation.', 'consummation.'), ('-', '')]
WMH19080916-V06-37-page4.txt: [('Place-', 'Place'), ('-', ''), ('-', ''), ('the-', 'the'), ('Almeda-', 'Almeda')]
WMH19080923-V06-38-page1.txt: [('-', ''), ('-', ''), ('-A--udi.t', 'A--udi.t'), ('-', ''), ('-', ''), ('-', ''), ('the-', 'the')]
WMH19080923-V06-38-page2.txt: [('dif-', 'dif'), ('-', ''), ('-', ''), ('-', '')]
WMH19080923-V06-38-page3.txt: [('G.-', 'G.'), ('-', ''), ('un-', 'un')]
WMH19080923-V06-38-page4.txt: [('-you', 'you'), ('-should', 'should')]
WMH19080930-V06-39-page1.txt: [('-', ''), ('-and', 'and'), ('go-', 'go'), ('-', ''), ('-', '')]
WMH19080930-V06-39-page2.txt: [('-the', 'the')]
WMH19080930-V06-39-page3.txt: [('-', ''), ('-', ''), ('influ.-', 'influ.'), ('-and', 'and'), ('-', '')]
WMH19080930-V06-39-page4.txt: [('-', ''), ('Conference.-', 'Conference.'), ('-', ''), ('--Mrs.', '-Mrs.'), ('-many', 'many'), ('Van-', 'Van')]
WMH19081007-V06-40-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Hof-', 'Hof')]
WMH19081007-V06-40-page2.txt: [('--Ans.', '-Ans.')]
WMH19081007-V06-40-page3.txt: [('-Business', 'Business'), ('Mateo-', 'Mateo'), ('ENLIGHTEN-', 'ENLIGHTEN')]
WMH19081007-V06-40-page4.txt: [('-heard', 'heard'), ('mis-', 'mis'), ('mes-', 'mes'), ('-', ''), ('-', '')]
WMH19081014-V06-41-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Hof--', 'Hof-'), ('-', ''), ('--', '-'), ('cL-', 'cL')]
WMH19081014-V06-41-page2.txt: [('-', ''), ('-', ''), ('-Labor', 'Labor'), ('re-', 're'), ('-the', 'the'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19081014-V06-41-page3.txt: [('commit-', 'commit'), ('develop-', 'develop'), ('corn--', 'corn-')]
WMH19081014-V06-41-page4.txt: [('advo-', 'advo'), ('be-', 'be'), ('life.-', 'life.'), ('illus-', 'illus'), ('-to', 'to')]
WMH19081021-V06-42-page1.txt: [('especi-', 'especi'), ('-', ''), ('-', ''), ('-', '')]
WMH19081021-V06-42-page2.txt: [('for-', 'for')]
WMH19081021-V06-42-page3.txt: [('truth-', 'truth'), ('-', ''), ('-continue', 'continue'), ('Pool-', 'Pool')]
WMH19081028-V06-43-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('con.-', 'con.'), ('-fulfilled', 'fulfilled')]
WMH19081028-V06-43-page2.txt: [('RE-', 'RE'), ('-', '')]
WMH19081028-V06-43-page3.txt: [('-this', 'this'), ('-', ''), ('-So', 'So'), ('-Soon', 'Soon')]
WMH19081028-V06-43-page4.txt: [('-Growth', 'Growth'), ('-Philippians.', 'Philippians.'), ('-page', 'page')]
WMH19081104-V06-44-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19081104-V06-44-page2.txt: [('-obtain', 'obtain'), ('-to', 'to')]
WMH19081104-V06-44-page3.txt: [('.-', '.'), ('-', ''), ('quar-', 'quar'), ('Sab-', 'Sab'), ('-', '')]
WMH19081104-V06-44-page4.txt: [('Danish-', 'Danish'), ('Danish-', 'Danish'), ('Danish-', 'Danish'), ('MICHI-', 'MICHI')]
WMH19081111-V06-45-page1.txt: [('na-', 'na'), ('-', ''), ('-C.', 'C.'), ('-', ''), ('-', ''), ('P-', 'P')]
WMH19081111-V06-45-page2.txt: [('Harbor-', 'Harbor'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19081111-V06-45-page3.txt: [('-', ''), ('firmame-', 'firmame'), ('-', ''), ('-bare', 'bare'), ('-', ''), ('-', '')]
WMH19081111-V06-45-page5.txt: [('LJesY-', 'LJesY')]
In [30]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction3

Average verified rate: 0.9762576375579471

Average of error rates: 0.02556951102588687

Total token count: 915705

In [31]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[31]:
[('m', 1749),
 ('w', 1503),
 ('g', 1437),
 ('e', 1313),
 ('d', 1278),
 ('r', 688),
 ('n', 645),
 ("'", 511),
 ('f', 444),
 ('t', 382),
 ('th', 283),
 ('co', 172),
 ('oo', 171),
 ('sabbathschool', 163),
 ('io', 120),
 ('mt', 108),
 ('k', 107),
 ('ro', 96),
 ('wm', 82),
 ('numbess', 75),
 ('re', 71),
 ('u', 69),
 ("'field", 67),
 ("canvassers'", 58),
 ('x', 46),
 ("'the", 44),
 ('horr', 39),
 ("the'", 38),
 ('rd', 33),
 ('blendon', 32),
 ('ex', 32),
 ('brower', 31),
 ('harnden', 30),
 ("f'd", 30),
 ('mchugh', 29),
 ('seventhday', 28),
 ('nd', 28),
 ('cleora', 27),
 ('tion', 25),
 ('nunica', 23),
 ('sabbathschools', 23),
 ('q', 23),
 ("'to", 22),
 ('-', 21),
 ('vowyla', 21),
 ('al', 21),
 ('z', 20),
 ('loth', 20),
 ('fd', 20),
 ('michi', 20)]

Correction 4 -- Remove extra quotation marks

In [33]:
# %load shared_elements/remove_extra_quotation_marks.py
prev = cycle
cycle = "correction4"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    corrections = []
    for token in tokens:
        token_list = list(token)
        last_char = token_list[-1]

        if last_char is "'":
            if len(token) > 1:
                if token_list[-2] is 's' or 'S':
                    pass
                else:
                    corrections.append((token, re.sub(r"'", r"", token)))
            else:
                pass
        elif token[0] is "'":
            corrections.append((token, re.sub(r"'", r"", token)))   
        else:
            pass
    
    if len(corrections) > 0:
        print('{}: {}'.format(filename, corrections))

        for correction in corrections:
            content = clean.replace_pair(correction, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
WMH19030513-V01-19-page1.txt: [("'Alarmed", 'Alarmed')]
WMH19030520-V01-20-page4.txt: [("'born", 'born')]
WMH19030527-V01-21-page4.txt: [("'An", 'An')]
WMH19030610-V01-23-page1.txt: [("'Evangelical", 'Evangelical')]
WMH19030610-V01-23-page2.txt: [("'promotes", 'promotes')]
WMH19030610-V01-23-page4.txt: [("'Rem", 'Rem'), ("'Rotes", 'Rotes')]
WMH19030624-V01-25-page4.txt: [("'dews", 'dews'), ("'notes", 'notes')]
WMH19030701-V01-26-page4.txt: [("'news", 'news'), ("'Pews", 'Pews'), ("'Rotes", 'Rotes')]
WMH19030708-V01-27-page2.txt: [("'Department", 'Department')]
WMH19030708-V01-27-page4.txt: [("'school", 'school'), ("'Mews", 'Mews'), ("'Rotes", 'Rotes')]
WMH19030715-V01-28-page1.txt: [("'Tis", 'Tis')]
WMH19030715-V01-28-page3.txt: [("'.Died", '.Died')]
WMH19030715-V01-28-page4.txt: [("'Flews", 'Flews'), ("'Hews", 'Hews')]
WMH19030722-V01-29-page2.txt: [("'and", 'and')]
WMH19030722-V01-29-page4.txt: [("'Flews", 'Flews'), ("'Motes", 'Motes'), ("'Pews", 'Pews')]
WMH19030930-V01-39-page3.txt: [("'pecan", 'pecan')]
WMH19030930-V01-39-page4.txt: [("'news", 'news'), ("'Notes", 'Notes')]
WMH19031028-V01-43-page1.txt: [("'self", 'self')]
WMH19031028-V01-43-page3.txt: [("'all", 'all')]
WMH19031028-V01-43-page4.txt: [("'Hews", 'Hews'), ("'by", 'by'), ("'we", 'we'), ("'Hews", 'Hews')]
WMH19031118-V01-46-page4.txt: [("'Flews", 'Flews'), ("'notes", 'notes'), ("'each", 'each')]
WMH19040106-V02-02-page1.txt: [("'the", 'the')]
WMH19040113-V02-03-page3.txt: [("'We", 'We')]
WMH19040113-V02-03-page4.txt: [("'Battle", 'Battle')]
WMH19040210-V02-06-page2.txt: [("'treatments", 'treatments'), ("'summer", 'summer'), ("'all", 'all')]
WMH19040210-V02-06-page3.txt: [("'Creek", 'Creek')]
WMH19040210-V02-06-page4.txt: [("'dress", 'dress')]
WMH19040217-V02-07-page1.txt: [("'traverse", 'traverse'), ("'Joseph", 'Joseph'), ("'loved", 'loved'), ("'book", 'book'), ("'the", 'the')]
WMH19040224-V02-08-page2.txt: [("'nurses", 'nurses')]
WMH19040302-V02-09-page2.txt: [("'present", 'present')]
WMH19040302-V02-09-page3.txt: [("'stairs", 'stairs')]
WMH19040309-V02-10-page2.txt: [("'Another", 'Another')]
WMH19040309-V02-10-page3.txt: [("'persons", 'persons')]
WMH19040309-V02-10-page4.txt: [("'I", 'I'), ("'My", 'My')]
WMH19040316-V02-11-page1.txt: [("'Michigan", 'Michigan')]
WMH19040316-V02-11-page2.txt: [("'the", 'the'), ("'More", 'More')]
WMH19040316-V02-11-page3.txt: [("'great", 'great')]
WMH19040323-V02-12-page1.txt: [("'we", 'we')]
WMH19040323-V02-12-page2.txt: [("'book", 'book')]
WMH19040323-V02-12-page3.txt: [("'to", 'to')]
WMH19040330-V02-13-page3.txt: [("'unable", 'unable')]
WMH19040406-V02-14-page2.txt: [("'the", 'the'), ("'in", 'in'), ("'eight", 'eight')]
WMH19040406-V02-14-page3.txt: [("'lungs", 'lungs')]
WMH19040420-V02-16-page1.txt: [("'made", 'made'), ("'Not", 'Not')]
WMH19040420-V02-16-page2.txt: [("'been", 'been'), ("'With", 'With')]
WMH19040420-V02-16-page4.txt: [("'Conference.", 'Conference.')]
WMH19040427-V02-17-page1.txt: [("'Michigan", 'Michigan'), ("'branch", 'branch')]
WMH19040427-V02-17-page2.txt: [("'occassionally.", 'occassionally.'), ("'of", 'of')]
WMH19040427-V02-17-page3.txt: [("'Thoburn", 'Thoburn')]
WMH19040504-V02-18-page2.txt: [("'it", 'it'), ("'evil", 'evil')]
WMH19040504-V02-18-page3.txt: [("'A", 'A'), ("'meal", 'meal')]
WMH19040511-V02-19-page1.txt: [("'I", 'I'), ("'that", 'that'), ("'tanner.", 'tanner.')]
WMH19040511-V02-19-page2.txt: [("'up", 'up')]
WMH19040511-V02-19-page3.txt: [("'Freas.", 'Freas.'), ("'Financial.", 'Financial.')]
WMH19040518-V02-20-page1.txt: [("'Was", 'Was'), ("'warm", 'warm'), ("'this", 'this'), ("'others", 'others'), ("'influence", 'influence')]
WMH19040518-V02-20-page2.txt: [("'go", 'go')]
WMH19040601-V02-22-page2.txt: [("'that", 'that')]
WMH19040601-V02-22-page4.txt: [("'Kenyon", 'Kenyon')]
WMH19040608-V02-23-page1.txt: [("'Michigan", 'Michigan'), ("'large", 'large'), ("'and", 'and')]
WMH19040608-V02-23-page3.txt: [("'called", 'called'), ("'to", 'to'), ("'the", 'the')]
WMH19040608-V02-23-page4.txt: [("'was", 'was')]
WMH19040622-V02-24-page1.txt: [("'pay", 'pay')]
WMH19040622-V02-24-page4.txt: [("'and", 'and')]
WMH19040629-V02-25-page1.txt: [("'the", 'the')]
WMH19040629-V02-25-page4.txt: [("'Conference", 'Conference')]
WMH19040706-V02-26-page4.txt: [("'in", 'in'), ("'HERALD", 'HERALD')]
WMH19040713-V02-27-page2.txt: [("'he", 'he')]
WMH19040713-V02-27-page3.txt: [("'session", 'session'), ("'to", 'to')]
WMH19040720-V02-28-page1.txt: [("'Reading", 'Reading')]
WMH19040720-V02-28-page2.txt: [("'when", 'when')]
WMH19040720-V02-28-page3.txt: [("'of", 'of'), ("'teacher", 'teacher'), ("'and", 'and')]
WMH19040720-V02-28-page4.txt: [("'subscriptions", 'subscriptions'), ("'I'.", 'I.')]
WMH19040727-V02-29-page2.txt: [("'favor", 'favor')]
WMH19040810-V02-31-page1.txt: [("'Much", 'Much')]
WMH19040810-V02-31-page2.txt: [("'s", 's')]
WMH19040810-V02-31-page3.txt: [("'disease.", 'disease.'), ("'state", 'state')]
WMH19040817-V02-32-page2.txt: [("'and", 'and'), ("'reaching", 'reaching')]
WMH19040817-V02-32-page4.txt: [("'We", 'We')]
WMH19040831-V02-33-page1.txt: [("'the", 'the')]
WMH19040831-V02-33-page2.txt: [("'as", 'as')]
WMH19040831-V02-33-page3.txt: [("'Breads", 'Breads')]
WMH19040914-V02-34-page1.txt: [("'Michigan", 'Michigan'), ("'poverty.", 'poverty.')]
WMH19040914-V02-34-page2.txt: [("'great", 'great')]
WMH19040914-V02-34-page3.txt: [("'they", 'they'), ("'a", 'a')]
WMH19040914-V02-34-page4.txt: [("'opened", 'opened'), ("'school", 'school')]
WMH19040928-V02-35-page3.txt: [("'twelve", 'twelve')]
WMH19041005-V02-36-page2.txt: [("'c.", 'c.')]
WMH19041005-V02-36-page3.txt: [("'I'REAMENT.", 'IREAMENT.'), ("'a", 'a')]
WMH19041005-V02-36-page4.txt: [("'September.", 'September.')]
WMH19041012-V02-37-page1.txt: [("'aostile", 'aostile'), ("'brief", 'brief')]
WMH19041012-V02-37-page2.txt: [("'of", 'of')]
WMH19041019-V02-38-page3.txt: [("'devoted", 'devoted'), ("'These", 'These'), ("'to", 'to'), ("'for", 'for')]
WMH19041026-V02-39-page1.txt: [("'new", 'new')]
WMH19041026-V02-39-page3.txt: [("'that", 'that'), ("'part", 'part')]
WMH19041026-V02-39-page4.txt: [("'Missionary", 'Missionary'), ("'education", 'education')]
WMH19041102-V02-40-page1.txt: [("'magnitude", 'magnitude')]
WMH19041102-V02-40-page2.txt: [("'work.", 'work.')]
WMH19041102-V02-40-page3.txt: [("'when", 'when')]
WMH19041102-V02-40-page4.txt: [("'doing", 'doing'), ("'The", 'The')]
WMH19041109-V02-41-page2.txt: [("'Sabbath-keepers", 'Sabbath-keepers')]
WMH19041109-V02-41-page4.txt: [("'rule", 'rule')]
WMH19041116-V02-42-page3.txt: [("'the", 'the')]
WMH19041123-V02-43-page1.txt: [("'of", 'of'), ("'prayer", 'prayer')]
WMH19041123-V02-43-page3.txt: [("'The", 'The')]
WMH19041123-V02-43-page4.txt: [("'the", 'the')]
WMH19041130-V02-44-page1.txt: [("'to", 'to')]
WMH19041130-V02-44-page2.txt: [("'a", 'a'), ("'association", 'association')]
WMH19041130-V02-44-page3.txt: [("'scold", 'scold')]
WMH19041207-V02-45-page2.txt: [("'us", 'us'), ("'RECORD.", 'RECORD.')]
WMH19041207-V02-45-page3.txt: [("'services", 'services')]
WMH19041214-V02-46-page3.txt: [("'Food", 'Food'), ("'remove", 'remove'), ("'be", 'be')]
WMH19041214-V02-46-page4.txt: [("'Jesus", 'Jesus')]
WMH19041221-V02-47-page2.txt: [("'When", 'When'), ("'I", 'I'), ("'Each", 'Each')]
WMH19041221-V02-47-page4.txt: [("'message", 'message'), ("'fourth", 'fourth'), ("'up", 'up')]
WMH19041228-V02-48-page1.txt: [("'Wealthy", 'Wealthy')]
WMH19041228-V02-48-page3.txt: [("'you", 'you')]
WMH19050104-V03-01-page2.txt: [("'the", 'the'), ("'NOW.", 'NOW.')]
WMH19050104-V03-01-page4.txt: [("'A", 'A')]
WMH19050118-V03-03-page1.txt: [("'matter", 'matter')]
WMH19050118-V03-03-page2.txt: [("'courage", 'courage'), ("'Christ's", 'Christs'), ("'The", 'The'), ("'Object", 'Object'), ("'Object", 'Object'), ("'Object", 'Object')]
WMH19050118-V03-03-page3.txt: [("'Object", 'Object'), ("'Christ's", 'Christs'), ("'Object", 'Object'), ("'Object", 'Object'), ("'Object", 'Object')]
WMH19050208-V03-05-page1.txt: [("'kind", 'kind')]
WMH19050208-V03-05-page2.txt: [("'seventeen", 'seventeen')]
WMH19050215-V03-06-page3.txt: [("'Hesperia", 'Hesperia'), ("'A", 'A')]
WMH19050215-V03-06-page4.txt: [("'William", 'William')]
WMH19050222-V03-07-page1.txt: [("'The", 'The')]
WMH19050222-V03-07-page3.txt: [("'It", 'It'), ("'work.", 'work.')]
WMH19050301-V03-08-page1.txt: [("'That", 'That')]
WMH19050301-V03-08-page2.txt: [("'W.", 'W.')]
WMH19050315-V03-10-page2.txt: [("'F", 'F'), ("'Southern", 'Southern')]
WMH19050315-V03-10-page4.txt: [("'to", 'to')]
WMH19050322-V03-11-page1.txt: [("'and", 'and')]
WMH19050322-V03-11-page5.txt: [("'WI", 'WI')]
WMH19050329-V03-12-page3.txt: [("'feature", 'feature'), ("'field.", 'field.')]
WMH19050405-V03-13-page1.txt: [("'have", 'have')]
WMH19050405-V03-13-page2.txt: [("'to", 'to'), ("'intensely", 'intensely')]
WMH19050413-V03-14-page1.txt: [("'Sept.", 'Sept.')]
WMH19050413-V03-14-page2.txt: [("'officers", 'officers')]
WMH19050413-V03-14-page3.txt: [("'The", 'The')]
WMH19050419-V03-15-page2.txt: [("'to", 'to')]
WMH19050419-V03-15-page3.txt: [("'on", 'on')]
WMH19050419-V03-15-page4.txt: [("'Cedar", 'Cedar')]
WMH19050426-V03-16-page1.txt: [("'increase", 'increase'), ("'effect", 'effect'), ("'that", 'that')]
WMH19050426-V03-16-page4.txt: [("'Tis", 'Tis'), ("'patients.", 'patients.')]
WMH19050503-V03-17-page1.txt: [("'Fhat", 'Fhat'), ("'of", 'of')]
WMH19050503-V03-17-page2.txt: [("'educational", 'educational'), ("'In", 'In')]
WMH19050510-V03-18-page3.txt: [("'is", 'is'), ("'task", 'task')]
WMH19050510-V03-18-page4.txt: [("'having", 'having'), ("'Come", 'Come'), ("'Here", 'Here')]
WMH19050517-V03-19-page1.txt: [("'work", 'work'), ("'weightiest", 'weightiest')]
WMH19050517-V03-19-page4.txt: [("'Conference", 'Conference')]
WMH19050524-V03-20-page1.txt: [("'and", 'and')]
WMH19050524-V03-20-page2.txt: [("'Christ's", 'Christs')]
WMH19050524-V03-20-page3.txt: [("'Field.", 'Field.')]
WMH19050531-V03-21-page1.txt: [("'leprosy", 'leprosy')]
WMH19050531-V03-21-page2.txt: [("'but", 'but'), ("'Field.", 'Field.'), ("'Now", 'Now')]
WMH19050531-V03-21-page4.txt: [("'Twice", 'Twice'), ("'already", 'already')]
WMH19050607-V03-22-page1.txt: [("'and", 'and')]
WMH19050607-V03-22-page3.txt: [("'EDITH", 'EDITH')]
WMH19050607-V03-22-page4.txt: [("'.", '.')]
WMH19050614-V03-23-page1.txt: [("'Surely", 'Surely')]
WMH19050614-V03-23-page4.txt: [("'He", 'He')]
WMH19050621-V03-24-page1.txt: [("'class", 'class')]
WMH19050621-V03-24-page2.txt: [("'Bring", 'Bring')]
WMH19050621-V03-24-page3.txt: [("'the", 'the'), ("'Sing", 'Sing'), ("'The", 'The'), ("'They", 'They'), ("'As", 'As'), ("'For", 'For')]
WMH19050621-V03-24-page4.txt: [("'body", 'body')]
WMH19050628-V03-25-page1.txt: [("'and", 'and'), ("'RESOLVED", 'RESOLVED')]
WMH19050628-V03-25-page2.txt: [("'Desire", 'Desire'), ("'Desire", 'Desire'), ("'prominent.", 'prominent.')]
WMH19050705-V03-26-page1.txt: [("'twer", 'twer')]
WMH19050705-V03-26-page2.txt: [("'are", 'are')]
WMH19050705-V03-26-page3.txt: [("'Field.", 'Field.'), ("'Hours", 'Hours'), ("'greater", 'greater')]
WMH19050705-V03-26-page4.txt: [("'Be", 'Be')]
WMH19050712-V03-27-page2.txt: [("'should", 'should')]
WMH19050712-V03-27-page3.txt: [("'financial.", 'financial.')]
WMH19050719-V03-28-page3.txt: [("'Field.", 'Field.')]
WMH19050719-V03-28-page4.txt: [("'The", 'The')]
WMH19050726-V03-29-page1.txt: [("'August", 'August')]
WMH19050726-V03-29-page2.txt: [("'Field.", 'Field.'), ("'The", 'The')]
WMH19050802-V03-30-page1.txt: [("'Hume.", 'Hume.')]
WMH19050802-V03-30-page2.txt: [("'Practical", 'Practical')]
WMH19050802-V03-30-page3.txt: [("'school", 'school'), ("'Field.", 'Field.')]
WMH19050802-V03-30-page4.txt: [("'God", 'God')]
WMH19050809-V03-31-page2.txt: [("'for", 'for'), ("'Field.", 'Field.'), ("'upon", 'upon'), ("'Glenwood", 'Glenwood')]
WMH19050809-V03-31-page3.txt: [("'to", 'to')]
WMH19050816-V03-32-page1.txt: [("'in", 'in')]
WMH19050816-V03-32-page3.txt: [("'Jews", 'Jews')]
WMH19050816-V03-32-page4.txt: [("'of", 'of'), ("'Sanctify", 'Sanctify')]
WMH19050830-V03-33-page1.txt: [("'largest", 'largest'), ("'the", 'the')]
WMH19050830-V03-33-page4.txt: [("'Follow", 'Follow')]
WMH19050906-V03-34-page1.txt: [("'schools", 'schools')]
WMH19050906-V03-34-page3.txt: [("'Financial.", 'Financial.')]
WMH19050906-V03-34-page4.txt: [("'young", 'young'), ("'tis", 'tis'), ("'tis", 'tis')]
WMH19050913-V03-35-page2.txt: [("'lead", 'lead')]
WMH19050913-V03-35-page4.txt: [("'last", 'last')]
WMH19050920-V03-36-page1.txt: [("'field.", 'field.')]
WMH19050920-V03-36-page2.txt: [("'quiet", 'quiet'), ("'to", 'to'), ("'to", 'to')]
WMH19050927-V03-37-page1.txt: [("'at", 'at'), ("'those", 'those')]
WMH19050927-V03-37-page2.txt: [("'were", 'were')]
WMH19050927-V03-37-page3.txt: [("'come", 'come'), ("'lifting", 'lifting')]
WMH19051004-V03-38-page3.txt: [("'tis", 'tis')]
WMH19051004-V03-38-page4.txt: [("'Edward", 'Edward')]
WMH19051011-V03-39-page1.txt: [("'the", 'the'), ("'the", 'the')]
WMH19051011-V03-39-page3.txt: [("'done", 'done')]
WMH19051011-V03-39-page4.txt: [("'This", 'This')]
WMH19051018-V03-40-page1.txt: [("'went", 'went')]
WMH19051018-V03-40-page2.txt: [("'Field.", 'Field.')]
WMH19051018-V03-40-page3.txt: [("'great", 'great')]
WMH19051018-V03-40-page4.txt: [("'one", 'one'), ("'WEST", 'WEST')]
WMH19051025-V03-41-page1.txt: [("'him", 'him')]
WMH19051025-V03-41-page2.txt: [("'of", 'of')]
WMH19051025-V03-41-page3.txt: [("'twill", 'twill'), ("'Field.", 'Field.')]
WMH19051101-V03-42-page2.txt: [("'field.", 'field.')]
WMH19051101-V03-42-page3.txt: [("'ferers", 'ferers')]
WMH19051108-V03-43-page1.txt: [("'Field.", 'Field.')]
WMH19051108-V03-43-page3.txt: [("'be", 'be')]
WMH19051115-V03-44-page4.txt: [("'school", 'school')]
WMH19051122-V03-45-page1.txt: [("'Seeking", 'Seeking')]
WMH19051122-V03-45-page3.txt: [("'Week", 'Week')]
WMH19051129-V03-46-page2.txt: [("'at", 'at'), ("'Enter", 'Enter'), ("'Field.", 'Field.')]
WMH19051129-V03-46-page3.txt: [("'Word.", 'Word.')]
WMH19051206-V03-47-page2.txt: [("'are", 'are')]
WMH19051213-V03-48-page1.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis')]
WMH19051213-V03-48-page2.txt: [("'Field.", 'Field.')]
WMH19051213-V03-48-page4.txt: [("'voiced", 'voiced'), ("'three", 'three'), ("'for", 'for')]
WMH19051220-V03-49-page1.txt: [("'tis", 'tis'), ("'All", 'All')]
WMH19051220-V03-49-page2.txt: [("'.", '.'), ("'.", '.')]
WMH19051220-V03-49-page3.txt: [("'Field.", 'Field.')]
WMH19051227-V03-50-page1.txt: [("'study", 'study')]
WMH19051227-V03-50-page3.txt: [("'margin.", 'margin.')]
WMH19051227-V03-50-page4.txt: [('\'"', '"')]
WMH19060103-V04-01-page1.txt: [("'field.", 'field.')]
WMH19060103-V04-01-page4.txt: [("'Christ", 'Christ')]
WMH19060110-V04-02-page2.txt: [("'for", 'for'), ('\'strength."', 'strength."'), ("'Financial.", 'Financial.')]
WMH19060110-V04-02-page3.txt: [("'that", 'that'), ("'that", 'that')]
WMH19060110-V04-02-page4.txt: [("'paper", 'paper')]
WMH19060117-V04-03-page4.txt: [("'o", 'o'), ("'o", 'o')]
WMH19060124-V04-04-page1.txt: [("'I", 'I'), ("'ministers", 'ministers')]
WMH19060124-V04-04-page2.txt: [("'for", 'for'), ("'in", 'in')]
WMH19060131-V04-05-page2.txt: [("'Statement", 'Statement'), ("'o", 'o'), ("'o", 'o'), ("'o", 'o')]
WMH19060131-V04-05-page3.txt: [("'late", 'late'), ("'field.", 'field.')]
WMH19060131-V04-05-page4.txt: [("'E.", 'E.')]
WMH19060207-V04-06-page3.txt: [("'Field.", 'Field.')]
WMH19060214-V04-07-page1.txt: [("'for", 'for')]
WMH19060214-V04-07-page2.txt: [("'should", 'should')]
WMH19060214-V04-07-page3.txt: [("'that", 'that')]
WMH19060221-V04-08-page1.txt: [("'church", 'church')]
WMH19060221-V04-08-page2.txt: [("'.ed", '.ed')]
WMH19060221-V04-08-page3.txt: [("'operation", 'operation')]
WMH19060221-V04-08-page4.txt: [("'or", 'or')]
WMH19060228-V04-09-page3.txt: [("'Field.", 'Field.'), ("'Turn", 'Turn')]
WMH19060228-V04-09-page4.txt: [("'U.", 'U.')]
WMH19060307-V04-10-page2.txt: [("'a", 'a'), ("'Field.", 'Field.')]
WMH19060307-V04-10-page3.txt: [("'are", 'are'), ("'we", 'we')]
WMH19060314-V04-11-page2.txt: [("'C.", 'C.')]
WMH19060314-V04-11-page3.txt: [("'Hosanna", 'Hosanna')]
WMH19060321-V04-12-page1.txt: [("'worker", 'worker')]
WMH19060321-V04-12-page2.txt: [("'Field.", 'Field.'), ("'whom", 'whom')]
WMH19060321-V04-12-page3.txt: [("'the", 'the')]
WMH19060328-V04-13-page2.txt: [("'Field.", 'Field.')]
WMH19060411-V04-15-page1.txt: [("'Union", 'Union')]
WMH19060411-V04-15-page2.txt: [("'Financial.", 'Financial.'), ("'Field.", 'Field.')]
WMH19060411-V04-15-page3.txt: [("'so", 'so')]
WMH19060411-V04-15-page4.txt: [("'amount", 'amount'), ("'Dietetics", 'Dietetics')]
WMH19060425-V04-17-page1.txt: [("'for", 'for')]
WMH19060425-V04-17-page3.txt: [("'of", 'of'), ("'crowned", 'crowned')]
WMH19060425-V04-17-page4.txt: [("'straw", 'straw')]
WMH19060502-V04-18-page3.txt: [("'goo.", 'goo.'), ("'a", 'a'), ("'will", 'will'), ("'small", 'small')]
WMH19060502-V04-18-page4.txt: [("'.Wells", '.Wells')]
WMH19060509-V04-19-page1.txt: [("'i", 'i')]
WMH19060509-V04-19-page3.txt: [("'Field.", 'Field.'), ("'to", 'to')]
WMH19060509-V04-19-page4.txt: [("'the", 'the')]
WMH19060523-V04-20-page3.txt: [("'Field.", 'Field.')]
WMH19060530-V04-21-page2.txt: [("'this", 'this')]
WMH19060530-V04-21-page3.txt: [("'greater", 'greater'), ("'Field.", 'Field.')]
WMH19060613-V04-23-page3.txt: [("'To", 'To')]
WMH19060613-V04-23-page4.txt: [("'new", 'new')]
WMH19060620-V04-24-page1.txt: [("'been", 'been')]
WMH19060620-V04-24-page3.txt: [("'them", 'them')]
WMH19060627-V04-25-page2.txt: [("'Field.", 'Field.')]
WMH19060627-V04-25-page4.txt: [("'up", 'up')]
WMH19060704-V04-26-page1.txt: [("'Bible", 'Bible'), ("'SEGO", 'SEGO')]
WMH19060704-V04-26-page3.txt: [("'Tis", 'Tis')]
WMH19060704-V04-26-page4.txt: [("'the", 'the'), ("'at", 'at'), ("'in", 'in')]
WMH19060711-V04-27-page1.txt: [("'tis", 'tis')]
WMH19060711-V04-27-page3.txt: [("'lath", 'lath'), ('\'Times"', 'Times"')]
WMH19060711-V04-27-page4.txt: [("'Field.", 'Field.')]
WMH19060718-V04-28-page1.txt: [("'ilm", 'ilm')]
WMH19060725-V04-29-page1.txt: [("'Field.", 'Field.'), ("'tiff", 'tiff'), ('\'"', '"')]
WMH19060725-V04-29-page3.txt: [("'prayer", 'prayer'), ("'and", 'and')]
WMH19060725-V04-29-page4.txt: [("'EZRA", 'EZRA')]
WMH19060801-V04-30-page1.txt: [("'great", 'great'), ("'We", 'We')]
WMH19060801-V04-30-page2.txt: [("'loose", 'loose'), ("'Ye", 'Ye'), ("'us", 'us'), ("'victory", 'victory')]
WMH19060801-V04-30-page3.txt: [("'Field.", 'Field.')]
WMH19060801-V04-30-page4.txt: [("'When", 'When'), ("'And", 'And')]
WMH19060808-V04-31-page3.txt: [("'Cedar", 'Cedar'), ("'Field.", 'Field.')]
WMH19060822-V04-32-page2.txt: [("'perfect", 'perfect'), ("'text", 'text'), ("'may", 'may'), ("'be", 'be'), ("'SUNDAY", 'SUNDAY')]
WMH19060822-V04-32-page3.txt: [("'Good", 'Good'), ("'that", 'that'), ("'that", 'that'), ("'.", '.')]
WMH19060829-V04-33-page4.txt: [("'twas", 'twas'), ("'.", '.'), ("'Twill", 'Twill'), ("'keep", 'keep'), ("'Tis", 'Tis'), ("'tis", 'tis'), ("'keep", 'keep')]
WMH19060905-V04-34-page1.txt: [("'Christ.", 'Christ.'), ("'made", 'made')]
WMH19060905-V04-34-page2.txt: [("'work", 'work')]
WMH19060905-V04-34-page4.txt: [("'Without", 'Without')]
WMH19060912-V04-35-page1.txt: [("'T", 'T')]
WMH19060912-V04-35-page2.txt: [("'train", 'train')]
WMH19060912-V04-35-page3.txt: [("'before", 'before')]
WMH19060912-V04-35-page4.txt: [("'Field.", 'Field.')]
WMH19060919-V04-36-page1.txt: [("'Till", 'Till'), ("'Twill", 'Twill'), ("'Twill", 'Twill'), ("'people.", 'people.')]
WMH19060919-V04-36-page4.txt: [("'be", 'be')]
WMH19060926-V04-37-page1.txt: [("'of", 'of')]
WMH19060926-V04-37-page2.txt: [("'Why", 'Why'), ("'I", 'I')]
WMH19060926-V04-37-page3.txt: [("'them", 'them')]
WMH19061003-V04-38-page2.txt: [("'they", 'they')]
WMH19061003-V04-38-page3.txt: [("'being", 'being'), ("'variety", 'variety'), ("'cloth", 'cloth')]
WMH19061003-V04-38-page4.txt: [("'Bertha", 'Bertha')]
WMH19061010-V04-39-page2.txt: [("'s", 's'), ("'is", 'is')]
WMH19061010-V04-39-page3.txt: [("'Field.", 'Field.')]
WMH19061010-V04-39-page4.txt: [("'reports", 'reports'), ("'twixt", 'twixt'), ("'tis", 'tis'), ("'twixt", 'twixt'), ("'and", 'and')]
WMH19061017-V04-40-page1.txt: [("'the", 'the'), ("'.", '.'), ("'field.", 'field.')]
WMH19061017-V04-40-page3.txt: [("'followed", 'followed')]
WMH19061024-V04-41-page2.txt: [("'the", 'the')]
WMH19061024-V04-41-page3.txt: [("'field.", 'field.'), ("'look", 'look')]
WMH19061031-V04-42-page1.txt: [("'W.", 'W.')]
WMH19061031-V04-42-page2.txt: [("'disposition", 'disposition'), ("'We", 'We')]
WMH19061031-V04-42-page3.txt: [("'Field.", 'Field.')]
WMH19061031-V04-42-page4.txt: [("'But", 'But')]
WMH19061107-V04-43-page1.txt: [("'well", 'well')]
WMH19061107-V04-43-page2.txt: [("'root", 'root'), ("'Christ's", 'Christs')]
WMH19061107-V04-43-page3.txt: [("'Field.", 'Field.')]
WMH19061107-V04-43-page4.txt: [("'of", 'of')]
WMH19061114-V04-44-page1.txt: [("'stumps", 'stumps'), ("'illl", 'illl')]
WMH19061114-V04-44-page2.txt: [("'W.", 'W.')]
WMH19061114-V04-44-page3.txt: [("'Field.", 'Field.'), ("'for", 'for')]
WMH19061114-V04-44-page4.txt: [("'Young", 'Young')]
WMH19061121-V04-45-page1.txt: [("'Ole", 'Ole'), ("'and", 'and')]
WMH19061121-V04-45-page2.txt: [("'first", 'first')]
WMH19061121-V04-45-page3.txt: [("'and", 'and')]
WMH19061121-V04-45-page4.txt: [("'now", 'now')]
WMH19061128-V04-46-page1.txt: [("'Nur", 'Nur'), ("'SEGO", 'SEGO')]
WMH19061128-V04-46-page3.txt: [("'has", 'has')]
WMH19061205-V04-47-page2.txt: [("'desire", 'desire'), ("'Your", 'Your')]
WMH19061205-V04-47-page3.txt: [("'West", 'West'), ("'Field.", 'Field.')]
WMH19061212-V04-48-page2.txt: [("'field.", 'field.')]
WMH19061219-V04-49-page1.txt: [("'....", '....'), ("'necessities", 'necessities')]
WMH19061226-V04-50-page2.txt: [("'Field.", 'Field.')]
WMH19070102-V05-01-page3.txt: [("'are", 'are')]
WMH19070102-V05-01-page4.txt: [("'power.", 'power.')]
WMH19070109-V05-02-page1.txt: [("'but", 'but')]
WMH19070116-V05-03-page3.txt: [("'God", 'God')]
WMH19070116-V05-03-page4.txt: [("'of", 'of'), ("'field.", 'field.')]
WMH19070123-V05-04-page2.txt: [("'rrufant", 'rrufant'), ("'never", 'never'), ("'any", 'any')]
WMH19070130-V05-05-page1.txt: [("'as", 'as')]
WMH19070130-V05-05-page2.txt: [("'educational", 'educational')]
WMH19070130-V05-05-page3.txt: [("'us", 'us'), ("'that", 'that')]
WMH19070206-V05-06-page1.txt: [('\'"', '"'), ("'last", 'last')]
WMH19070206-V05-06-page2.txt: [("'Financial.", 'Financial.')]
WMH19070206-V05-06-page4.txt: [("'as", 'as')]
WMH19070213-V05-07-page2.txt: [("'dollars", 'dollars')]
WMH19070220-V05-08-page1.txt: [("'orders", 'orders')]
WMH19070220-V05-08-page3.txt: [("'for", 'for')]
WMH19070227-V05-09-page1.txt: [("'rent", 'rent'), ("'o", 'o')]
WMH19070227-V05-09-page2.txt: [("'o", 'o')]
WMH19070306-V05-10-page1.txt: [("'Woe", 'Woe')]
WMH19070320-V05-12-page2.txt: [("'In", 'In')]
WMH19070320-V05-12-page3.txt: [("'Financial.", 'Financial.'), ("'Battle", 'Battle')]
WMH19070327-V05-13-page1.txt: [("'what", 'what')]
WMH19070327-V05-13-page2.txt: [("'endorsed", 'endorsed'), ("'cOi", 'cOi'), ("'Field.", 'Field.'), ("'Come", 'Come')]
WMH19070327-V05-13-page3.txt: [("'March", 'March'), ("'consideration", 'consideration')]
WMH19070327-V05-13-page4.txt: [("'Freemont", 'Freemont')]
WMH19070417-V05-16-page2.txt: [("'the", 'the'), ("'that", 'that')]
WMH19070417-V05-16-page3.txt: [("'Field.", 'Field.')]
WMH19070424-V05-17-page1.txt: [("'enemy", 'enemy')]
WMH19070424-V05-17-page2.txt: [("'foi", 'foi')]
WMH19070424-V05-17-page3.txt: [("'or", 'or'), ("'field.", 'field.')]
WMH19070424-V05-17-page4.txt: [("'for", 'for')]
WMH19070501-V05-18-page1.txt: [("'Wm", 'Wm')]
WMH19070501-V05-18-page2.txt: [("'knew", 'knew')]
WMH19070501-V05-18-page3.txt: [("'say", 'say'), ("'the", 'the'), ("'field.", 'field.')]
WMH19070501-V05-18-page4.txt: [("'Twould", 'Twould'), ("'read", 'read'), ("'I", 'I')]
WMH19070508-V05-19-page1.txt: [("'the", 'the')]
WMH19070515-V05-20-page1.txt: [("'vat", 'vat'), ("'Twill", 'Twill'), ("'twill", 'twill')]
WMH19070515-V05-20-page2.txt: [("'God", 'God'), ("'Third", 'Third')]
WMH19070522-V05-21-page3.txt: [("'Field.", 'Field.')]
WMH19070529-V05-22-page1.txt: [("'Tis", 'Tis')]
WMH19070529-V05-22-page2.txt: [("'Field.", 'Field.')]
WMH19070529-V05-22-page4.txt: [("'Jet", 'Jet')]
WMH19070605-V05-23-page1.txt: [("'y", 'y')]
WMH19070605-V05-23-page4.txt: [("'field.", 'field.')]
WMH19070612-V05-24-page2.txt: [("'the", 'the'), ("'field.", 'field.')]
WMH19070619-V05-25-page1.txt: [("'handle", 'handle'), ("'E.", 'E.')]
WMH19070619-V05-25-page3.txt: [("'Field.", 'Field.')]
WMH19070626-V05-26-page3.txt: [("'Field.", 'Field.'), ("'prevails.", 'prevails.')]
WMH19070703-V05-27-page4.txt: [("'depot", 'depot')]
WMH19070717-V05-29-page4.txt: [("'after", 'after'), ("'the", 'the'), ("'opened", 'opened')]
WMH19070724-V05-30-page1.txt: [("'m", 'm')]
WMH19070724-V05-30-page3.txt: [("'vas", 'vas'), ("'appear", 'appear'), ("'inspiring", 'inspiring')]
WMH19070731-V05-31-page1.txt: [("'Y", 'Y'), ("'children", 'children')]
WMH19070731-V05-31-page3.txt: [("'Ontario", 'Ontario')]
WMH19070807-V05-32-page2.txt: [("'the", 'the')]
WMH19070807-V05-32-page3.txt: [("'a", 'a')]
WMH19070807-V05-32-page4.txt: [("'cello", 'cello'), ("'Field.", 'Field.')]
WMH19070814-V05-33-page3.txt: [("'friend", 'friend')]
WMH19070828-V05-34-page2.txt: [("'men", 'men')]
WMH19070828-V05-34-page3.txt: [("'Field.", 'Field.'), ("'A", 'A'), ("'appreciate", 'appreciate')]
WMH19070904-V05-35-page1.txt: [("'how", 'how'), ("'employ", 'employ'), ("'our", 'our'), ("'a", 'a')]
WMH19070904-V05-35-page2.txt: [("'the", 'the'), ("'the", 'the'), ("'Do", 'Do')]
WMH19070911-V05-36-page1.txt: [("'Rrimr", 'Rrimr'), ("'classes", 'classes')]
WMH19070911-V05-36-page2.txt: [("'any", 'any')]
WMH19070911-V05-36-page4.txt: [("'the", 'the'), ("'Financial.", 'Financial.'), ("'Iowa", 'Iowa')]
WMH19070918-V05-37-page4.txt: [("'would", 'would'), ("'except", 'except')]
WMH19070925-V05-38-page2.txt: [("'dollars", 'dollars')]
WMH19071002-V05-39-page2.txt: [("'s", 's')]
WMH19071002-V05-39-page4.txt: [("'twixt", 'twixt'), ("'tis", 'tis'), ("'twixt", 'twixt')]
WMH19071009-V05-40-page2.txt: [("'Through", 'Through'), ("'emit", 'emit')]
WMH19071016-V05-41-page1.txt: [("'.", '.')]
WMH19071016-V05-41-page3.txt: [("'faith", 'faith')]
WMH19071023-V05-42-page1.txt: [("'''o", 'o')]
WMH19071030-V05-43-page1.txt: [("'to", 'to')]
WMH19071030-V05-43-page2.txt: [("'Reading", 'Reading')]
WMH19071030-V05-43-page3.txt: [("'until", 'until')]
WMH19071106-V05-44-page1.txt: [("'-", '-')]
WMH19071106-V05-44-page3.txt: [("'beginning", 'beginning'), ("'created", 'created'), ("'form", 'form'), ("'void", 'void'), ("'firmament", 'firmament'), ("'Let", 'Let'), ("'fruit", 'fruit'), ("'signs", 'signs'), ("'seasons", 'seasons')]
WMH19071113-V05-45-page2.txt: [("'There", 'There')]
WMH19071113-V05-45-page3.txt: [("'Trunk's", 'Trunks')]
WMH19071113-V05-45-page4.txt: [("'phones", 'phones'), ("'phone", 'phone')]
WMH19071120-V05-46-page1.txt: [("'Kings", 'Kings')]
WMH19071127-V05-47-page1.txt: [("'The", 'The'), ("'It", 'It')]
WMH19071127-V05-47-page3.txt: [("'now", 'now'), ("'people", 'people'), ("'it", 'it')]
WMH19071204-V05-48-page1.txt: [("'s", 's')]
WMH19071211-V05-49-page1.txt: [("'plEit", 'plEit'), ("'air.", 'air.')]
WMH19071211-V05-49-page2.txt: [("'especially", 'especially'), ("'made", 'made')]
WMH19080101-V06-01-page1.txt: [("'.", '.'), ("'Lis", 'Lis'), ("'Tls", 'Tls')]
WMH19080101-V06-01-page3.txt: [("'field", 'field')]
WMH19080101-V06-01-page4.txt: [("'SIGNS", 'SIGNS'), ("'fifty", 'fifty'), ("'Our", 'Our')]
WMH19080108-V06-02-page2.txt: [("'Field.", 'Field.')]
WMH19080108-V06-02-page3.txt: [("'shall", 'shall'), ("'be", 'be'), ("'sick", 'sick')]
WMH19080115-V06-03-page3.txt: [("'Field.", 'Field.'), ("'o", 'o'), ("'Michigan", 'Michigan')]
WMH19080122-V06-04-page1.txt: [("'Field.", 'Field.'), ("'III", 'III')]
WMH19080122-V06-04-page2.txt: [("'disease", 'disease')]
WMH19080129-V06-05-page1.txt: [("'AdVq", 'AdVq'), ("'tis", 'tis'), ("'greater", 'greater')]
WMH19080129-V06-05-page2.txt: [("'Field.", 'Field.'), ("'see", 'see')]
WMH19080129-V06-05-page3.txt: [("'Christian", 'Christian')]
WMH19080205-V06-06-page1.txt: [("'financial.", 'financial.'), ("'I", 'I')]
WMH19080205-V06-06-page4.txt: [("'once", 'once'), ("'success", 'success')]
WMH19080212-V06-07-page1.txt: [("'Arehk", 'Arehk'), ("'Atlanta", 'Atlanta'), ("'Iowa", 'Iowa')]
WMH19080212-V06-07-page3.txt: [("'Vaunt", 'Vaunt')]
WMH19080212-V06-07-page4.txt: [("'as", 'as'), ("'tis", 'tis')]
WMH19080219-V06-08-page2.txt: [("'RUSSELL.", 'RUSSELL.')]
WMH19080219-V06-08-page4.txt: [("'this", 'this')]
WMH19080226-V06-09-page3.txt: [("'without", 'without')]
WMH19080226-V06-09-page4.txt: [("'to", 'to')]
WMH19080304-V06-10-page1.txt: [("'gave", 'gave')]
WMH19080304-V06-10-page2.txt: [("'Field.", 'Field.')]
WMH19080304-V06-10-page3.txt: [("'a", 'a')]
WMH19080311-V06-11-page1.txt: [("'reflected", 'reflected'), ("'be", 'be')]
WMH19080311-V06-11-page3.txt: [("'Field.", 'Field.')]
WMH19080318-V06-12-page1.txt: [("'.", '.')]
WMH19080318-V06-12-page2.txt: [("'and", 'and')]
WMH19080325-V06-13-page2.txt: [("'spoke", 'spoke'), ("'our", 'our'), ("'minds", 'minds')]
WMH19080325-V06-13-page3.txt: [("'Field.", 'Field.'), ("'earnestly", 'earnestly')]
WMH19080325-V06-13-page4.txt: [("'papers", 'papers')]
WMH19080401-V06-14-page1.txt: [("'in", 'in'), ("'I", 'I')]
WMH19080408-V06-15-page1.txt: [("'the", 'the'), ("'President", 'President')]
WMH19080408-V06-15-page3.txt: [("'something", 'something')]
WMH19080415-V06-16-page1.txt: [("'with", 'with'), ("'E.", 'E.')]
WMH19080415-V06-16-page3.txt: [("'the", 'the'), ("'ii", 'ii')]
WMH19080422-V06-17-page1.txt: [("'a", 'a'), ("'at", 'at')]
WMH19080422-V06-17-page2.txt: [("'the", 'the'), ("'Financial", 'Financial')]
WMH19080422-V06-17-page3.txt: [("'Field.", 'Field.')]
WMH19080429-V06-18-page1.txt: [("'a", 'a')]
WMH19080429-V06-18-page2.txt: [("'race", 'race')]
WMH19080429-V06-18-page3.txt: [("'Field.", 'Field.'), ("'to", 'to'), ("'tention", 'tention'), ("'cause", 'cause')]
WMH19080506-V06-19-page1.txt: [("'III", 'III')]
WMH19080506-V06-19-page2.txt: [("'more", 'more')]
WMH19080506-V06-19-page3.txt: [("'the", 'the')]
WMH19080513-V06-20-page3.txt: [("'be", 'be')]
WMH19080520-V06-21-page1.txt: [("'A", 'A'), ("'in", 'in'), ("'s", 's')]
WMH19080520-V06-21-page3.txt: [("'circle", 'circle'), ("'by", 'by')]
WMH19080520-V06-21-page4.txt: [("'them", 'them')]
WMH19080527-V06-22-page2.txt: [("'to", 'to'), ("'and", 'and'), ("'therefore", 'therefore')]
WMH19080527-V06-22-page3.txt: [("'Field.", 'Field.'), ("'occupying", 'occupying'), ("'our", 'our'), ("'thus", 'thus')]
WMH19080603-V06-23-page1.txt: [("'these", 'these'), ("'do", 'do')]
WMH19080603-V06-23-page2.txt: [("'Field.", 'Field.'), ("'Essay", 'Essay'), ("'hand", 'hand')]
WMH19080603-V06-23-page3.txt: [("'to", 'to')]
WMH19080603-V06-23-page4.txt: [("'so", 'so'), ("'Walter", 'Walter')]
WMH19080610-V06-24-page1.txt: [("'d", 'd')]
WMH19080610-V06-24-page2.txt: [("'They", 'They')]
WMH19080610-V06-24-page3.txt: [("'consecrating", 'consecrating'), ("'for", 'for')]
WMH19080610-V06-24-page4.txt: [("'of", 'of')]
WMH19080617-V06-25-page1.txt: [("'.i", '.i')]
WMH19080617-V06-25-page2.txt: [("'We", 'We')]
WMH19080617-V06-25-page3.txt: [("'in", 'in'), ("'Shelby", 'Shelby')]
WMH19080617-V06-25-page4.txt: [("'been", 'been'), ("'Liberty", 'Liberty')]
WMH19080624-V06-26-page1.txt: [("'much", 'much')]
WMH19080624-V06-26-page2.txt: [("'to", 'to'), ("'o", 'o')]
WMH19080624-V06-26-page3.txt: [("'G.", 'G.')]
WMH19080624-V06-26-page4.txt: [("'Canadian", 'Canadian')]
WMH19080701-V06-27-page2.txt: [("'case", 'case'), ("'I", 'I'), ("'on", 'on'), ("'hindrance", 'hindrance'), ("'tis", 'tis'), ("'and", 'and')]
WMH19080701-V06-27-page3.txt: [("'Fake", 'Fake'), ("'Under", 'Under')]
WMH19080708-V06-28-page1.txt: [("'vt", 'vt'), ("'God's", 'Gods'), ("'funds", 'funds'), ("'bath", 'bath')]
WMH19080708-V06-28-page2.txt: [("'month", 'month')]
WMH19080715-V06-29-page1.txt: [("'to", 'to'), ("'it", 'it')]
WMH19080715-V06-29-page2.txt: [("'Yes", 'Yes'), ("'Why", 'Why'), ("'the", 'the'), ("'I", 'I')]
WMH19080715-V06-29-page3.txt: [("'Some", 'Some'), ("'congregation", 'congregation')]
WMH19080722-V06-30-page1.txt: [("'E.", 'E.')]
WMH19080722-V06-30-page2.txt: [("'this", 'this')]
WMH19080722-V06-30-page3.txt: [("'shown", 'shown')]
WMH19080729-V06-31-page2.txt: [("'patients", 'patients'), ("'will", 'will')]
WMH19080729-V06-31-page4.txt: [("'I", 'I'), ("'Sound", 'Sound'), ("'Elder", 'Elder')]
WMH19080805-V06-32-page1.txt: [("'of", 'of')]
WMH19080805-V06-32-page4.txt: [("'city", 'city'), ("'exercises", 'exercises'), ("'in", 'in'), ("'On", 'On')]
WMH19080812-V06-33-page2.txt: [("'years", 'years')]
WMH19080812-V06-33-page3.txt: [("'Hew", 'Hew'), ("'Take", 'Take')]
WMH19080826-V06-34-page1.txt: [("'an", 'an'), ("'decree", 'decree'), ("'round", 'round'), ("'Twas", 'Twas')]
WMH19080826-V06-34-page4.txt: [("'get", 'get'), ("'prepare", 'prepare')]
WMH19080902-V06-35-page1.txt: [("'them", 'them'), ("'must", 'must'), ("'study", 'study'), ("'Neath", 'Neath')]
WMH19080902-V06-35-page2.txt: [("'entire", 'entire'), ("'o", 'o'), ("'became", 'became')]
WMH19080902-V06-35-page4.txt: [("'happiness.", 'happiness.')]
WMH19080909-V06-36-page1.txt: [("'kingdom", 'kingdom')]
WMH19080909-V06-36-page3.txt: [("'Financial", 'Financial')]
WMH19080916-V06-37-page2.txt: [("'company", 'company')]
WMH19080916-V06-37-page3.txt: [("'Men", 'Men'), ("'so", 'so')]
WMH19080916-V06-37-page4.txt: [("'icopies", 'icopies'), ("'is", 'is')]
WMH19080923-V06-38-page1.txt: [("'acknowledge", 'acknowledge')]
WMH19080923-V06-38-page3.txt: [("'there", 'there')]
WMH19080923-V06-38-page4.txt: [("'It", 'It')]
WMH19080930-V06-39-page1.txt: [("'SABBATH", 'SABBATH')]
WMH19081007-V06-40-page1.txt: [("'OW", 'OW')]
WMH19081007-V06-40-page2.txt: [("'Tis", 'Tis'), ("'of", 'of')]
WMH19081007-V06-40-page3.txt: [("'a", 'a'), ("'o", 'o'), ("'Well", 'Well'), ("'I", 'I')]
WMH19081014-V06-41-page2.txt: [("'and", 'and'), ("'who", 'who')]
WMH19081021-V06-42-page1.txt: [("'unless", 'unless'), ("'missions.", 'missions.')]
WMH19081021-V06-42-page2.txt: [("'that", 'that'), ("'Abstain", 'Abstain')]
WMH19081021-V06-42-page3.txt: [("'years", 'years')]
WMH19081021-V06-42-page4.txt: [("'toward", 'toward')]
WMH19081028-V06-43-page1.txt: [("'was", 'was')]
WMH19081028-V06-43-page3.txt: [("'there", 'there')]
WMH19081028-V06-43-page4.txt: [("'subscribers", 'subscribers')]
WMH19081104-V06-44-page1.txt: [("'tithe", 'tithe'), ("'the", 'the'), ("'graves", 'graves')]
WMH19081104-V06-44-page2.txt: [("'.be", '.be'), ("'Falls", 'Falls'), ("'to", 'to'), ("'is", 'is')]
WMH19081104-V06-44-page4.txt: [("'liberally", 'liberally')]
WMH19081111-V06-45-page1.txt: [("'tives", 'tives'), ("'summer", 'summer')]
WMH19081111-V06-45-page3.txt: [("'Genesis", 'Genesis'), ("'separated", 'separated'), ("'lights", 'lights'), ("'beginning", 'beginning'), ("'created", 'created'), ("'signs", 'signs'), ("'form", 'form'), ("'firmament", 'firmament'), ("'Heaven.", 'Heaven.'), ("'seasons", 'seasons')]
In [36]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction4

Average verified rate: 0.9771874993174814

Average of error rates: 0.024615532118887822

Total token count: 915726

In [37]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[37]:
[('m', 1750),
 ('w', 1506),
 ('g', 1439),
 ('e', 1317),
 ('d', 1279),
 ('r', 688),
 ('n', 645),
 ("'", 490),
 ('f', 446),
 ('t', 384),
 ('th', 283),
 ('co', 172),
 ('oo', 171),
 ('sabbathschool', 163),
 ('io', 120),
 ('mt', 108),
 ('k', 107),
 ('ro', 96),
 ('wm', 83),
 ('numbess', 75),
 ('re', 71),
 ('u', 70),
 ("canvassers'", 58),
 ('x', 46),
 ('horr', 39),
 ("the'", 38),
 ('rd', 33),
 ('blendon', 32),
 ('ex', 32),
 ('brower', 31),
 ('harnden', 30),
 ("f'd", 30),
 ('mchugh', 29),
 ('seventhday', 28),
 ('nd', 28),
 ('cleora', 27),
 ('tion', 25),
 ('nunica', 23),
 ('sabbathschools', 23),
 ('q', 23),
 ('-', 22),
 ('vowyla', 21),
 ('al', 21),
 ('loth', 20),
 ('z', 20),
 ('fd', 20),
 ('michi', 20),
 ('psa', 20),
 ('ti', 20),
 ('ne', 19)]

Correction 5 -- Rejoin Split Words

In [39]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction5"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)
    
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
WMH19030128-V01-04-page4.txt: [('co', 'operate')]
WMH19030415-V01-15-page2.txt: [('IMPRES', 'SIONS')]
WMH19030415-V01-15-page4.txt: [('Verm', 'on')]
WMH19030506-V01-18-page3.txt: [('sugg', 'estion')]
WMH19030513-V01-19-page3.txt: [('th', 'in')]
WMH19030520-V01-20-page3.txt: [('co', 'operating')]
WMH19030603-V01-22-page1.txt: [('TES', 'TAMENT')]
WMH19030603-V01-22-page3.txt: [('AB', 'OLISHED')]
WMH19030603-V01-22-page4.txt: [('th', 'at'), ('co', 'laborers')]
WMH19030610-V01-23-page1.txt: [('pre', 'eminently')]
WMH19030610-V01-23-page3.txt: [('unscript', 'ural')]
WMH19030624-V01-25-page4.txt: [('Mc', 'Bride')]
WMH19030715-V01-28-page1.txt: [('mo', 'at')]
WMH19030715-V01-28-page2.txt: [('th', 'e')]
WMH19030715-V01-28-page3.txt: [('wa', 's'), ('developmen', 't')]
WMH19030722-V01-29-page3.txt: [('Kellog', 'g')]
WMH19031028-V01-43-page1.txt: [('ca', 'use')]
WMH19031118-V01-46-page1.txt: [('co', 'operate')]
WMH19031118-V01-46-page3.txt: [('lig', 'and')]
WMH19040113-V02-03-page3.txt: [('AC', 'CEPT')]
WMH19040203-V02-05-page3.txt: [('GENER', 'AL')]
WMH19040203-V02-05-page4.txt: [("Sailor'", 's')]
WMH19040210-V02-06-page3.txt: [('co', 'operation')]
WMH19040210-V02-06-page4.txt: [('Mc', 'Allister')]
WMH19040309-V02-10-page1.txt: [('CO', 'OPERATE')]
WMH19040323-V02-12-page1.txt: [('conven', 'tions')]
WMH19040330-V02-13-page1.txt: [('RE', 'QUISITE'), ('ac', 'es')]
WMH19040330-V02-13-page3.txt: [('bili', 'Ousness')]
WMH19040406-V02-14-page3.txt: [('co', 'operate')]
WMH19040413-V02-15-page3.txt: [('Dimonda', 'le')]
WMH19040420-V02-16-page1.txt: [('treasur', 'ers')]
WMH19040427-V02-17-page3.txt: [('co', 'workers')]
WMH19040427-V02-17-page4.txt: [('ti', 'the')]
WMH19040608-V02-23-page3.txt: [('co', 'operation')]
WMH19040608-V02-23-page4.txt: [('re', 'employ'), ('Scandina', 'vian')]
WMH19040629-V02-25-page3.txt: [('co', 'workers')]
WMH19040629-V02-25-page4.txt: [('th', 'at')]
WMH19040706-V02-26-page2.txt: [('institu', 'tion')]
WMH19040706-V02-26-page3.txt: [('re', 'turn')]
WMH19040713-V02-27-page1.txt: [('CIRCUM', 'STANCES'), ('DEFI', 'NITELY'), ('Vo', 'L')]
WMH19040720-V02-28-page1.txt: [('re', 'a')]
WMH19040727-V02-29-page1.txt: [('re', 'checking')]
WMH19040803-V02-30-page2.txt: [('re', 'hired')]
WMH19040803-V02-30-page3.txt: [('re', 'engaged')]
WMH19040817-V02-32-page2.txt: [('io', 'n')]
WMH19040914-V02-34-page1.txt: [('co', 'laborers')]
WMH19040914-V02-34-page3.txt: [('re', 'elected')]
WMH19040921-V02-34a-page1.txt: [('Io', 'was')]
WMH19040921-V02-34a-page2.txt: [('Responsib', 'ility')]
WMH19040928-V02-35-page1.txt: [('inacces', 'sible')]
WMH19040928-V02-35-page4.txt: [('re', 'opens')]
WMH19041005-V02-36-page4.txt: [('corre', 'late')]
WMH19041019-V02-38-page4.txt: [('Sabbath-', 'school')]
WMH19041026-V02-39-page2.txt: [('PEO', 'PLE')]
WMH19041026-V02-39-page3.txt: [('PRE', 'SENT')]
WMH19041026-V02-39-page4.txt: [('co', 'laborer')]
WMH19041102-V02-40-page3.txt: [('connec', 'tion'), ('co', 'workers')]
WMH19041102-V02-40-page4.txt: [('Gei', 'sel')]
WMH19041123-V02-43-page1.txt: [('sca', 't')]
WMH19041123-V02-43-page4.txt: [('ti', 'e'), ('co', 'operate')]
WMH19041130-V02-44-page3.txt: [('co', 'operation')]
WMH19041207-V02-45-page4.txt: [('reabsorp', 'tion')]
WMH19041221-V02-47-page3.txt: [('EDI', 'TION'), ('NEC', 'ESSARILY')]
WMH19041228-V02-48-page1.txt: [('re', 'counting')]
WMH19041228-V02-48-page2.txt: [('re', 'consecrated')]
WMH19050104-V03-01-page2.txt: [('exul', 'tantly')]
WMH19050111-V03-02-page2.txt: [('co', 'operate')]
WMH19050118-V03-03-page3.txt: [('Ob', 'ject')]
WMH19050201-V03-04-page2.txt: [('ble', 'ssed')]
WMH19050201-V03-04-page4.txt: [('al', 'a')]
WMH19050208-V03-05-page1.txt: [('Cre', 'W')]
WMH19050215-V03-06-page4.txt: [('re', 'mains')]
WMH19050222-V03-07-page2.txt: [('co', 'operation')]
WMH19050222-V03-07-page3.txt: [('co', 'operate')]
WMH19050301-V03-08-page1.txt: [('Mc', 'Curdy')]
WMH19050322-V03-11-page3.txt: [('greate', 'r')]
WMH19050322-V03-11-page4.txt: [('church-s', 'chool')]
WMH19050405-V03-13-page2.txt: [('humani', 'ty')]
WMH19050413-V03-14-page3.txt: [('GIV', 'EN')]
WMH19050419-V03-15-page1.txt: [('co', 'operation')]
WMH19050503-V03-17-page4.txt: [('un', 'able')]
WMH19050510-V03-18-page4.txt: [('increas', 'ing'), ('ro', 'o')]
WMH19050517-V03-19-page1.txt: [('co', 'operation')]
WMH19050517-V03-19-page3.txt: [('Vermontvi', 'lle')]
WMH19050524-V03-20-page4.txt: [('glor', 'ious')]
WMH19050531-V03-21-page3.txt: [('HOFST', 'RA')]
WMH19050531-V03-21-page4.txt: [('vis', 'ited'), ('se', 'cure')]
WMH19050607-V03-22-page3.txt: [('pu', 'pils'), ('co', 'operation')]
WMH19050614-V03-23-page1.txt: [('ex', 'penses')]
WMH19050614-V03-23-page3.txt: [('Educa', 'tion')]
WMH19050614-V03-23-page4.txt: [('co', 'operation')]
WMH19050621-V03-24-page3.txt: [('re', 'echoed')]
WMH19050628-V03-25-page1.txt: [('re', 'established')]
WMH19050705-V03-26-page2.txt: [('co', 'operation')]
WMH19050705-V03-26-page3.txt: [('soci', 'eties')]
WMH19050712-V03-27-page1.txt: [('re', 'No')]
WMH19050712-V03-27-page4.txt: [('Michi', 'gan')]
WMH19050719-V03-28-page3.txt: [('fi', 'st')]
WMH19050726-V03-29-page1.txt: [('ro', 'o')]
WMH19050802-V03-30-page4.txt: [('co', 'operate')]
WMH19050809-V03-31-page2.txt: [('co', 'operate')]
WMH19050816-V03-32-page1.txt: [('ob', 'ject')]
WMH19050830-V03-33-page3.txt: [('ap', 'plicable')]
WMH19050906-V03-34-page3.txt: [('pl', 'acidity')]
WMH19050906-V03-34-page4.txt: [('Wednes', 'day')]
WMH19050920-V03-36-page2.txt: [('th', 'under'), ('io', 'was')]
WMH19050920-V03-36-page4.txt: [('re', 'elected')]
WMH19051004-V03-38-page3.txt: [('coun', 'sel')]
WMH19051018-V03-40-page2.txt: [('ac', 'cepted')]
WMH19051101-V03-42-page1.txt: [('TI', 'e')]
WMH19051101-V03-42-page3.txt: [('suf', 'ferers')]
WMH19051101-V03-42-page4.txt: [('MESSEN', 'GER'), ('Ne', 'braska')]
WMH19051108-V03-43-page1.txt: [('ro', 'o')]
WMH19051108-V03-43-page3.txt: [('re', 'port')]
WMH19051122-V03-45-page2.txt: [('co', 'operate')]
WMH19051129-V03-46-page2.txt: [('co', 'operation')]
WMH19051206-V03-47-page1.txt: [('io', 'n')]
WMH19051206-V03-47-page2.txt: [('ment', 'on')]
WMH19051206-V03-47-page4.txt: [('re', 'organized')]
WMH19051213-V03-48-page2.txt: [('th', 'a')]
WMH19051213-V03-48-page3.txt: [('Ti', 'the'), ('re', 'vived'), ('ac', 'complished')]
WMH19051220-V03-49-page1.txt: [('swi', 'ft'), ('co', 'operation')]
WMH19051220-V03-49-page2.txt: [('G.', ''), ('co', 'operation'), ('CO', 'OPERATION')]
WMH19051220-V03-49-page3.txt: [('peo', 'ple')]
WMH19051227-V03-50-page1.txt: [('io', 'n')]
WMH19051227-V03-50-page2.txt: [('co', 'operation')]
WMH19060103-V04-01-page4.txt: [('pa', 'per'), ('giv', 'ing')]
WMH19060110-V04-02-page1.txt: [('re', 'acting')]
WMH19060110-V04-02-page2.txt: [('ro', 'o')]
WMH19060124-V04-04-page2.txt: [('INCORPO', 'RATED')]
WMH19060124-V04-04-page3.txt: [('co', 'operating'), ('remem', 'bereth')]
WMH19060131-V04-05-page2.txt: [('Mis', 'o'), ('ro', 'o')]
WMH19060207-V04-06-page1.txt: [('Pr', 'esident'), ('COLPORTE', 'UR'), ('ac', 'cepted')]
WMH19060214-V04-07-page2.txt: [('Smi', 'th'), ('ro', 'o')]
WMH19060221-V04-08-page1.txt: [('co', 'operation')]
WMH19060221-V04-08-page2.txt: [('Treasur', 'er')]
WMH19060228-V04-09-page1.txt: [('Janu', 'ary'), ('co', 'operation')]
WMH19060228-V04-09-page2.txt: [('soci', 'eties')]
WMH19060307-V04-10-page3.txt: [('co', 'operation')]
WMH19060314-V04-11-page1.txt: [('co', 'operation'), ('ite', 'm')]
WMH19060314-V04-11-page3.txt: [('co', 'operation'), ('Pa', 'w')]
WMH19060314-V04-11-page4.txt: [('MICHI', 'GAN')]
WMH19060321-V04-12-page1.txt: [('temporari', 'ly')]
WMH19060321-V04-12-page3.txt: [('Pa', 'w')]
WMH19060328-V04-13-page1.txt: [('co', 'operation')]
WMH19060328-V04-13-page4.txt: [('suf', 'fering')]
WMH19060411-V04-15-page1.txt: [('co', 'operation')]
WMH19060411-V04-15-page2.txt: [('ro', 'o')]
WMH19060425-V04-17-page1.txt: [('institu', 'tions')]
WMH19060502-V04-18-page1.txt: [("Sec'", 'y')]
WMH19060502-V04-18-page3.txt: [('ARBEI', 'TER')]
WMH19060509-V04-19-page2.txt: [('PROPH', 'ECY')]
WMH19060523-V04-20-page2.txt: [('appropria', 'tion')]
WMH19060523-V04-20-page3.txt: [('th', 'a')]
WMH19060606-V04-22-page2.txt: [('re', 'establish')]
WMH19060613-V04-23-page3.txt: [('fl', 'o')]
WMH19060613-V04-23-page4.txt: [('ES', 'SENTIAL')]
WMH19060620-V04-24-page2.txt: [('RE', 'PENTED')]
WMH19060620-V04-24-page4.txt: [('ut', 'A'), ('recitati', 'on')]
WMH19060627-V04-25-page4.txt: [('ro', 'o')]
WMH19060704-V04-26-page1.txt: [('ex', 'ample'), ('li', 'i')]
WMH19060711-V04-27-page2.txt: [('ro', 'Jo')]
WMH19060711-V04-27-page3.txt: [('Re', 'populated'), ('re', 'populated')]
WMH19060718-V04-28-page1.txt: [('mo', 'I')]
WMH19060718-V04-28-page2.txt: [('co', 'operation')]
WMH19060718-V04-28-page4.txt: [('appe', 'tites')]
WMH19060801-V04-30-page3.txt: [('ap', 'plications')]
WMH19060808-V04-31-page1.txt: [('APPRO', 'PRIATED'), ('gra', 'ger')]
WMH19060808-V04-31-page2.txt: [('th', 'at'), ('co', 'operation')]
WMH19060822-V04-32-page3.txt: [('re', 'assure')]
WMH19060905-V04-34-page2.txt: [('SPE', 'CIFIC')]
WMH19060912-V04-35-page3.txt: [('re', 'turn')]
WMH19061010-V04-39-page1.txt: [('re', 'locate')]
WMH19061017-V04-40-page1.txt: [('TA', 'RE')]
WMH19061017-V04-40-page3.txt: [('es', 'sential')]
WMH19061024-V04-41-page3.txt: [('hov', 'els')]
WMH19061031-V04-42-page1.txt: [('INTERNA', 'TIONAL')]
WMH19061107-V04-43-page1.txt: [('Wr', 'IST')]
WMH19061114-V04-44-page3.txt: [('submerg', 'ed')]
WMH19061128-V04-46-page1.txt: [('whi', 'le'), ('co', 'operation')]
WMH19061128-V04-46-page4.txt: [('co', 'operate')]
WMH19061205-V04-47-page1.txt: [('moun', 'tains')]
WMH19061226-V04-50-page1.txt: [('municipali', 'ty')]
WMH19061226-V04-50-page3.txt: [('consi', 'dered')]
WMH19070102-V05-01-page1.txt: [('confere', 'e')]
WMH19070102-V05-01-page2.txt: [('Scandi', 'navian'), ('educa', 'tional')]
WMH19070109-V05-02-page1.txt: [('co', 'operate'), ('desti', 'tute')]
WMH19070109-V05-02-page3.txt: [('Meri', 'dian')]
WMH19070116-V05-03-page2.txt: [('resum', 'ing')]
WMH19070116-V05-03-page3.txt: [('founda', 'tion')]
WMH19070123-V05-04-page1.txt: [('Ti', 'the')]
WMH19070123-V05-04-page3.txt: [('secur', 'ing'), ('Co', 'operation'), ('co', 'operation'), ('includ', 'ing')]
WMH19070130-V05-05-page4.txt: [('RE', 'PORTS')]
WMH19070206-V05-06-page4.txt: [('co', 'operate')]
WMH19070213-V05-07-page2.txt: [('co', 'operate')]
WMH19070227-V05-09-page2.txt: [('Ti', 'the')]
WMH19070306-V05-10-page2.txt: [('requisi', 'tes')]
WMH19070306-V05-10-page3.txt: [('thi', 'nking'), ('pre', 'sented')]
WMH19070313-V05-11-page4.txt: [('es', 't')]
WMH19070327-V05-13-page2.txt: [('disci', 'pline')]
WMH19070327-V05-13-page3.txt: [('consci', 'entious')]
WMH19070327-V05-13-page4.txt: [('RE', 'CEIVED')]
WMH19070410-V05-15-page1.txt: [('Stockda', 'le'), ('identit', 'y')]
WMH19070410-V05-15-page2.txt: [('Legis', 'lature')]
WMH19070410-V05-15-page3.txt: [('co', 'operation')]
WMH19070410-V05-15-page4.txt: [('Pottervi', 'lle')]
WMH19070417-V05-16-page2.txt: [('re', 'quire')]
WMH19070424-V05-17-page1.txt: [('peo', 'ple')]
WMH19070424-V05-17-page3.txt: [('es', 'to')]
WMH19070501-V05-18-page3.txt: [('accompl', 'ished')]
WMH19070508-V05-19-page1.txt: [('Co', 'operate'), ('co', 'operate')]
WMH19070515-V05-20-page2.txt: [('th', 'a')]
WMH19070529-V05-22-page2.txt: [('ele', 'vated')]
WMH19070612-V05-24-page1.txt: [('pA', 'w')]
WMH19070619-V05-25-page3.txt: [('co', 'operation')]
WMH19070703-V05-27-page3.txt: [('ca', 'm')]
WMH19070703-V05-27-page4.txt: [('th', 'a')]
WMH19070710-V05-28-page4.txt: [('th', 'e')]
WMH19070731-V05-31-page1.txt: [('re', 'No')]
WMH19070731-V05-31-page2.txt: [('re', 'opened')]
WMH19070814-V05-33-page3.txt: [('mis', 'pronounces')]
WMH19070918-V05-37-page4.txt: [('LAN', 'GUAGE')]
WMH19070925-V05-38-page1.txt: [('re', 'No')]
WMH19071002-V05-39-page1.txt: [('Sanitari', 'um'), ('re', 'locate'), ('larg', 'ely'), ('M.', '')]
WMH19071002-V05-39-page4.txt: [('Whi', 'tmarsh')]
WMH19071009-V05-40-page2.txt: [('enf', 'orced')]
WMH19071016-V05-41-page1.txt: [('un', 'Christian')]
WMH19071016-V05-41-page2.txt: [('attendi', 'ng')]
WMH19071023-V05-42-page3.txt: [('co', 'worker')]
WMH19071106-V05-44-page1.txt: [('co', 'operation')]
WMH19071106-V05-44-page2.txt: [('DISESTAB', 'LISHED')]
WMH19071106-V05-44-page4.txt: [('transla', 'tions')]
WMH19071120-V05-46-page3.txt: [('re', 'enlisted')]
WMH19071120-V05-46-page4.txt: [('bi', 'nary')]
WMH19071127-V05-47-page3.txt: [('RE', 'VIEW')]
WMH19071211-V05-49-page1.txt: [('Sa', 'tan')]
WMH19071211-V05-49-page4.txt: [('Ti', 'the')]
WMH19071218-V05-50-page1.txt: [('Notwith', 'standing')]
WMH19071218-V05-50-page3.txt: [('criti', 'cising'), ('recom', 'mendation')]
WMH19080101-V06-01-page4.txt: [('Otseg', 'o')]
WMH19080108-V06-02-page2.txt: [('AMA', 'DON')]
WMH19080108-V06-02-page4.txt: [('th', 'at')]
WMH19080115-V06-03-page2.txt: [('re', 'enact')]
WMH19080115-V06-03-page3.txt: [('distri', 'bution'), ('ro', 'o')]
WMH19080122-V06-04-page4.txt: [('Wellspri', 'ng')]
WMH19080129-V06-05-page3.txt: [('co', 'operates')]
WMH19080205-V06-06-page3.txt: [('counte', 'nance'), ('Brot', 'her')]
WMH19080212-V06-07-page1.txt: [('Sa', 'bbath')]
WMH19080212-V06-07-page4.txt: [('Se', 'Lected')]
WMH19080219-V06-08-page2.txt: [('Ay', 'ars')]
WMH19080226-V06-09-page1.txt: [('co', 'operate')]
WMH19080226-V06-09-page2.txt: [('commi', 'ttees')]
WMH19080304-V06-10-page1.txt: [('tA', 'M')]
WMH19080304-V06-10-page4.txt: [('discipl', 'ine'), ('un', 'planned')]
WMH19080311-V06-11-page1.txt: [('famil', 'iar')]
WMH19080311-V06-11-page4.txt: [('co', 'operation')]
WMH19080325-V06-13-page1.txt: [('re', 'read'), ('Ev', 'a')]
WMH19080415-V06-16-page1.txt: [('EDUCATIONA', 'L'), ('Mc', 'Reynolds')]
WMH19080415-V06-16-page4.txt: [('th', 'or')]
WMH19080422-V06-17-page2.txt: [('Pottervi', 'lle')]
WMH19080429-V06-18-page1.txt: [('co', 'operate')]
WMH19080506-V06-19-page4.txt: [('ren', 'a')]
WMH19080513-V06-20-page1.txt: [('mo', 'rA'), ('co', 'operate')]
WMH19080513-V06-20-page2.txt: [('co', 'operation'), ('sor', 'a')]
WMH19080603-V06-23-page1.txt: [('mechani', 'Cal')]
WMH19080603-V06-23-page3.txt: [('co', 'operative')]
WMH19080617-V06-25-page2.txt: [('includ', 'ing')]
WMH19080701-V06-27-page1.txt: [('ex', 'pressive')]
WMH19080701-V06-27-page2.txt: [('re', 'union')]
WMH19080708-V06-28-page1.txt: [('FI', 'ELD')]
WMH19080708-V06-28-page3.txt: [('retur', 'ned')]
WMH19080715-V06-29-page1.txt: [('co', 'operation')]
WMH19080715-V06-29-page3.txt: [('th', 'at')]
WMH19080722-V06-30-page3.txt: [('ob', 'jections')]
WMH19080722-V06-30-page4.txt: [('DEYOU', 'NG')]
WMH19080729-V06-31-page1.txt: [('al', 'ways')]
WMH19080805-V06-32-page2.txt: [('re', 'sided')]
WMH19080812-V06-33-page3.txt: [('GENER', 'AL')]
WMH19080909-V06-36-page2.txt: [('auspi', 'ces')]
WMH19080916-V06-37-page2.txt: [('co', 'workers')]
WMH19080923-V06-38-page2.txt: [('Educa', 'tional')]
WMH19080930-V06-39-page1.txt: [('gi', 'a')]
WMH19080930-V06-39-page2.txt: [('prepar', 'ed')]
WMH19081007-V06-40-page4.txt: [('Al', 'ma')]
WMH19081028-V06-43-page1.txt: [('gos', 'pel')]
WMH19081028-V06-43-page2.txt: [('RE', 'QUIRED')]
WMH19081104-V06-44-page4.txt: [('MICHI', 'GAN')]
WMH19081111-V06-45-page1.txt: [('na', 'tives')]
In [42]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction5

Average verified rate: 0.9775893748620843

Average of error rates: 0.02419558964525408

Total token count: 915414

In [43]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[43]:
[('m', 1749),
 ('w', 1503),
 ('g', 1438),
 ('e', 1316),
 ('d', 1279),
 ('r', 687),
 ('n', 642),
 ("'", 490),
 ('f', 446),
 ('t', 381),
 ('th', 275),
 ('oo', 171),
 ('sabbathschool', 163),
 ('io', 115),
 ('mt', 108),
 ('k', 107),
 ('co', 101),
 ('ro', 96),
 ('wm', 83),
 ('numbess', 75),
 ('u', 70),
 ("canvassers'", 58),
 ('x', 46),
 ('horr', 39),
 ("the'", 38),
 ('rd', 33),
 ('blendon', 32),
 ('brower', 31),
 ('ex', 30),
 ('harnden', 30),
 ("f'd", 30),
 ('mchugh', 29),
 ('re', 29),
 ('seventhday', 28),
 ('nd', 28),
 ('cleora', 27),
 ('q', 23),
 ('nunica', 23),
 ('sabbathschools', 23),
 ('-', 22),
 ('tion', 21),
 ('vowyla', 21),
 ('fd', 20),
 ('psa', 20),
 ('z', 20),
 ('loth', 20),
 ('numbeps', 19),
 ('ioo', 19),
 ('drury', 18),
 ('hoffstra', 18)]

Correction 6 -- Rejoin Split Words II

In [45]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction6"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)
    
    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    errors = reports.identify_errors(tokens, spelling_dictionary)

    replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)
    
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_split_words(replacement, content)
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
WMH19030415-V01-15-page4.txt: [('Confer', 'ence'), ('Gene', 'al'), ('Vermon', 'tville')]
WMH19030520-V01-20-page4.txt: [('depart', 'ment')]
WMH19030603-V01-22-page2.txt: [('CON', 'FERENCE')]
WMH19030701-V01-26-page1.txt: [('DEPART', 'MENT')]
WMH19030722-V01-29-page1.txt: [('r', 'esented')]
WMH19030722-V01-29-page4.txt: [('cam', 'pmeeting')]
WMH19031118-V01-46-page1.txt: [('mission', 'ary')]
WMH19031118-V01-46-page3.txt: [('in', 'stil')]
WMH19040127-V02-04-page4.txt: [('improve', 'ments')]
WMH19040203-V02-05-page4.txt: [('at', 'tention'), ('INSTRUCT', 'ORS')]
WMH19040210-V02-06-page4.txt: [('San', 'itarium')]
WMH19040224-V02-08-page1.txt: [('priv', 'ilege')]
WMH19040224-V02-08-page4.txt: [('S', 'hool')]
WMH19040302-V02-09-page2.txt: [('atone', 'ment')]
WMH19040323-V02-12-page1.txt: [('in', 'terest')]
WMH19040330-V02-13-page4.txt: [('maili', 'ng')]
WMH19040413-V02-15-page3.txt: [('an', 'nouncing')]
WMH19040413-V02-15-page4.txt: [('SOUTH', 'ERN')]
WMH19040420-V02-16-page1.txt: [('treasur', 'ers')]
WMH19040504-V02-18-page2.txt: [('m', 'ost')]
WMH19040504-V02-18-page4.txt: [('kin', 'gdom')]
WMH19040518-V02-20-page4.txt: [('per', 'sonal')]
WMH19040608-V02-23-page4.txt: [('Sec', 'retary'), ('Comm', 'ittee')]
WMH19040629-V02-25-page4.txt: [('T', 'oo')]
WMH19040706-V02-26-page3.txt: [('world', 'liness')]
WMH19040706-V02-26-page4.txt: [('Ed', 'itor')]
WMH19040720-V02-28-page1.txt: [('rea', 'ppear'), ('re', 'ligious')]
WMH19040803-V02-30-page4.txt: [('n', 'ay')]
WMH19040810-V02-31-page2.txt: [('r', 'emove')]
WMH19040817-V02-32-page1.txt: [('ha', 'lf')]
WMH19040914-V02-34-page3.txt: [('Bes', 'sie')]
WMH19041005-V02-36-page3.txt: [('THANK', 'FUL')]
WMH19041005-V02-36-page4.txt: [('r', 'oo')]
WMH19041019-V02-38-page4.txt: [('Bat', 'tle')]
WMH19041026-V02-39-page2.txt: [('EN', 'GAGED')]
WMH19041102-V02-40-page3.txt: [('connec', 'tion')]
WMH19041109-V02-41-page1.txt: [('San', 'itarium')]
WMH19041130-V02-44-page2.txt: [('a', 'ssociation')]
WMH19041130-V02-44-page3.txt: [('depart', 'ment')]
WMH19041207-V02-45-page2.txt: [('confer', 'ence')]
WMH19041207-V02-45-page4.txt: [('elim', 'inated'), ('reabsorp', 'tion')]
WMH19041221-V02-47-page3.txt: [('REG', 'ULAR')]
WMH19041228-V02-48-page1.txt: [('and', 're')]
WMH19050104-V03-01-page1.txt: [('to', 'ft')]
WMH19050104-V03-01-page4.txt: [('t', 'wo')]
WMH19050111-V03-02-page2.txt: [('Wash', 'ington')]
WMH19050111-V03-02-page4.txt: [('faith', 'ful')]
WMH19050201-V03-04-page3.txt: [('K', 'inderhook')]
WMH19050222-V03-07-page3.txt: [('CAN', 'VASSERS')]
WMH19050322-V03-11-page6.txt: [('A', 'nna'), ('vi', 'ne')]
WMH19050405-V03-13-page1.txt: [('CON', 'SIDER')]
WMH19050419-V03-15-page1.txt: [('confer', 'ence')]
WMH19050419-V03-15-page3.txt: [('So', 'ciety')]
WMH19050503-V03-17-page3.txt: [('par', 'ents')]
WMH19050510-V03-18-page1.txt: [('teach', 'ers')]
WMH19050517-V03-19-page4.txt: [('Confer', 'ence')]
WMH19050531-V03-21-page3.txt: [('MICH', 'IGAN')]
WMH19050614-V03-23-page1.txt: [('ex', 'penses')]
WMH19050621-V03-24-page4.txt: [('E', 'ndureth')]
WMH19050726-V03-29-page1.txt: [('Publ', 'ic')]
WMH19050802-V03-30-page2.txt: [('deliver', 'ance')]
WMH19050830-V03-33-page3.txt: [('ap', 'plicable')]
WMH19050920-V03-36-page1.txt: [('V', 'ideto')]
WMH19051004-V03-38-page1.txt: [('pro', 'phetic')]
WMH19051004-V03-38-page3.txt: [('A', 'nd')]
WMH19051025-V03-41-page1.txt: [('H', 'artwell')]
WMH19051025-V03-41-page3.txt: [('pro', 'vides')]
WMH19051025-V03-41-page4.txt: [('M', 'adison')]
WMH19051129-V03-46-page1.txt: [('Ed', 'uc')]
WMH19051129-V03-46-page2.txt: [('The', 're')]
WMH19051129-V03-46-page3.txt: [('sol', 'emn')]
WMH19051129-V03-46-page4.txt: [('LIT', 'TLE')]
WMH19051213-V03-48-page2.txt: [('POT', 'TERVILLE')]
WMH19051213-V03-48-page3.txt: [('re', 'vived')]
WMH19060103-V04-01-page1.txt: [('The', 're')]
WMH19060117-V04-03-page1.txt: [('Mon', 'tcalm')]
WMH19060117-V04-03-page2.txt: [('CONFER', 'ENCE')]
WMH19060124-V04-04-page1.txt: [('Bar', 'ry')]
WMH19060131-V04-05-page1.txt: [('g', 'oo')]
WMH19060131-V04-05-page2.txt: [('of', 'ficer')]
WMH19060131-V04-05-page4.txt: [('so', 'journ')]
WMH19060214-V04-07-page2.txt: [('o', 'ff')]
WMH19060221-V04-08-page2.txt: [('Mar', 'garet')]
WMH19060228-V04-09-page1.txt: [('to', 'co'), ('Janu', 'ary')]
WMH19060307-V04-10-page1.txt: [('Com', 'mittee')]
WMH19060307-V04-10-page3.txt: [('teach', "ers'")]
WMH19060314-V04-11-page1.txt: [('to', 'co')]
WMH19060321-V04-12-page1.txt: [('temporari', 'ly'), ('con', 'ference')]
WMH19060411-V04-15-page1.txt: [('to', 'co'), ('GATH', 'ERETH')]
WMH19060411-V04-15-page2.txt: [('o', 'ro')]
WMH19060502-V04-18-page1.txt: [('r', 'INER')]
WMH19060502-V04-18-page3.txt: [('arrange', 'ment')]
WMH19060509-V04-19-page2.txt: [('state', 'ment')]
WMH19060606-V04-22-page1.txt: [('or', 'dained')]
WMH19060627-V04-25-page1.txt: [('O', 'ft')]
WMH19060711-V04-27-page1.txt: [('r', 'Ef')]
WMH19060711-V04-27-page2.txt: [('to', 'ro')]
WMH19060711-V04-27-page4.txt: [('t', 'oo')]
WMH19060725-V04-29-page2.txt: [('we', 're')]
WMH19060725-V04-29-page3.txt: [('COL', 'LEGE')]
WMH19060808-V04-31-page1.txt: [('CAN', 'VASSERS')]
WMH19060822-V04-32-page3.txt: [('Mes', 'siah')]
WMH19060905-V04-34-page3.txt: [('Confer', 'ence')]
WMH19060912-V04-35-page3.txt: [('to', 're')]
WMH19060926-V04-37-page2.txt: [('wei', 'ght')]
WMH19061017-V04-40-page1.txt: [('EAST', 'ERN')]
WMH19061017-V04-40-page3.txt: [('es', 'sential')]
WMH19061031-V04-42-page3.txt: [('de', 'cide')]
WMH19061114-V04-44-page4.txt: [('A', 'ncient'), ('to', 'ro')]
WMH19061205-V04-47-page2.txt: [('wrest', 'lers')]
WMH19061205-V04-47-page4.txt: [('D', 'ecember')]
WMH19070102-V05-01-page1.txt: [('g', 'oo')]
WMH19070102-V05-01-page3.txt: [('t', 'hrough')]
WMH19070109-V05-02-page1.txt: [('A', 'KA')]
WMH19070109-V05-02-page2.txt: [('prom', 'ised')]
WMH19070116-V05-03-page3.txt: [('founda', 'tion')]
WMH19070306-V05-10-page3.txt: [('thi', 'nking')]
WMH19070327-V05-13-page3.txt: [('consci', 'entious')]
WMH19070417-V05-16-page3.txt: [('u', 'nfailing')]
WMH19070424-V05-17-page1.txt: [('peo', 'ple')]
WMH19070424-V05-17-page2.txt: [('IN', 'STRUCTOR')]
WMH19070515-V05-20-page2.txt: [('di', 'fficulties')]
WMH19070605-V05-23-page2.txt: [('ha', 'th')]
WMH19070814-V05-33-page4.txt: [('Mon', 'tcalm')]
WMH19070828-V05-34-page3.txt: [('rat', 'es')]
WMH19070911-V05-36-page1.txt: [('d', 'ay'), ('to', 'ri')]
WMH19071002-V05-39-page1.txt: [('M', 'ichigan')]
WMH19071016-V05-41-page4.txt: [('go', 'od')]
WMH19071106-V05-44-page1.txt: [('SUPER', 'IOR')]
WMH19071106-V05-44-page2.txt: [('the', 'ist')]
WMH19071106-V05-44-page4.txt: [('t', 'Aro')]
WMH19071120-V05-46-page1.txt: [('ans', 'wer')]
WMH19071120-V05-46-page3.txt: [('w', 'hich')]
WMH19071211-V05-49-page2.txt: [('t', 'oo')]
WMH19071218-V05-50-page2.txt: [('a', 'nd')]
WMH19071218-V05-50-page3.txt: [('recom', 'mendation')]
WMH19080101-V06-01-page4.txt: [('the', 'ft')]
WMH19080115-V06-03-page3.txt: [('distri', 'bution')]
WMH19080122-V06-04-page3.txt: [('r', 'eligious')]
WMH19080129-V06-05-page4.txt: [('black', 'smithing')]
WMH19080212-V06-07-page2.txt: [('o', 'ro')]
WMH19080219-V06-08-page4.txt: [('pres', 'ent')]
WMH19080304-V06-10-page2.txt: [('period', 'icals')]
WMH19080311-V06-11-page1.txt: [('famil', 'iar')]
WMH19080401-V06-14-page2.txt: [('Sec', 'retaries')]
WMH19080401-V06-14-page3.txt: [('an', 'ther')]
WMH19080422-V06-17-page3.txt: [('for', 'th')]
WMH19080429-V06-18-page4.txt: [('Stu', "dents'")]
WMH19080506-V06-19-page1.txt: [('con', 'vention')]
WMH19080513-V06-20-page1.txt: [('con', 'tributions'), ('mo', 'rA')]
WMH19080513-V06-20-page2.txt: [('num', 'ber')]
WMH19080520-V06-21-page1.txt: [('e', 'rr'), ('A', 'MO')]
WMH19080520-V06-21-page2.txt: [('den', 'ial')]
WMH19080520-V06-21-page3.txt: [('o', 'ro')]
WMH19080603-V06-23-page1.txt: [('M', 'ICHIGAN')]
WMH19080610-V06-24-page4.txt: [('Adv', 'ent')]
WMH19080708-V06-28-page1.txt: [('k', 'AW')]
WMH19080708-V06-28-page3.txt: [('Bap', 'tist'), ('in', 'vited')]
WMH19080715-V06-29-page2.txt: [('A', 'dventists')]
WMH19080722-V06-30-page2.txt: [('LIB', 'ERTY'), ('John', 'ston')]
WMH19080805-V06-32-page2.txt: [('near', 'ly')]
WMH19080826-V06-34-page3.txt: [('class', 'es')]
WMH19080909-V06-36-page2.txt: [('auspi', 'ces')]
WMH19080916-V06-37-page3.txt: [('shad', 'ow')]
WMH19080916-V06-37-page4.txt: [('and', 're')]
WMH19080923-V06-38-page1.txt: [('t', 'IE')]
WMH19080923-V06-38-page4.txt: [('Pro', 'fessor')]
WMH19080930-V06-39-page1.txt: [('abo', 'ut')]
WMH19081007-V06-40-page4.txt: [('min', 'isters')]
WMH19081014-V06-41-page1.txt: [('i', 'ri')]
WMH19081014-V06-41-page2.txt: [('o', 'ro')]
WMH19081028-V06-43-page1.txt: [('gos', 'pel')]
WMH19081104-V06-44-page4.txt: [('a', 're')]
WMH19081111-V06-45-page5.txt: [('He', 'ra')]
In [48]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction6

Average verified rate: 0.977728102578558

Average of error rates: 0.024010546500479388

Total token count: 915279

In [49]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[49]:
[('m', 1746),
 ('w', 1502),
 ('g', 1438),
 ('e', 1316),
 ('d', 1278),
 ('r', 684),
 ('n', 641),
 ("'", 490),
 ('f', 446),
 ('t', 378),
 ('th', 273),
 ('oo', 171),
 ('sabbathschool', 163),
 ('io', 115),
 ('mt', 108),
 ('k', 105),
 ('co', 98),
 ('ro', 94),
 ('wm', 83),
 ('numbess', 75),
 ('u', 70),
 ("canvassers'", 58),
 ('x', 46),
 ('horr', 39),
 ("the'", 38),
 ('rd', 33),
 ('blendon', 32),
 ('brower', 31),
 ('ex', 30),
 ('harnden', 30),
 ("f'd", 30),
 ('mchugh', 29),
 ('seventhday', 28),
 ('cleora', 27),
 ('nd', 26),
 ('q', 23),
 ('nunica', 23),
 ('sabbathschools', 23),
 ('-', 22),
 ('tion', 21),
 ('vowyla', 21),
 ('re', 21),
 ('fd', 20),
 ('psa', 20),
 ('z', 20),
 ('loth', 20),
 ('numbeps', 19),
 ('ioo', 19),
 ('drury', 18),
 ('hoffstra', 18)]

Review Remaining Errors

In [50]:
reports.docs_with_high_error_rate(summary)
Out[50]:
[('WMH19081111-V06-45-page5.txt', 0.455)]
In [52]:
# %load shared_elements/high_error_rates.py
doc_keys = [x[0] for x in reports.docs_with_high_error_rate(summary) if x[1] > 0.2]

utilities.open_original_docs(doc_keys, directories['cycle'])
Opened files: 

WMH19081111-V06-45-page5.txt

High error document is handwritten.

In [54]:
reports.long_errors(errors_summary, min_length=15)
Out[54]:
(['heaven-appointed',
  'tionofourownpeople',
  'niialtioiaaavaliwailio',
  'unimpressionable',
  'enjoyable-service',
  'carefully-arranged',
  'disconnectedfrom',
  'csuperintendents',
  'sabbath-meetings',
  'distinguishingbetween',
  'great-grandchildren',
  'iqiiiidiiiniinii',
  'stick-to-it-iveness',
  'juippliympamtuuju',
  'ponderousdocument',
  'influentiarwriters',
  'blackstring-around-the-neck',
  'self-examination',
  'congregegational',
  'securingappointments',
  'dwellingconveniences',
  'all--sabbath-school',
  'thought-producing',
  'danish-norwegian',
  'sleepfsfuicffeicse',
  'counter-campaign',
  'stoop-shouldered',
  'disfeliowshipped',
  'desire--expression',
  'soul-encouraging',
  'abundantresources',
  'hethatreapethgatherethfruituntolifeeternal',
  'instrumentterial',
  'fruituntolifeeternal',
  'ceremonieswholly',
  'responstbilities',
  'commandment-keeping',
  'encouragingteachers',
  'bookslavebeenhandledmostly',
  'relief-of-schools',
  'great-responsibility',
  'trailting-school',
  'irrilirrimiiimiptimpiiir',
  'eleven-twentieths',
  'self-gratification',
  "under'compulsion",
  'twatmanypersonswouldgivea',
  'christianindividuals',
  'southernlllinois',
  'comparativelysmall',
  'weddingring-i-ub',
  'seventy-thousand',
  'arithmetic--decimal',
  'reapetagattiereth',
  "government--smith's",
  "fifteen-minutes'",
  'solemnmea-ningt---a-',
  'northmichigancamp-meetingat',
  'receiptrnargaret',
  'concerningemmarrual',
  'self-satisfaction',
  'wanted--assurance',
  'unproductiveness',
  'never-tobe-omitted',
  'the-sabbath-school',
  'imiminiiiiimicermin',
  'over-development',
  'overly-sensitive',
  'stumbling-blocks',
  'self-opinionated',
  "hartwelljn'behalf",
  'onion-in-the-pocket',
  'withindifference',
  'selfconsciousness',
  'annakemstraannddolivercrumb',
  'ntalliscomparativelyquiet',
  'drills--penmanship',
  'cut-price-combination',
  'over-enthusiastic',
  "iiiiiiiiiniiffffr'",
  'iiiiiiviiiiiimunimmi'],
 15)

Correction 7 -- Remove Long Error Tokens

In [57]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = cycle
cycle = "correction7"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = re.sub(r"[0-9,!?$:;&]", " ", content)
    tokens = utilities.tokenize_text(text)
    
    replacements = []
    replacements.append(clean.check_for_repeating_characters(tokens, "i|I"))
    
    replacements = [item for sublist in replacements for item in sublist]
            
    if len(replacements) > 0:
        print('{}: {}'.format(filename, replacements))

        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    else:
        pass
    
    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
WMH19050104-V03-01-page4.txt: [('IIIIIIVIIIIIIMUNIMMI', ' ')]
WMH19060321-V04-12-page1.txt: [("IIIIIIIIINIIffffr'", ' ')]
WMH19080729-V06-31-page1.txt: [('IQIIIIdIIINIINII', ' ')]

Correction 8 -- Separate Squashed Words

In [59]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log

prev = cycle
cycle = "correction8"

directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
    os.makedirs(directories['cycle'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

verified_tokens = []

for filename in corpus:  
    content = utilities.readfile(directories['prev'], filename)
    clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)

tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]

sorted_list_of_words = list(words_sorted_short['token'])

corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))

for filename in corpus:
    content = utilities.readfile(directories['prev'], filename)

    text = utilities.strip_punct(content)
    tokens = utilities.tokenize_text(text)
    
    wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
    maxword = max(len(x) for x in sorted_list_of_words)
    
    replacements = []
    
    for token in tokens:
        if not token.lower() in spelling_dictionary:
            if len(token) > 17:
                if re.search(r"[\-\-\'\"]", token):
                    pass
                else:
                    split_string = clean.infer_spaces(token, wordcost, maxword)
                    list_split_string = split_string.split()
                    
                    if clean.verify_split_string(list_split_string, spelling_dictionary):
                        replacements.append((token, split_string))
                    else:
                        pass
            else:
                pass
        else:
            pass
        
    if len(replacements) > 0:
        print("{}: {}".format(filename, replacements))
        
        for replacement in replacements:
            content = clean.replace_pair(replacement, content)
    
    else:
        pass

    with open(join(directories['cycle'], filename), mode="w") as o:
        o.write(content)
        o.close()
WMH19030603-V01-22-page3.txt: [('distinguishingbetween', 'distinguish ing between')]
WMH19040106-V02-02-page1.txt: [('Christianindividuals', 'Christian individuals')]
WMH19040928-V02-35-page2.txt: [('NIIALTIOIAAAVALIWAILIO', 'N I I A L T I O I A A A V A L I W A I L I O')]
WMH19050201-V03-04-page2.txt: [('irrilirrIMIIIMIPTIMPIIIR', 'ir r i l i r r I M I I I M I P T I M P I I I R')]
WMH19060117-V04-03-page1.txt: [('HETHATREAPETHGATHERETHFRUITUNTOLIFEETERNAL', 'HE THAT REAPETH GATHERETH FRUIT UNTO LIFE ETERNAL')]
WMH19060718-V04-28-page2.txt: [('encouragingteachers', 'encouraging teachers')]
WMH19060725-V04-29-page1.txt: [('FRUITUNTOLIFEETERNAL', 'FRUIT UNTO LIFE ETERNAL')]
WMH19060919-V04-36-page1.txt: [('comparativelysmall', 'comparatively small')]
WMH19080408-V06-15-page1.txt: [('bookslavebeenhandledmostly', 'book slave been handled mostly')]
WMH19080722-V06-30-page2.txt: [('securingappointments', 'securing appointments')]
WMH19080909-V06-36-page1.txt: [('HETHATREAPETHGATHERETHFRUITUNTOLIFEETERNAL', 'HE THAT REAPETH GATHERETH FRUIT UNTO LIFE ETERNAL')]
In [62]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction8

Average verified rate: 0.9777330102496761

Average of error rates: 0.024005752636625122

Total token count: 915346

In [63]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[63]:
[('m', 1749),
 ('w', 1503),
 ('g', 1438),
 ('e', 1316),
 ('d', 1278),
 ('r', 688),
 ('n', 642),
 ("'", 490),
 ('f', 446),
 ('t', 380),
 ('th', 273),
 ('oo', 171),
 ('sabbathschool', 163),
 ('io', 115),
 ('mt', 108),
 ('k', 105),
 ('co', 98),
 ('ro', 94),
 ('wm', 83),
 ('numbess', 75),
 ('u', 70),
 ("canvassers'", 58),
 ('x', 46),
 ('horr', 39),
 ("the'", 38),
 ('rd', 33),
 ('blendon', 32),
 ('brower', 31),
 ('ex', 30),
 ('harnden', 30),
 ("f'd", 30),
 ('mchugh', 29),
 ('seventhday', 28),
 ('cleora', 27),
 ('nd', 26),
 ('q', 23),
 ('nunica', 23),
 ('sabbathschools', 23),
 ('-', 22),
 ('tion', 21),
 ('vowyla', 21),
 ('re', 21),
 ('fd', 20),
 ('psa', 20),
 ('z', 20),
 ('loth', 20),
 ('numbeps', 19),
 ('ioo', 19),
 ('drury', 18),
 ('hoffstra', 18)]
In [ ]: