WHM-OCR-Evaluation-and-Correction
In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt",
"2016-12-07-SDA-place-names.txt",
"2016-12-08-SDA-Vocabulary.txt",
"2017-01-03-place-names.txt",
"2017-02-14-Base-Word-List-SCOWL&KJV.txt",
"2017-02-14-Roman-Numerals.txt",
"2017-03-01-Additional-Approved-Words.txt"
]
In [6]:
spelling_dictionary = utilities.create_spelling_dictionary(wordlist_dir, wordlists)
In [7]:
title = "WMH"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)
Baseline¶
In [9]:
cycle = 'baseline'
In [10]:
stats = reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/baseline Average verified rate: 0.9316708765632767 Average of error rates: 0.07061265580057527 Total token count: 939453
In [11]:
errors_summary = reports.get_errors_summary( stats )
reports.top_errors( errors_summary, 500 )
Out[11]:
[('-', 1687),
('m', 1646),
('w', 1492),
('g', 1421),
('d', 1246),
('e', 1229),
('¥', 881),
('re-', 816),
('con-', 748),
('tion', 679),
('r', 664),
('n', 633),
('in-', 539)]
Review Special Character Use¶
In [12]:
reports.tokens_with_special_characters(errors_summary)
Out[12]:
[('¥', 881),
(')', 432),
('(', 368),
('ñ', 248),
('/', 171),
('ñthe', 100),
('_', 81),
('presidentña', 75),
('numbess)in', 70),
('¡', 65),
('educationñprof', 62),
('numbers)in', 62),
('%', 62),
('treasurerñe', 61),
('secretaryñm', 61),
('ñmargaret', 58),
('(to', 52),
('\\', 41),
('presidentñs', 38),
('ñelder', 37),
('treasñjennie', 35),
('ñselected', 35),
('presidentñm', 32),
('ña', 32),
('*', 31),
('//', 25),
('numbers)', 25),
('ã', 24),
('ñwe', 23),
('¢', 22),
('ô', 21),
('(the', 20),
('ñi', 20),
('numbeps)in', 19),
('numbees)in', 17),
('(b)', 16),
('ñthat', 16),
('(concluded', 16),
('¥the', 16),
('ñdied', 16),
('secretaryñs', 15),
('(a)', 15),
('ñhattie', 15),
('treasurerñd', 15),
('ñbrother', 13),
('(and', 13),
('ñand', 12),
('i)', 12),
('(ps', 12),
('(see', 12),
('(c)', 11),
('ñin', 11),
('(tithe)', 11),
('in¥', 11),
('ñmiss', 11),
('(john', 10),
('¥of', 9),
('quartetñ', 9),
('ñno', 9),
('ñdr', 8),
('ñfrom', 8),
('ñmrs', 8),
('ñas', 8),
('///', 8),
('•', 8),
('songñ', 8),
('(rev', 8),
('numbevs)in', 8),
('(heb', 7),
('numbews)in', 7),
('ñour', 7),
('(not', 7),
('numbeas)in', 7),
('(a', 7),
('continued)', 7),
('`', 7),
('ñw', 7),
('\\vest', 7),
('(isa', 6),
('\\\\', 6),
('nña', 6),
('ñthis', 6),
('ñan', 6),
('ñat', 6),
('ñsuccess', 6),
('(acts', 6),
('(d)', 6),
('ñit', 6),
('ñs', 6),
('(continued', 6),
('¦', 6),
('ó', 6),
(']', 6),
('(matt', 5),
('given)', 5),
('(sunday)', 5),
('ñone', 5),
('ñministry', 5),
('homeñthe', 5),
('(job', 5),
('sabbath¥school', 5),
('ññ', 5),
('(field', 5),
('ñnot', 5),
('(mrs', 5),
('¥we', 5),
('ñmen', 5),
('¥¥', 5),
('=', 5),
('educationña', 5),
('ñall', 5),
('michã', 5),
('()reek', 5),
('numbess)ln', 5),
('the¥', 5),
('ñprof', 5),
('ñella', 5),
('(i)', 4),
('(or', 4),
('ñthey', 4),
('ñsome', 4),
('ñgospel', 4),
('ñsister', 4),
('\ufeff', 4),
('\\\\\\\\', 4),
('(ex', 4),
('\\v', 4),
('numbcps)in', 4),
('ñj', 4),
('[', 4),
('wantedña', 4),
('ñfor', 4),
('i/', 4),
('(we', 4),
('numbecs)in', 4),
('(paper)', 4),
('ãã', 4),
('numbens)in', 4),
('(which', 4),
('the_', 4),
('ñm', 4),
('(luke', 4),
('¥in', 4),
('(i', 4),
('¥¥¥', 4),
('ñeld', 4),
('ñrev', 3),
('(this', 3),
('purposeñto', 3),
('/-', 3),
('(there', 3),
('(read', 3),
('he¥', 3),
('#', 3),
('be¥', 3),
('ñgeorge', 3),
('ob¥', 3),
('io¢', 3),
('(deut', 3),
('and¥', 3),
('[john', 3),
('(g)', 3),
('termñbible', 3),
('ñremember', 3),
('(so', 3),
('hymnñ', 3),
('ñtestimonies', 3),
('(cloth', 3),
('(f)', 3),
('(mal', 3),
('ñof', 3),
('¥to', 3),
('¥do', 3),
('(margin', 3),
('(in', 3),
('ñsimply', 3),
('(e)', 3),
('to¥', 3),
('saleña', 3),
('~', 3),
('¥and', 3),
('conference(tithe)', 3),
('¥been', 3),
('°', 3),
('tion)', 3),
('¡-', 3),
('(he', 3),
('o%', 3),
('ç', 3),
('ñlast', 3),
('ñyes', 3),
('}', 3),
('ñif', 3),
('ñis', 3),
('`great', 3),
('allñthe', 3),
('michiganñ', 3),
('ñmarianne', 3),
('byñ', 3),
('`the', 3),
('ñbible', 3),
('ñto', 3),
('(prov', 3),
('~~', 3),
('/i', 3),
('`object', 3),
('not¥', 3),
('(for', 3),
('christñ', 2),
('each)', 2),
('was¥', 2),
('¥a', 2),
('¥-', 2),
('igo+', 2),
('educationñits', 2),
('numbests)in', 2),
('i~n-', 2),
('with¥', 2),
('comfortñ', 2),
('/(', 2),
('can_', 2),
('*read', 2),
('(july', 2),
('¥ñ', 2),
('reading)', 2),
('ñh', 2),
('ñwhether', 2),
('ñc', 2),
('restñ', 2),
('/e', 2),
('ñthere', 2),
('%%', 2),
('(dt', 2),
('ñso', 2),
('ñf', 2),
('(new', 2),
('(as', 2),
(')))', 2),
('((that', 2),
('young*', 2),
('these¥', 2),
('(christ)', 2),
('carñoh', 2),
('ñeven', 2),
('_the', 2),
('work¥', 2),
('ñread', 2),
('ex¥', 2),
('wig)', 2),
('workñnot', 2),
('ñwith', 2),
('(vs', 2),
('(without', 2),
('[should]', 2),
('¥who', 2),
('on¥', 2),
('them)', 2),
('-ô', 2),
('ñhealth', 2),
('ñever', 2),
('grammarñcomplete', 2),
('usedñthe', 2),
('+', 2),
('to¢', 2),
('ant)', 2),
(')ñ', 2),
('back)', 2),
('[for', 2),
('>', 2),
('christ)', 2),
('this¥', 2),
('ñjames', 2),
('beñ', 2),
('(vol', 2),
("'¥", 2),
('church)', 2),
('ñhad', 2),
('(h)', 2),
('ñtwo', 2),
('ñare', 2),
('guidanceñ', 2),
("['sego", 2),
('paperñduties', 2),
('foodñ', 2),
('/a', 2),
('o¢', 2),
('ñreports', 2),
('r/', 2),
('\\k', 2),
('(april', 2),
('tions)', 2),
('ñwhen', 2),
('soloñ', 2),
('n¢', 2),
('quartetteñ', 2),
('childñhis', 2),
('ñfebruary', 2),
('in*', 2),
('(verse', 2),
('¥for', 2),
("///'", 2),
('numbers)ln', 2),
('ñsel', 2),
('camp¥meeting', 2),
('lord)', 2),
('ñseveral', 2),
('%two', 2),
('-¥', 2),
('drinkñ', 2),
('eternityñ', 2),
('¥c', 2),
('(those', 2),
('ñherrick', 2),
('sec¥', 2),
('fearsñhe', 2),
('(concluded)', 2),
('ñu', 2),
('ñsir', 2),
('(front', 2),
('subscriptions)', 2),
('page)', 2),
("'/", 2),
('father)', 2),
('greek]', 2),
('sabbath¥', 2),
('(iii', 2),
('franciscoñfell', 2),
('ñmembers', 2),
('(nov', 2),
('(isaiah', 2),
('(all', 2),
('(minister)', 2),
('they_', 2),
('smileñ', 2),
('ñjohn', 2),
('itñ', 2),
('his¥', 2),
('/#', 2),
('ñwill', 2),
('`we', 2),
('ñlittle', 2),
('presidentñ', 2),
('(-', 2),
(')))))', 2),
('ñd', 2),
('camp¥', 2),
('it¥', 2),
('ñsabbath', 2),
('_in', 2),
('trueñ', 2),
('(money', 2),
('ñwas', 2),
('saleñforty-acre', 2),
('%v', 2),
('(paper', 2),
('bibleñold', 2),
('ñby', 2),
('(poetry', 2),
('ñprofessor', 2),
(')(', 2),
('(even', 2),
('ñbut', 2),
('god)', 2),
('(ga', 2),
('(note', 2),
('(ecc', 2),
('paperñhow', 2),
('to¥show', 2),
('¥they', 2),
('(swedish)', 2),
('is¥', 2),
('to-day)', 2),
('ñthose', 2),
('paperñthe', 2),
('prayerñ', 2),
('♦', 2),
('saysñ', 2),
('(jno', 2),
('bookñthe', 2),
('¥be', 2),
('ñwhat', 2),
('re¥', 2),
('(life', 2),
('__', 2),
('(col', 2),
('_this', 2),
('heartñ', 2),
("¥'", 2),
('numbers)i', 2),
('[tight', 2),
('joyñ', 2),
('more¥', 2),
('ñabraham', 2),
('(psalms', 2),
('a¥', 2),
('(with', 2),
('`it', 2),
('ñwhich', 2),
('and_', 2),
('anythingñ', 2),
('is_', 2),
('that¥', 2),
('ñ-', 2),
('_have', 2),
('ñcamp-meetings', 2),
("(god's)", 1),
('`ye', 1),
('¥ence', 1),
('health]', 1),
('_lessons', 1),
('ñcollege', 1),
('countryñmussoorie', 1),
('ñhistorical', 1),
('(tile', 1),
('margaret¥ilaughey', 1),
('(twins', 1),
('gui)', 1),
('criti¥', 1),
('ques¥', 1),
('heartsñto', 1),
("'ô\\", 1),
('\\\\e', 1),
('_materials', 1),
('twoñgeneral', 1),
('[or', 1),
('to¥your', 1),
('burnhamñallegan', 1),
('baffledñdestroyed', 1),
('a\\mir', 1),
('was]', 1),
('`prepare', 1),
('(bishop)', 1),
('=pill/irk', 1),
('deredñ', 1),
('r)r', 1),
('//ii/', 1),
('before)', 1),
('michôan', 1),
('^', 1),
('(actions', 1),
('whileñis', 1),
('purposeñabraham', 1),
('(adopted', 1),
('%%us', 1),
('numbers)in-advance', 1),
('mentionedñconducting', 1),
('¥=', 1),
('_here', 1),
('and¥sisters', 1),
('every-__', 1),
('(margin)', 1),
('previous¥', 1),
('veas(', 1),
('possible¡', 1),
('illgami/', 1),
('causeñan', 1),
('(german', 1),
('ii)', 1),
('of¥', 1),
('stateñsome', 1),
('merriamñlowell', 1),
('(adv', 1),
('ined)', 1),
('shawñdied', 1),
('be=', 1),
('lettersñthe', 1),
('adam¥transgressed', 1),
('(forty', 1),
('wic/', 1),
('\\j', 1),
('the`lord', 1),
('out¥', 1),
('ñeugene', 1),
('pm/', 1),
('trioñ', 1),
('m(', 1),
('deposits)', 1),
('ñordis', 1),
(')}', 1),
('¨f', 1),
('center¥ñ', 1),
('convenñ', 1),
('scherzoñ', 1),
('exerciseñthe', 1),
('peo¥', 1),
('says)', 1),
('cx)', 1),
('or¥', 1),
('ñ=', 1),
('trainingñthe', 1),
('et*', 1),
("stringsñsailor'", 1),
('¥edward', 1),
('drillsñreading', 1),
('ñyouth', 1),
('(about', 1),
('v/', 1),
('_consideration', 1),
('lord¥', 1),
('atedña', 1),
('ñought', 1),
('mornñso', 1),
('ñoh', 1),
('prayers_', 1),
('mer_', 1),
('godñsome', 1),
('p/a', 1),
('dieñas', 1),
('%ell', 1),
('statementñ', 1),
('me)', 1),
('publishing_', 1),
("worldã'¥", 1),
("'ñand", 1),
('bodyña', 1),
('(that', 1),
('praise¥god', 1),
('wilburñportland', 1),
('preparationñits', 1),
('(ii', 1),
('meansñmen', 1),
("(/'", 1),
('ãg', 1),
('(vest', 1),
('compassionñ', 1),
('_lumber', 1),
('ñon', 1),
('/efr/', 1),
('willing¥', 1),
('ñeben', 1),
('thoughtñdivine', 1),
('pro¥', 1),
('ñworld', 1),
('necessaryñ', 1),
('exercisedñthe', 1),
('(excluding', 1),
('not¥in', 1),
('yardñwinifred', 1),
('oc)', 1),
("wr'%ô", 1),
('baptizedñthis', 1),
('the¥formation', 1),
('zw/i/', 1),
('arithmeticñcomplete', 1),
('overlookedñthe', 1),
('heartñgrowing', 1),
('ver)', 1),
('artñall', 1),
('each¥way', 1),
('bibleñchurch', 1),
('wordsñand', 1),
('some)', 1),
('morningñwhen', 1),
('accomplishñ', 1),
('tionñfurnishes', 1),
('s\\', 1),
('yô', 1),
('it/', 1),
('known)', 1),
('ñsabbath-', 1),
('¥from', 1),
('wa¤', 1),
('tory)', 1),
('downñthe', 1),
('ñfell', 1),
('valie¥', 1),
('a%\\', 1),
('cottñan', 1),
('a*', 1),
('standsñis', 1),
('friendñ', 1),
('ñper-', 1),
('goñall', 1),
('years)', 1),
('nersñand', 1),
('lith**', 1),
('ñarticles', 1),
("ô'd", 1),
('baptistñmillie', 1),
('membersñtwo', 1),
('ôi', 1),
('distantña', 1),
('stormôso', 1),
('lostñat', 1),
('(ger-', 1),
('trial¥', 1),
('pesveas(', 1),
('made¥', 1),
('%or', 1),
('here¥and', 1),
('numbcps)', 1),
('(absolute)', 1),
("ã'", 1),
('*have', 1),
('(mar-', 1),
('(d', 1),
('beastñthe', 1),
('(broth-', 1),
('inheritedñ', 1),
('a)', 1),
('teachersñ(', 1),
('ñforty', 1),
('((armed¥', 1),
('(virginia)', 1),
('pierceñmrs', 1),
('all¥the', 1),
('[not', 1),
('firga/', 1),
('emptyñcontribute', 1),
('thingñonly', 1),
('(log', 1),
('infancy)', 1),
('_---', 1),
("¥'we", 1),
('holding_', 1),
('lôilorning', 1),
('mountainsñwas', 1),
('institu¥', 1),
('then_', 1),
('ñtwenty-two', 1),
('does¥', 1),
('wrong¥', 1),
('meñif', 1),
('primaryãand', 1),
('èè', 1),
('\\varner', 1),
('rhetoricñkellogg', 1),
('ninthñnever', 1),
('translation)', 1),
('ñatlantic', 1),
('when_', 1),
("'illl~l", 1),
('ãmin', 1),
('itumegoc(', 1),
('¥incomparable', 1),
('appoint=', 1),
('ãli', 1),
('secondñdrink', 1),
('feelñwell', 1),
('comeñlet', 1),
('ñeating', 1),
('_sister', 1),
('iff(iii', 1),
('¥life', 1),
("'¥'", 1),
('pesyeas(', 1),
('withrowñdied', 1),
('recitationñ', 1),
('ñsaving', 1),
('(retail', 1),
('fit*takki', 1),
("curse')", 1),
('diedñin', 1),
('ñr', 1),
('(ise', 1),
('jam(', 1),
('egypt)', 1),
('a/', 1),
('(ind', 1),
('placesñthirty-three', 1),
("botanyñleavitt's", 1),
('ringsñby', 1),
('continued¥)', 1),
('z/g¥', 1),
('(board', 1),
('vs¥m', 1),
('godña', 1),
('cal*', 1),
('ñbrethren', 1),
('scaledñgod', 1),
('(denomi-', 1),
('(africa)', 1),
('ñhalf', 1),
('t*', 1),
('ammo(', 1),
('of`the', 1),
('property_', 1),
('use¥of', 1),
('nexus¥', 1),
('new¥', 1),
('(apostolic', 1),
('(march', 1),
('tistiofflau_j', 1),
('¥ations', 1),
('ñliquor', 1),
('(no', 1),
('minutes)', 1),
('crafts)', 1),
('ñstephen', 1),
('awayñtheir', 1),
('fieldñ', 1),
('ñwell', 1),
('ñor', 1),
('christñwho', 1),
('workñbeing', 1),
('paperñhave', 1),
('ñhe', 1),
('%%mo', 1),
('ñmain-', 1),
('(saturday)', 1),
('(danish-', 1),
('prophetically)', 1),
('deliver¥', 1),
('çflaiii', 1),
('\\\\ittuto', 1),
('¥ten', 1),
('edu¥', 1),
('riversñfifteen', 1),
('haugheyñotsego', 1),
('knowñi', 1),
('numbems)in', 1),
('/tioheagigt', 1),
('flowñ', 1),
('ñready', 1),
('~niiii', 1),
('*you', 1),
('letterñfrom', 1),
('jo¡', 1),
('corm)', 1),
('land)', 1),
('we¥are', 1),
('ñgeneral', 1),
('*two', 1),
('(should', 1),
("\\ctrir''", 1),
('tenthñ', 1),
('areña', 1),
('loi#d', 1),
('(under', 1),
('(especially', 1),
('two¥', 1),
('meri¥', 1),
('¥corliss', 1),
('are¥', 1),
('ñtogether', 1),
('thousand¥', 1),
('%vest', 1),
('t-}', 1),
('sorrowñ', 1),
('aboveñcause', 1),
('butterfieldñbuchanan', 1),
('spearñfell', 1),
('appe¥', 1),
('christñhe', 1),
('(except', 1),
('agesñ', 1),
('w//', 1),
('`m~d', 1),
('[the', 1),
('bath¥keepers', 1),
('¡heaven', 1),
('ñtuesday', 1),
('distanceñthe', 1),
('¥kalama', 1),
('c)', 1),
('(v', 1),
('_read', 1),
('tentsñone', 1),
('_e__zeo', 1),
('shriekñ', 1),
('ñenough', 1),
('tentsñthe', 1),
('(they', 1),
('homeñ', 1),
('fb/', 1),
('copyñthe', 1),
('smithñgrandville', 1),
('plifitt/', 1),
('satan¥', 1),
('departmentñtwo', 1),
('bandñ', 1),
('ñtemporal', 1),
('grandville_', 1),
('privilege/to', 1),
('faultsñshould', 1),
('answerñ', 1),
('(whatsoever', 1),
('body)', 1),
('[c]', 1),
('ac}', 1),
('¥usñthe', 1),
('portunity¥', 1),
('o/', 1),
('ñreasons', 1),
('r¢', 1),
('year(', 1),
('some`consideration', 1),
('ñmeetings', 1),
('()rues¡', 1),
('il/', 1),
('standpointña', 1),
('ñhand', 1),
('unionñfrank', 1),
('¡()', 1),
('wantedñto', 1),
("'%\\%", 1),
('scienceñelementary', 1),
('orphanñit', 1),
('¥planting', 1),
('(bo', 1),
('ñspeaking', 1),
('found¥the', 1),
('a/pfi', 1),
('¥ed', 1),
('`\\_\\_', 1),
('(two', 1),
('meet=', 1),
('\\\\ô\\', 1),
('aon¥', 1),
('%moo', 1),
('areñthe', 1),
('a(a', 1),
('thatñ', 1),
("under'compulsion)", 1),
('-_', 1),
('ãt', 1),
('ho\\tever', 1),
('hillginc/', 1),
('discus-¥', 1),
(')im', 1),
('(his', 1),
('go%', 1),
('ingsña', 1),
('pennsyl-(', 1),
('*heaven', 1),
("tm'\\", 1),
('reveals¥', 1),
('vvr/rip', 1),
('worldñthey', 1),
('*out', 1),
('(forces)', 1),
('laterñ', 1),
('is`situated', 1),
('graceñlove', 1),
('¥rela-', 1),
('ñmay', 1),
('(illus', 1),
('ñlet', 1),
("'wm*", 1),
('at¥corn', 1),
('itñthat', 1),
('up¥and', 1),
('holidayñdied', 1),
('yearñ', 1),
('shoulc_lopot', 1),
('there¥', 1),
('oneñto', 1),
('(symbolically', 1),
('prospectñwe', 1),
('turn_pale', 1),
("ãa'", 1),
('g¥', 1),
('(pest', 1),
('questionñis', 1),
("'ñone", 1),
('/inj', 1),
('_effect', 1),
('society(', 1),
('collardñdied', 1),
('ft/', 1),
('¥secretary', 1),
("\\n\\'", 1),
('(twelve', 1),
('bornñon', 1),
('may¥', 1),
('ñrepairs', 1),
('fãigr', 1),
("~iqiiiidiiiniinii(i'''", 1),
("/'i", 1),
('fridayñprepared', 1),
('_-', 1),
('governorñduties', 1),
('extras)', 1),
('history¥', 1),
('ere/', 1),
('*is%', 1),
('often*', 1),
('cudneyñdied', 1),
('(they)', 1),
('youñyou', 1),
('con_erning', 1),
('classesñintermediate', 1),
('roil/', 1),
('ii¥', 1),
('of/', 1),
('ñeach', 1),
("ñ'", 1),
('\\yam', 1),
('_apply', 1),
('`illessed', 1),
('*and', 1),
('(ilatchman', 1),
('resum¥', 1),
('ñlegislative', 1),
('peap(', 1),
('<', 1),
('/cartoinmtza', 1),
('fie/l(', 1),
('messageñto', 1),
(')he', 1),
('_god', 1),
('smithyñlesson', 1),
('whiteñthe', 1),
('re/', 1),
('sugar)', 1),
("('", 1),
('ñhave', 1),
('_much', 1),
('\\ô\\', 1),
('c_aivy', 1),
('for_room', 1),
('largeñone', 1),
('*licentiates', 1),
('influenceñin', 1),
('sutherland)', 1),
('k/aw', 1),
('ñdo', 1),
('ããilicom', 1),
('treasurerñ', 1),
('eoñternal', 1),
('tells_', 1),
('jill)', 1),
('before¥', 1),
('¥v', 1),
('`cast', 1),
('wedgeñkindness', 1),
('s¥ix', 1),
('illl~', 1),
('//mi', 1),
('kind)', 1),
('bless¥', 1),
("/g'", 1),
('poundñhath', 1),
('general_', 1),
('israelñ', 1),
('-•', 1),
('(such', 1),
('ureclly_il', 1),
('affliction_', 1),
('thisñthere', 1),
('ñnow', 1),
('solveñ', 1),
('*in', 1),
('house)', 1),
('ñduring', 1),
('freedomñ', 1),
('g\\ta', 1),
('¥bible', 1),
('other_', 1),
('(t', 1),
('aroundñ', 1),
('sundayñin', 1),
('athafitov/iati', 1),
('produce_a', 1),
('the¥thanksgiving', 1),
('/z//', 1),
('(story)', 1),
('myself)', 1),
('an¥', 1),
('*conference', 1),
('after¥', 1),
('ñsigns', 1),
('mcmorran*', 1),
('¥training', 1),
('ñen', 1),
('(george', 1),
('ordersñin', 1),
('seventhñbe', 1),
('edñdr', 1),
('ñ`a', 1),
('%ago', 1),
('vegetarianismñits', 1),
('conventionsñin', 1),
('%*', 1),
('(lowelf)', 1),
('_someone', 1),
('(let', 1),
('very¥', 1),
('siteaga_', 1),
('`value', 1),
('ñdwelling', 1),
('ñhattiee', 1),
('speakñmen', 1),
('pleasures)', 1),
('ò', 1),
('snunbers)in', 1),
('crossñ', 1),
('ñthese', 1),
('ro%', 1),
('mel/', 1),
('ñsunday-closing', 1),
('cheapñrubber-tired', 1),
('`permit', 1),
('(church', 1),
('stateñpublishes', 1),
("salvation'of¥", 1),
('bodiesñlet', 1),
('my_', 1),
('yeas(', 1),
("volun¥teers'", 1),
...]
Correction 1 -- Normalize Characters¶
In [14]:
# %load shared_elements/normalize_characters.py
prev = "baseline"
cycle = "correction1"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
# Substitute for all other dashes
content = re.sub(r"—-—–‑", r"-", content)
# Substitute formatted apostrophe
content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
# Replace all special characters with a space (as these tend to occur at the end of lines)
content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
In [17]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction1 Average verified rate: 0.9377967276021958 Average of error rates: 0.0643058485139022 Total token count: 938150
In [18]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[18]:
[('m', 1749),
('-', 1720),
('w', 1503),
('g', 1434),
('e', 1307),
('d', 1276),
('re-', 816),
('con-', 750),
('tion', 684),
('r', 681),
('n', 644),
('in-', 539),
("'", 507),
('be-', 471),
('f', 444),
('t', 381),
('de-', 377),
('com-', 339),
('ex-', 332),
('michi-', 328),
('th', 289),
('sab-', 285),
('ment', 283),
('ence', 267),
('en-', 233),
('peo-', 226),
('sabbath-', 223),
('ly', 220),
('ference', 212),
('ple', 207),
('confer-', 207),
('pre-', 203),
('tions', 189),
('ad-', 186),
('dis-', 178),
('at-', 173),
('oo', 172),
('im-', 167),
('mis-', 164),
('un-', 163),
('meet-', 162),
('ers', 162),
('ac-', 161),
('pro-', 153),
('per-', 146),
('ber', 137),
('io', 117),
('ap-', 116),
('ren', 114),
('ary', 113)]
Correction 2 -- Connect Line Endings¶
In [20]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
In [23]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction2 Average verified rate: 0.9726863553068523 Average of error rates: 0.029485139022051778 Total token count: 915147
In [24]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[('m', 1747),
('-', 1700),
('w', 1503),
('g', 1434),
('e', 1307),
('d', 1275),
('r', 680),
('n', 644),
("'", 507),
('f', 443),
('t', 377),
('th', 283),
('oo', 171),
('sabbathschool', 163),
('io', 117),
('mt', 108),
('k', 106),
('co', 102),
('ro', 94),
('wm', 82),
('numbess', 75),
('u', 69),
("'field", 67),
("canvassers'", 58),
('--', 50),
('x', 46),
("'the", 44),
('horr', 39),
("the'", 38),
('rd', 33),
('blendon', 32),
('mid-summer', 32),
('brower', 31),
("f'd", 30),
('-the', 29),
('harnden', 29),
('mchugh', 29),
('nd', 28),
('seventhday', 28),
('cleora', 27),
('ex', 26),
('tion', 25),
('sabbathschools', 23),
('q', 23),
('nunica', 23),
('con-', 22),
("'to", 22),
('vowyla', 21),
('-and', 21),
('loth', 20)]
Correction 3 -- Remove extra dashes¶
In [26]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
replacements = []
for token in tokens:
if token[0] is "-":
replacements.append((token, token[1:]))
elif token[-1] is "-":
replacements.append((token, token[:-1]))
else:
pass
if len(replacements) > 0:
print("{}: {}".format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
WMH19030128-V01-04-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('labor-', 'labor'), ('-', '')]
WMH19030128-V01-04-page2.txt: [('-Prpartittrrit.', 'Prpartittrrit.')]
WMH19030128-V01-04-page3.txt: [('-', ''), ('HER-', 'HER')]
WMH19030128-V01-04-page4.txt: [('-', ''), ('Mich-', 'Mich')]
WMH19030204-V01-05-page1.txt: [('-', ''), ('fin-', 'fin'), ('-', ''), ('-', '')]
WMH19030204-V01-05-page2.txt: [('purit-', 'purit')]
WMH19030211-V01-06-page1.txt: [('-Is', 'Is'), ('-', ''), ('-', '')]
WMH19030211-V01-06-page3.txt: [('partic-', 'partic')]
WMH19030311-V01-10-page2.txt: [('ambush-', 'ambush')]
WMH19030311-V01-10-page3.txt: [('morn-', 'morn')]
WMH19030311-V01-10-page4.txt: [('en-', 'en'), ('TRAVIS.-', 'TRAVIS.'), ('WILBUR.-', 'WILBUR.')]
WMH19030415-V01-15-page2.txt: [('IMPRES-', 'IMPRES'), ('corn-', 'corn')]
WMH19030415-V01-15-page3.txt: [('-', ''), ('-air', 'air')]
WMH19030415-V01-15-page4.txt: [('HER-', 'HER')]
WMH19030506-V01-18-page1.txt: [('COM-', 'COM')]
WMH19030506-V01-18-page2.txt: [('cul-', 'cul')]
WMH19030506-V01-18-page4.txt: [('-', ''), ('meet-', 'meet'), ('Les-', 'Les'), ('conver-', 'conver'), ('forgive-', 'forgive'), ('sub-', 'sub'), ('HER-', 'HER')]
WMH19030513-V01-19-page2.txt: [('-formidable', 'formidable')]
WMH19030520-V01-20-page1.txt: [('-', '')]
WMH19030520-V01-20-page2.txt: [('-Drpartment', 'Drpartment'), ('--No.', '-No.')]
WMH19030520-V01-20-page3.txt: [('temperature-', 'temperature'), ('-', ''), ('temperature-', 'temperature'), ('-', ''), ('applications-', 'applications')]
WMH19030520-V01-20-page4.txt: [('Sand-', 'Sand')]
WMH19030527-V01-21-page1.txt: [('-', ''), ('-', ''), ('Heb-', 'Heb'), ('with-', 'with')]
WMH19030527-V01-21-page2.txt: [('--such', '-such'), ('faith-', 'faith')]
WMH19030527-V01-21-page3.txt: [('BAND-', 'BAND'), ('pun-', 'pun'), ('-', ''), ('ali-', 'ali'), ('rep-', 'rep')]
WMH19030603-V01-22-page1.txt: [('TES-', 'TES')]
WMH19030603-V01-22-page2.txt: [('SAB-', 'SAB'), ('CON-', 'CON')]
WMH19030603-V01-22-page3.txt: [('----', '---'), ('AB-', 'AB'), ('-', ''), ('physi-', 'physi')]
WMH19030603-V01-22-page4.txt: [('-', '')]
WMH19030610-V01-23-page1.txt: [('-', ''), ('-', '')]
WMH19030610-V01-23-page3.txt: [('-', ''), ('-', ''), ('reason-', 'reason')]
WMH19030624-V01-25-page1.txt: [('DEpART-', 'DEpART'), ('-', '')]
WMH19030624-V01-25-page3.txt: [('DETERIORA-', 'DETERIORA')]
WMH19030624-V01-25-page4.txt: [('Endeavor.-', 'Endeavor.'), ('Mc-', 'Mc')]
WMH19030701-V01-26-page1.txt: [('-', ''), ('any.-', 'any.'), ('DEPART-', 'DEPART')]
WMH19030701-V01-26-page3.txt: [('-', ''), ('-', ''), ('distribu-', 'distribu'), ('per-', 'per')]
WMH19030701-V01-26-page4.txt: [('-', '')]
WMH19030708-V01-27-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19030708-V01-27-page2.txt: [('-', ''), ('-', '')]
WMH19030708-V01-27-page3.txt: [('pro-', 'pro')]
WMH19030708-V01-27-page4.txt: [('Le-', 'Le'), ('-', '')]
WMH19030715-V01-28-page1.txt: [('-', '')]
WMH19030715-V01-28-page2.txt: [('-chool', 'chool')]
WMH19030715-V01-28-page3.txt: [('-IN', 'IN')]
WMH19030715-V01-28-page4.txt: [('HER-', 'HER'), ('non-un-', 'non-un')]
WMH19030722-V01-29-page1.txt: [('-', '')]
WMH19030722-V01-29-page2.txt: [('assem-', 'assem')]
WMH19030722-V01-29-page3.txt: [('-', ''), ('-', '')]
WMH19030722-V01-29-page4.txt: [('cents-', 'cents'), ('-Elder', 'Elder')]
WMH19030930-V01-39-page3.txt: [('in-', 'in'), ('-to', 'to')]
WMH19030930-V01-39-page4.txt: [('-', ''), ('-', ''), ('-revived.', 'revived.')]
WMH19031028-V01-43-page1.txt: [('-the', 'the'), ('corn-', 'corn'), ('-and', 'and'), ('be-', 'be'), ('-into', 'into')]
WMH19031028-V01-43-page4.txt: [('-', ''), ('-judgment', 'judgment'), ('-', '')]
WMH19031118-V01-46-page1.txt: [('-', ''), ('-', ''), ('mission--', 'mission-')]
WMH19031118-V01-46-page4.txt: [('-the', 'the'), ('-', '')]
WMH19040106-V02-02-page1.txt: [('every-', 'every'), ('peo-', 'peo'), ('-', ''), ('cor-', 'cor')]
WMH19040106-V02-02-page2.txt: [('indi-', 'indi')]
WMH19040106-V02-02-page3.txt: [('-such', 'such')]
WMH19040106-V02-02-page4.txt: [('--Prof.', '-Prof.'), ('Wag-', 'Wag'), ('"Work-', '"Work')]
WMH19040113-V02-03-page1.txt: [('-', ''), ('Van-', 'Van'), ('camp-meet-', 'camp-meet'), ('now-', 'now'), ('conven-', 'conven')]
WMH19040113-V02-03-page2.txt: [('God--', 'God-')]
WMH19040113-V02-03-page3.txt: [('AC-', 'AC'), ('-', '')]
WMH19040113-V02-03-page4.txt: [('--A', '-A')]
WMH19040127-V02-04-page1.txt: [('PRO-', 'PRO'), ('continu-', 'continu')]
WMH19040127-V02-04-page2.txt: [('as-', 'as')]
WMH19040127-V02-04-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19040127-V02-04-page4.txt: [('church.-', 'church.'), ('improve-', 'improve'), ('-A', 'A')]
WMH19040203-V02-05-page3.txt: [('GENER-', 'GENER')]
WMH19040203-V02-05-page4.txt: [('INSTRUCT-', 'INSTRUCT'), ('INSTRUCT-', 'INSTRUCT'), ('-', ''), ('at-', 'at'), ('--"We', '-"We')]
WMH19040210-V02-06-page1.txt: [('-', ''), ('-', ''), ('en-', 'en')]
WMH19040210-V02-06-page2.txt: [('educa-', 'educa')]
WMH19040210-V02-06-page3.txt: [('-', ''), ('Seventh-', 'Seventh'), ('-', '')]
WMH19040210-V02-06-page4.txt: [('--In', '-In'), ('San-', 'San'), ('--The', '-The')]
WMH19040217-V02-07-page1.txt: [('--sacred', '-sacred'), ('Assn.-', 'Assn.'), ('-', ''), ('President-', 'President')]
WMH19040217-V02-07-page3.txt: [('-', ''), ('-', '')]
WMH19040217-V02-07-page4.txt: [('--Elder', '-Elder'), ('-will', 'will')]
WMH19040224-V02-08-page1.txt: [('President-', 'President'), ('Assn.-', 'Assn.'), ('con-', 'con'), ('priv-', 'priv')]
WMH19040224-V02-08-page2.txt: [('-', ''), ('na-', 'na')]
WMH19040224-V02-08-page4.txt: [('-', ''), ('--Elder', '-Elder'), ('--The', '-The'), ('-will', 'will'), ('o--', 'o-')]
WMH19040302-V02-09-page1.txt: [('-to', 'to')]
WMH19040302-V02-09-page3.txt: [('--"the', '-"the'), ('-Dissipating', 'Dissipating')]
WMH19040302-V02-09-page4.txt: [('-', ''), ('-an', 'an'), ('-', '')]
WMH19040309-V02-10-page1.txt: [('Bat-', 'Bat'), ('-', '')]
WMH19040309-V02-10-page2.txt: [('AD-', 'AD'), ('-subscriptions', 'subscriptions')]
WMH19040309-V02-10-page3.txt: [('-This', 'This'), ('-', '')]
WMH19040309-V02-10-page4.txt: [('--"We', '-"We')]
WMH19040316-V02-11-page1.txt: [('righteous-', 'righteous'), ('Savioin-', 'Savioin'), ('Christ.--', 'Christ.-')]
WMH19040316-V02-11-page2.txt: [('Berrien-', 'Berrien'), ('be-', 'be')]
WMH19040316-V02-11-page3.txt: [('--.', '-.')]
WMH19040316-V02-11-page4.txt: [('-', ''), ('-At', 'At'), ('-to', 'to')]
WMH19040323-V02-12-page1.txt: [('-F.', 'F.'), ('-the', 'the'), ('-', '')]
WMH19040323-V02-12-page2.txt: [('and-', 'and')]
WMH19040323-V02-12-page3.txt: [('-taken', 'taken'), ('-', '')]
WMH19040323-V02-12-page4.txt: [('Mc-', 'Mc')]
WMH19040330-V02-13-page1.txt: [('--', '-'), ('RE-', 'RE'), ('PER-', 'PER')]
WMH19040330-V02-13-page2.txt: [('-have', 'have'), ('REC-', 'REC')]
WMH19040330-V02-13-page3.txt: [('bili-', 'bili'), ('biliousness.-', 'biliousness.')]
WMH19040330-V02-13-page4.txt: [('-be', 'be')]
WMH19040406-V02-14-page3.txt: [('In-', 'In'), ('abdom-', 'abdom'), ('-', '')]
WMH19040406-V02-14-page4.txt: [('-', '')]
WMH19040413-V02-15-page3.txt: [('an-', 'an'), ('-', '')]
WMH19040413-V02-15-page4.txt: [('SOUTH-', 'SOUTH'), ('-illustrated.', 'illustrated.'), ('-disposed', 'disposed')]
WMH19040420-V02-16-page1.txt: [('Zi-', 'Zi'), ('for-', 'for')]
WMH19040420-V02-16-page3.txt: [('-the', 'the'), ('-', ''), ('-DR.', 'DR.'), ('Three-', 'Three')]
WMH19040420-V02-16-page4.txt: [('-', ''), ('-', '')]
WMH19040427-V02-17-page2.txt: [('The-', 'The'), ('-', '')]
WMH19040427-V02-17-page3.txt: [('-', '')]
WMH19040427-V02-17-page4.txt: [('Swed-', 'Swed'), ('-', '')]
WMH19040504-V02-18-page2.txt: [('-', '')]
WMH19040504-V02-18-page3.txt: [('-its', 'its'), ('-of', 'of')]
WMH19040504-V02-18-page4.txt: [('-', '')]
WMH19040511-V02-19-page2.txt: [('-', ''), ('---of', '--of')]
WMH19040511-V02-19-page3.txt: [('-', ''), ('-', ''), ('mail-', 'mail'), ('-', ''), ('-', ''), ('-truths', 'truths'), ('-', ''), ('-Allegan', 'Allegan')]
WMH19040511-V02-19-page4.txt: [('-Remember', 'Remember'), ('-', ''), ('be-', 'be')]
WMH19040518-V02-20-page2.txt: [('-be', 'be')]
WMH19040518-V02-20-page3.txt: [('-note', 'note'), ('-mentioned', 'mentioned')]
WMH19040518-V02-20-page4.txt: [('inter.-', 'inter.'), ('-', ''), ('-blessed', 'blessed'), ('-', '')]
WMH19040601-V02-22-page1.txt: [('Ohio--', 'Ohio-'), ('-', ''), ('-Irwin', 'Irwin'), ('Andrea-', 'Andrea')]
WMH19040601-V02-22-page2.txt: [('-the', 'the')]
WMH19040608-V02-23-page1.txt: [('-A.', 'A.'), ('Treasurer-D.-', 'Treasurer-D.'), ('-', ''), ('-', ''), ("-urged'", "urged'")]
WMH19040608-V02-23-page2.txt: [('-', ''), ('dis-', 'dis'), ('-', ''), ('-', ''), ('-', ''), ('in-', 'in')]
WMH19040608-V02-23-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-God', 'God'), ('-to', 'to'), ('en-', 'en')]
WMH19040608-V02-23-page4.txt: [('De-', 'De'), ('-', '')]
WMH19040622-V02-24-page3.txt: [('-', ''), ('for-', 'for'), ('-', '')]
WMH19040622-V02-24-page4.txt: [('-', ''), ('Some-', 'Some'), ('-', '')]
WMH19040629-V02-25-page1.txt: [('-cultivate', 'cultivate'), ('-', ''), ('-', ''), ('or-', 'or')]
WMH19040629-V02-25-page2.txt: [('-this', 'this'), ('-be', 'be')]
WMH19040629-V02-25-page3.txt: [('-', ''), ('-five.', 'five.')]
WMH19040629-V02-25-page4.txt: [('-', '')]
WMH19040706-V02-26-page2.txt: [('con-', 'con'), ('attend-', 'attend'), ('Mission--', 'Mission-')]
WMH19040706-V02-26-page3.txt: [('--', '-'), ('-', ''), ('-', ''), ('-and', 'and'), ('world-', 'world')]
WMH19040713-V02-27-page1.txt: [('-', ''), ('CIRCUM-', 'CIRCUM'), ('ELECT-', 'ELECT'), ('DEFI-', 'DEFI')]
WMH19040713-V02-27-page2.txt: [('-ALL', 'ALL')]
WMH19040713-V02-27-page3.txt: [('HERALD.-', 'HERALD.'), ('-mee', 'mee'), ('-people', 'people'), ('-', ''), ('-whom', 'whom'), ('-', ''), ('par-', 'par')]
WMH19040720-V02-28-page1.txt: [('Vox-', 'Vox'), ('-a', 'a'), ('-principles', 'principles')]
WMH19040720-V02-28-page3.txt: [('-in', 'in')]
WMH19040720-V02-28-page4.txt: [('-', ''), ('Na-', 'Na'), ('-The', 'The'), ('-', ''), ('-', ''), ('announ-', 'announ'), ('---"Our', '--"Our')]
WMH19040727-V02-29-page1.txt: [('-', ''), ('straw-', 'straw'), ('-', ''), ('lights-', 'lights'), ('-Righteousness', 'Righteousness')]
WMH19040727-V02-29-page2.txt: [('--That', '-That'), ('-A', 'A')]
WMH19040727-V02-29-page3.txt: [('-may', 'may'), ('-small', 'small'), ('-', ''), ('--helpful', '-helpful')]
WMH19040803-V02-30-page1.txt: [('-the', 'the'), ('-', ''), ('round-', 'round'), ('-On', 'On'), ('-ether', 'ether'), ('purchas-', 'purchas'), ('.-', '.'), ('-not', 'not')]
WMH19040803-V02-30-page2.txt: [('-', ''), ('-', ''), ('-of', 'of')]
WMH19040803-V02-30-page3.txt: [('ex-', 'ex'), ('-church', 'church'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19040803-V02-30-page4.txt: [('-', ''), ('Van-', 'Van'), ('Van-', 'Van')]
WMH19040810-V02-31-page1.txt: [('-finding', 'finding'), ('-great', 'great'), ('South-', 'South'), ('-', ''), ('giv-', 'giv'), ('-', ''), ('peo-', 'peo')]
WMH19040810-V02-31-page3.txt: [('-', '')]
WMH19040810-V02-31-page4.txt: [('la-', 'la'), ('Sabbath-', 'Sabbath'), ('-', '')]
WMH19040817-V02-32-page1.txt: [('-', ''), ('-', ''), ('con-', 'con')]
WMH19040817-V02-32-page2.txt: [('-to', 'to'), ('-Dr.', 'Dr.')]
WMH19040817-V02-32-page3.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19040817-V02-32-page4.txt: [('benefit.-', 'benefit.')]
WMH19040831-V02-33-page1.txt: [('THEM-', 'THEM')]
WMH19040831-V02-33-page3.txt: [('repair-', 'repair')]
WMH19040831-V02-33-page4.txt: [('Howe-', 'Howe'), ('-', '')]
WMH19040914-V02-34-page1.txt: [('--', '-'), ('-', ''), ('-', '')]
WMH19040914-V02-34-page2.txt: [('--', '-')]
WMH19040914-V02-34-page3.txt: [('-and', 'and'), ('-', '')]
WMH19040921-V02-34a-page3.txt: [('con-', 'con'), ('-', ''), ('-', '')]
WMH19040921-V02-34a-page4.txt: [('II-', 'II')]
WMH19040928-V02-35-page1.txt: [('CAMP-', 'CAMP'), ('of-', 'of')]
WMH19040928-V02-35-page2.txt: [('-', '')]
WMH19040928-V02-35-page3.txt: [('pray-', 'pray')]
WMH19040928-V02-35-page4.txt: [('-', ''), ('-Miss', 'Miss'), ('-', ''), ('--', '-')]
WMH19041005-V02-36-page1.txt: [('House-to-', 'House-to'), ('librari-', 'librari'), ('"-', '"')]
WMH19041005-V02-36-page2.txt: [('-I', 'I'), ('-', '')]
WMH19041005-V02-36-page3.txt: [('indications-', 'indications'), ('-', ''), ('THANK-', 'THANK'), ('-have', 'have'), ('-breads', 'breads'), ('-local', 'local')]
WMH19041005-V02-36-page4.txt: [('-', '')]
WMH19041012-V02-37-page2.txt: [('Ad-', 'Ad'), ('-', ''), ('-', ''), ('at-', 'at')]
WMH19041012-V02-37-page4.txt: [('-conference', 'conference')]
WMH19041019-V02-38-page1.txt: [('done--', 'done-')]
WMH19041019-V02-38-page3.txt: [('ap-', 'ap'), ('perform-', 'perform'), ('-', ''), ('-tends', 'tends')]
WMH19041019-V02-38-page4.txt: [('Sabbath--', 'Sabbath-'), ('-Nashville', 'Nashville')]
WMH19041026-V02-39-page1.txt: [('disci-', 'disci')]
WMH19041026-V02-39-page2.txt: [('EN-', 'EN'), ('PEO-', 'PEO'), ('DISAP-', 'DISAP')]
WMH19041026-V02-39-page3.txt: [('PRE-', 'PRE'), ('Me-', 'Me'), ('at-', 'at')]
WMH19041026-V02-39-page4.txt: [('in-', 'in'), ('-', ''), ('Haughey-', 'Haughey')]
WMH19041102-V02-40-page1.txt: [('Lga-', 'Lga'), ('-West', 'West'), ('En-', 'En'), ('-themselves', 'themselves'), ('-', '')]
WMH19041102-V02-40-page2.txt: [('--about', '-about'), ('-this', 'this'), ('-hoped', 'hoped')]
WMH19041102-V02-40-page3.txt: [('-', ''), ('-asked', 'asked'), ('connec-', 'connec')]
WMH19041102-V02-40-page4.txt: [('Sabbath-', 'Sabbath'), ('-some', 'some'), ('-', '')]
WMH19041109-V02-41-page1.txt: [('San-', 'San')]
WMH19041109-V02-41-page2.txt: [('non-', 'non'), ('ever-', 'ever')]
WMH19041109-V02-41-page4.txt: [('in-', 'in')]
WMH19041116-V02-42-page1.txt: [('-', '')]
WMH19041116-V02-42-page4.txt: [('-', ''), ('Mc-', 'Mc'), ('Health-', 'Health')]
WMH19041123-V02-43-page1.txt: [('connected--', 'connected-'), ('-', ''), ('-is', 'is'), ('-and', 'and'), ('-', '')]
WMH19041123-V02-43-page2.txt: [('hun-', 'hun')]
WMH19041123-V02-43-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Sabbath-', 'Sabbath'), ('-could', 'could')]
WMH19041123-V02-43-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19041130-V02-44-page1.txt: [('Mc-', 'Mc'), ('-West', 'West')]
WMH19041130-V02-44-page3.txt: [('-', ''), ('discour-', 'discour'), ('NEAT-', 'NEAT'), ('-', ''), ('-', '')]
WMH19041130-V02-44-page4.txt: [('io-', 'io'), ('-', '')]
WMH19041207-V02-45-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('Ave-', 'Ave')]
WMH19041207-V02-45-page2.txt: [('-and', 'and'), ('--No.', '-No.'), ('CHRIST-', 'CHRIST'), ('To-', 'To'), ('Paw-', 'Paw'), ('confer-', 'confer')]
WMH19041207-V02-45-page3.txt: [('-', ''), ('-', '')]
WMH19041207-V02-45-page4.txt: [('-aged', 'aged'), ('resurrection.-', 'resurrection.')]
WMH19041214-V02-46-page1.txt: [('o-', 'o'), ('-', '')]
WMH19041214-V02-46-page2.txt: [('-courage', 'courage')]
WMH19041214-V02-46-page3.txt: [('-through', 'through'), ('at-', 'at'), ('-', '')]
WMH19041214-V02-46-page4.txt: [('-to', 'to'), ('Sab-', 'Sab'), ('an-', 'an'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19041221-V02-47-page1.txt: [('Education-', 'Education'), ('-to', 'to'), ('and-', 'and'), ('-', ''), ('-and', 'and'), ('-grateful', 'grateful')]
WMH19041221-V02-47-page2.txt: [('-', ''), ('teach-', 'teach')]
WMH19041221-V02-47-page3.txt: [('-church', 'church'), ('WATCH-', 'WATCH'), ('-work', 'work'), ('NEC-', 'NEC'), ('DE-', 'DE'), ('EDI-', 'EDI'), ('REG-', 'REG'), ('-', ''), ('De-', 'De')]
WMH19041221-V02-47-page4.txt: [('-', ''), ('-', ''), ('be-', 'be'), ('-', ''), ('WATCH-', 'WATCH'), ('-of', 'of')]
WMH19041228-V02-48-page1.txt: [('con-', 'con'), ('-', ''), ('-', ''), ('-during', 'during')]
WMH19041228-V02-48-page2.txt: [('-', '')]
WMH19041228-V02-48-page3.txt: [('-or', 'or')]
WMH19041228-V02-48-page4.txt: [('Sabbath-', 'Sabbath'), ('-', ''), ('-', ''), ('Sabbath-', 'Sabbath')]
WMH19050104-V03-01-page1.txt: [('-', ''), ('Con-', 'Con'), ('-', '')]
WMH19050104-V03-01-page2.txt: [('Orange-', 'Orange'), ('-', ''), ('-previous', 'previous')]
WMH19050104-V03-01-page3.txt: [('--but', '-but'), ('-not', 'not')]
WMH19050104-V03-01-page4.txt: [('-', '')]
WMH19050111-V03-02-page1.txt: [('o-', 'o')]
WMH19050111-V03-02-page2.txt: [('bap-', 'bap'), ('-', ''), ('-', ''), ('-', '')]
WMH19050111-V03-02-page3.txt: [('-', ''), ('-', ''), ('-difficult', 'difficult'), ('-and', 'and'), ('-new', 'new')]
WMH19050111-V03-02-page4.txt: [('-have', 'have'), ('faith-', 'faith'), ('-ful', 'ful'), ('zo-', 'zo'), ('Sabbath-', 'Sabbath')]
WMH19050118-V03-03-page1.txt: [('V-', 'V')]
WMH19050118-V03-03-page2.txt: [('-', '')]
WMH19050118-V03-03-page4.txt: [('-G.', 'G.'), ('-', '')]
WMH19050201-V03-04-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19050201-V03-04-page2.txt: [('-', ''), ('-', '')]
WMH19050201-V03-04-page4.txt: [("-YOUTH'S", "YOUTH'S"), ('-page', 'page'), ('Mich-', 'Mich'), ('-', ''), ('-', ''), ('-', '')]
WMH19050208-V03-05-page1.txt: [('-', ''), ('Cre-', 'Cre')]
WMH19050208-V03-05-page2.txt: [('-over', 'over'), ('corn-', 'corn')]
WMH19050208-V03-05-page3.txt: [('-to', 'to')]
WMH19050208-V03-05-page4.txt: [('-some', 'some')]
WMH19050215-V03-06-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050215-V03-06-page3.txt: [('-', '')]
WMH19050215-V03-06-page4.txt: [('-for', 'for'), ('-', '')]
WMH19050222-V03-07-page1.txt: [('-', ''), ('.-', '.'), ('reports-', 'reports'), ('-', ''), ('-', '')]
WMH19050222-V03-07-page2.txt: [('-', ''), ('-', '')]
WMH19050222-V03-07-page3.txt: [('-', ''), ('-', ''), ('-We', 'We'), ('CAN-', 'CAN')]
WMH19050222-V03-07-page4.txt: [('con-', 'con')]
WMH19050301-V03-08-page1.txt: [('W.-', 'W.'), ('Mc-', 'Mc')]
WMH19050301-V03-08-page4.txt: [('-', '')]
WMH19050315-V03-10-page1.txt: [('-', ''), ('-', ''), ('-sending', 'sending'), ('De-', 'De')]
WMH19050315-V03-10-page3.txt: [('-proclaim', 'proclaim'), ('-and', 'and')]
WMH19050315-V03-10-page4.txt: [('agnos-', 'agnos'), ('ordi-', 'ordi')]
WMH19050322-V03-11-page1.txt: [('-HERALD.', 'HERALD.')]
WMH19050322-V03-11-page2.txt: [('-the', 'the'), ('-it', 'it'), ('-incident', 'incident'), ('at-', 'at')]
WMH19050322-V03-11-page3.txt: [('-oldest', 'oldest'), ('-became', 'became'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('hall-', 'hall'), ('neces-', 'neces')]
WMH19050322-V03-11-page5.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('C-', 'C'), ('G-', 'G'), ('-', '')]
WMH19050322-V03-11-page6.txt: [('-', ''), ('-', '')]
WMH19050329-V03-12-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Confer-', 'Confer'), ('-', ''), ('Depart-', 'Depart')]
WMH19050329-V03-12-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050329-V03-12-page4.txt: [('-', ''), ('-"The', '"The')]
WMH19050405-V03-13-page1.txt: [('I--', 'I-'), ('-', ''), ('CON-', 'CON')]
WMH19050405-V03-13-page4.txt: [('-', '')]
WMH19050413-V03-14-page1.txt: [('-', '')]
WMH19050413-V03-14-page2.txt: [('-', '')]
WMH19050413-V03-14-page3.txt: [('GIV-', 'GIV'), ('-disciplined', 'disciplined')]
WMH19050419-V03-15-page1.txt: [('-', ''), ('-', ''), ('confer-', 'confer')]
WMH19050419-V03-15-page3.txt: [('So-', 'So')]
WMH19050419-V03-15-page4.txt: [('-Dr.', 'Dr.')]
WMH19050426-V03-16-page1.txt: [('-', ''), ('Roth-', 'Roth')]
WMH19050426-V03-16-page2.txt: [('-', ''), ('-', ''), ('of-', 'of')]
WMH19050426-V03-16-page3.txt: [('-knees', 'knees')]
WMH19050426-V03-16-page4.txt: [('cur-', 'cur'), ('-', ''), ('type-', 'type')]
WMH19050503-V03-17-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('UTTER-', 'UTTER'), ('Ob-', 'Ob')]
WMH19050503-V03-17-page2.txt: [('activity-', 'activity'), ('in-', 'in')]
WMH19050503-V03-17-page3.txt: [('-THE', 'THE'), ('-well-officered', 'well-officered'), ('-and', 'and'), ('Pennsyl-', 'Pennsyl')]
WMH19050503-V03-17-page4.txt: [('-', '')]
WMH19050510-V03-18-page1.txt: [('di-', 'di'), ('-of', 'of'), ('teach-', 'teach')]
WMH19050510-V03-18-page2.txt: [('-', '')]
WMH19050510-V03-18-page4.txt: [('Seventh-', 'Seventh')]
WMH19050517-V03-19-page1.txt: [('lead-', 'lead'), ('-', '')]
WMH19050517-V03-19-page3.txt: [('-of', 'of')]
WMH19050517-V03-19-page4.txt: [('Confer-', 'Confer'), ('-ence', 'ence')]
WMH19050524-V03-20-page2.txt: [('call-', 'call')]
WMH19050524-V03-20-page3.txt: [('-possession.', 'possession.')]
WMH19050524-V03-20-page4.txt: [('-', '')]
WMH19050531-V03-21-page1.txt: [('-', '')]
WMH19050531-V03-21-page3.txt: [('MICH-', 'MICH'), ('-"Missionary', '"Missionary'), ('-be', 'be')]
WMH19050531-V03-21-page4.txt: [('-', '')]
WMH19050607-V03-22-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--live', '-live')]
WMH19050607-V03-22-page3.txt: [('-and', 'and'), ('-Encyclopedia', 'Encyclopedia'), ('-Luke', 'Luke'), ('-Isa.', 'Isa.'), ('-', ''), ('-', ''), ('-Matt.', 'Matt.'), ('-Jews', 'Jews'), ('--Heb.', '-Heb.'), ('-Rev.', 'Rev.'), ('-Isa.', 'Isa.'), ('-Neh.', 'Neh.'), ('-Ex.', 'Ex.'), ('-', ''), ('---Gen.', '--Gen.'), ('-', ''), ('--Gen.', '-Gen.'), ('-', ''), ('-', ''), ('-', ''), ('-Isa.', 'Isa.'), ('-', '')]
WMH19050607-V03-22-page4.txt: [('-', '')]
WMH19050614-V03-23-page2.txt: [('---"I', '--"I'), ('-', '')]
WMH19050614-V03-23-page3.txt: [('les-', 'les')]
WMH19050614-V03-23-page4.txt: [('-', ''), ('quar-', 'quar')]
WMH19050621-V03-24-page1.txt: [('-', ''), ('edu-', 'edu')]
WMH19050621-V03-24-page2.txt: [('-may', 'may'), ('-', ''), ('-', '')]
WMH19050621-V03-24-page3.txt: [('-', '')]
WMH19050621-V03-24-page4.txt: [('-', ''), ('-', '')]
WMH19050628-V03-25-page1.txt: [('-', ''), ('con-', 'con'), ('-', '')]
WMH19050705-V03-26-page1.txt: [('-s', 's'), ('a-', 'a'), ('--Selected.', '-Selected.'), ('Un-', 'Un')]
WMH19050705-V03-26-page2.txt: [('-by', 'by'), ('-', '')]
WMH19050705-V03-26-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-in', 'in'), ('gain-', 'gain'), ('-', '')]
WMH19050712-V03-27-page1.txt: [('re-', 're')]
WMH19050712-V03-27-page2.txt: [('Self-', 'Self'), ('defi-', 'defi')]
WMH19050712-V03-27-page3.txt: [('month-', 'month'), ('superintend-', 'superintend'), ('--Selected.', '-Selected.'), ('-', ''), ('-', ''), ('Sabbath-', 'Sabbath'), ('ef-', 'ef')]
WMH19050712-V03-27-page4.txt: [('-pain', 'pain')]
WMH19050719-V03-28-page1.txt: [('-', ''), ('arrang-', 'arrang')]
WMH19050719-V03-28-page3.txt: [('-We', 'We')]
WMH19050719-V03-28-page4.txt: [('-', '')]
WMH19050726-V03-29-page1.txt: [('-with', 'with'), ('CAMP-', 'CAMP'), ('-', ''), ('-', ''), ('-', '')]
WMH19050726-V03-29-page3.txt: [('being--', 'being-'), ('-to', 'to'), ('-', ''), ('-', '')]
WMH19050726-V03-29-page4.txt: [('-', ''), ('-', '')]
WMH19050802-V03-30-page1.txt: [('Camp-', 'Camp'), ('---health', '--health'), ('--for', '-for'), ('Camp-', 'Camp'), ('-', '')]
WMH19050802-V03-30-page2.txt: [('asked-', 'asked')]
WMH19050802-V03-30-page3.txt: [('Sabbath-', 'Sabbath'), ('--Selected.', '-Selected.')]
WMH19050802-V03-30-page4.txt: [('corn-', 'corn'), ('-', ''), ('Organiza-', 'Organiza')]
WMH19050809-V03-31-page1.txt: [('-', ''), ('--', '-'), ('Camp-', 'Camp'), ('-', '')]
WMH19050809-V03-31-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050809-V03-31-page3.txt: [('-', ''), ('neces-', 'neces'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050816-V03-32-page1.txt: [('inter-', 'inter'), ('-', '')]
WMH19050816-V03-32-page2.txt: [('fol-', 'fol'), ('them-', 'them')]
WMH19050816-V03-32-page3.txt: [('-Foster', 'Foster')]
WMH19050816-V03-32-page4.txt: [('-EZRA', 'EZRA')]
WMH19050830-V03-33-page2.txt: [('-', '')]
WMH19050830-V03-33-page3.txt: [('-', ''), ('-cents', 'cents'), ('-', ''), ('-', '')]
WMH19050830-V03-33-page4.txt: [('-to', 'to'), ('-book', 'book'), ('HER-', 'HER')]
WMH19050906-V03-34-page1.txt: [('Sabbath-', 'Sabbath')]
WMH19050906-V03-34-page2.txt: [('DEPART-', 'DEPART')]
WMH19050906-V03-34-page3.txt: [('--Selected.', '-Selected.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Camp-meet-', 'Camp-meet')]
WMH19050906-V03-34-page4.txt: [('-', ''), ('-at', 'at'), ('-', ''), ('work.-', 'work.'), ('-', ''), ('-', '')]
WMH19050913-V03-35-page1.txt: [('what-', 'what')]
WMH19050913-V03-35-page3.txt: [('--', '-'), ('under-', 'under')]
WMH19050913-V03-35-page4.txt: [('-', ''), ('-', '')]
WMH19050920-V03-36-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19050920-V03-36-page2.txt: [('Sabbath-', 'Sabbath')]
WMH19050920-V03-36-page3.txt: [('-purpose', 'purpose')]
WMH19050920-V03-36-page4.txt: [('-', ''), ('-son', 'son'), ('REVIEW--', 'REVIEW-'), ('SIGNS-', 'SIGNS'), ('HEALTH-', 'HEALTH')]
WMH19050927-V03-37-page1.txt: [('-', ''), ('-I', 'I')]
WMH19050927-V03-37-page2.txt: [('-', ''), ('-low', 'low'), ('im-', 'im')]
WMH19050927-V03-37-page3.txt: [('cher-', 'cher')]
WMH19051004-V03-38-page1.txt: [('ad-', 'ad'), ('-', ''), ('-', ''), ('-', ''), ('Ad-', 'Ad')]
WMH19051004-V03-38-page2.txt: [('-', '')]
WMH19051004-V03-38-page3.txt: [('--Selected.', '-Selected.'), ('coun-', 'coun'), ('Yose-', 'Yose')]
WMH19051004-V03-38-page4.txt: [('and-', 'and'), ('thepro-', 'thepro')]
WMH19051011-V03-39-page1.txt: [('-', '')]
WMH19051011-V03-39-page2.txt: [('Sab-', 'Sab')]
WMH19051018-V03-40-page1.txt: [('corn-', 'corn')]
WMH19051018-V03-40-page2.txt: [('commandments-', 'commandments'), ('-much', 'much')]
WMH19051018-V03-40-page3.txt: [('-', ''), ('-', '')]
WMH19051018-V03-40-page4.txt: [('--Tarry', '-Tarry'), ('Bourdeau-', 'Bourdeau')]
WMH19051025-V03-41-page1.txt: [('-', '')]
WMH19051025-V03-41-page3.txt: [('--Selected.', '-Selected.'), ('pro-', 'pro')]
WMH19051025-V03-41-page4.txt: [('--also', '-also')]
WMH19051101-V03-42-page1.txt: [('future--', 'future-'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19051101-V03-42-page2.txt: [('Ad-', 'Ad')]
WMH19051101-V03-42-page3.txt: [('essential-', 'essential'), ('suf-', 'suf')]
WMH19051101-V03-42-page4.txt: [('-will', 'will'), ('MESSEN-', 'MESSEN'), ('-and', 'and'), ('-', '')]
WMH19051108-V03-43-page1.txt: [('domi-', 'domi'), ('-', ''), ('-', '')]
WMH19051108-V03-43-page3.txt: [('Mc-', 'Mc'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Maple', 'Maple')]
WMH19051108-V03-43-page4.txt: [('-of', 'of'), ('-', ''), ('-Editor', 'Editor')]
WMH19051122-V03-45-page1.txt: [('-', ''), ('-', ''), ('-"I\'ll', '"I\'ll'), ('-', ''), ('-', ''), ('A-', 'A'), ('-lambs', 'lambs'), ('Advent-', 'Advent')]
WMH19051122-V03-45-page2.txt: [('-', '')]
WMH19051122-V03-45-page4.txt: [('-', ''), ('-', ''), ('Broth-', 'Broth')]
WMH19051129-V03-46-page1.txt: [('-weary', 'weary'), ('sys-', 'sys')]
WMH19051129-V03-46-page2.txt: [('-', ''), ('in-', 'in')]
WMH19051129-V03-46-page3.txt: [('-this', 'this'), ('-', ''), ('-', '')]
WMH19051129-V03-46-page4.txt: [('LIT-', 'LIT'), ('LIT-', 'LIT')]
WMH19051206-V03-47-page1.txt: [('-our', 'our'), ('Cedar-', 'Cedar'), ('De-', 'De'), ('-', ''), ('--', '-'), ('interest-', 'interest')]
WMH19051206-V03-47-page2.txt: [('-', '')]
WMH19051206-V03-47-page3.txt: [('-fruit', 'fruit')]
WMH19051206-V03-47-page4.txt: [('ad-', 'ad'), ('-', '')]
WMH19051213-V03-48-page1.txt: [('recitation.-', 'recitation.')]
WMH19051213-V03-48-page2.txt: [('can-', 'can'), ('POT-', 'POT')]
WMH19051213-V03-48-page3.txt: [('-', ''), ('-', '')]
WMH19051213-V03-48-page4.txt: [('copies.-', 'copies.'), ('-Creek', 'Creek'), ('var-', 'var')]
WMH19051220-V03-49-page1.txt: [('right-', 'right'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19051220-V03-49-page2.txt: [('ADVENTIST-', 'ADVENTIST'), ('CO-', 'CO'), ('-', ''), ('Ix-', 'Ix')]
WMH19051220-V03-49-page3.txt: [('-', ''), ('-', '')]
WMH19051227-V03-50-page2.txt: [('-', ''), ("People's-", "People's"), ('-given', 'given'), ('-etc.', 'etc.'), ('-', '')]
WMH19051227-V03-50-page3.txt: [('-', ''), ('-Although', 'Although'), ('o-', 'o'), ('-', ''), ('Her-', 'Her')]
WMH19060103-V04-01-page1.txt: [('consider-', 'consider'), ('ask-', 'ask'), ('-.', '.')]
WMH19060103-V04-01-page2.txt: [('years..-', 'years..'), ('sin-', 'sin')]
WMH19060103-V04-01-page3.txt: [('Self-', 'Self'), ('birth-', 'birth')]
WMH19060103-V04-01-page4.txt: [('-', ''), ('-', '')]
WMH19060110-V04-02-page1.txt: [('-fifteen', 'fifteen'), ('-', '')]
WMH19060110-V04-02-page2.txt: [('-', ''), ('-Paw', 'Paw'), ('-', ''), ('-', ''), ('-', '')]
WMH19060110-V04-02-page4.txt: [('-', '')]
WMH19060117-V04-03-page1.txt: [('Secretary--', 'Secretary-'), ('-', '')]
WMH19060117-V04-03-page2.txt: [('CONFER-', 'CONFER')]
WMH19060117-V04-03-page3.txt: [('-loans', 'loans'), ('-', ''), ('R-', 'R'), ('Mich-', 'Mich'), ('-', '')]
WMH19060117-V04-03-page4.txt: [('-', ''), ('Offer-', 'Offer'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060124-V04-04-page1.txt: [('--jr', '-jr'), ('--', '-'), ('-', ''), ('-', ''), ('SABBATH-', 'SABBATH'), ('-', '')]
WMH19060124-V04-04-page2.txt: [('INCORPO-', 'INCORPO'), ('aggre-', 'aggre')]
WMH19060124-V04-04-page3.txt: [('Mc-', 'Mc'), ('-', ''), ('-', ''), ('-', '')]
WMH19060124-V04-04-page4.txt: [('-', ''), ('-', '')]
WMH19060131-V04-05-page1.txt: [('------', '-----'), ('-', ''), ('-luessiorpi', 'luessiorpi')]
WMH19060131-V04-05-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('Mis-', 'Mis')]
WMH19060131-V04-05-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sub-', 'sub'), ('-', '')]
WMH19060207-V04-06-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060207-V04-06-page3.txt: [('Spirit-', 'Spirit'), ('-', ''), ('place.-', 'place.')]
WMH19060214-V04-07-page1.txt: [('t-', 't'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060214-V04-07-page2.txt: [('-', ''), ('-Lessons', 'Lessons')]
WMH19060214-V04-07-page3.txt: [('-all', 'all'), ('-the', 'the'), ('Danish-', 'Danish')]
WMH19060214-V04-07-page4.txt: [('BOOK.-', 'BOOK.'), ('fitthem-', 'fitthem')]
WMH19060221-V04-08-page1.txt: [('--', '-'), ("''-is'-", "''-is'"), ('develop-', 'develop'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060221-V04-08-page2.txt: [('-', ''), ('adopt-', 'adopt')]
WMH19060221-V04-08-page3.txt: [('themselves-', 'themselves')]
WMH19060221-V04-08-page4.txt: [('-were', 'were')]
WMH19060228-V04-09-page1.txt: [('in-', 'in'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060228-V04-09-page2.txt: [('-', '')]
WMH19060228-V04-09-page3.txt: [('-', ''), ('me-', 'me')]
WMH19060307-V04-10-page1.txt: [('-', ''), ('Vice-', 'Vice'), ('-', ''), ('-', '')]
WMH19060307-V04-10-page2.txt: [('--the', '-the'), ('-', '')]
WMH19060307-V04-10-page3.txt: [('-', ''), ('-theory', 'theory')]
WMH19060307-V04-10-page4.txt: [('in-', 'in'), ('-', ''), ('-', ''), ('-', '')]
WMH19060314-V04-11-page1.txt: [('-', ''), ('con-', 'con'), ('-', ''), ('-', '')]
WMH19060314-V04-11-page2.txt: [('-', '')]
WMH19060314-V04-11-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sug-', 'sug')]
WMH19060314-V04-11-page4.txt: [('way.-', 'way.'), ('MICHI-', 'MICHI'), ('-', '')]
WMH19060321-V04-12-page1.txt: [('con-', 'con'), ('-', '')]
WMH19060321-V04-12-page2.txt: [('HER-', 'HER')]
WMH19060321-V04-12-page3.txt: [('spelling--', 'spelling-'), ('-future', 'future'), ('San-', 'San')]
WMH19060321-V04-12-page4.txt: [('-Will', 'Will')]
WMH19060328-V04-13-page1.txt: [('--', '-'), ('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('sup-', 'sup')]
WMH19060328-V04-13-page2.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19060328-V04-13-page3.txt: [('-', ''), ('under-', 'under'), ('receiver-', 'receiver')]
WMH19060328-V04-13-page4.txt: [('--', '-'), ('-', ''), ('-', ''), ('-', '')]
WMH19060404-V04-14-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060404-V04-14-page3.txt: [('-Uttered', 'Uttered'), ('--', '-')]
WMH19060404-V04-14-page4.txt: [('-', '')]
WMH19060411-V04-15-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Conven-', 'Conven')]
WMH19060411-V04-15-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Offer-', 'Offer'), ('-', '')]
WMH19060411-V04-15-page3.txt: [('-', ''), ('-ro', 'ro')]
WMH19060411-V04-15-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060418-V04-16-page1.txt: [('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('---the', '--the'), ('-', ''), ('-', ''), ('work-', 'work')]
WMH19060418-V04-16-page3.txt: [('-', ''), ('-it', 'it')]
WMH19060418-V04-16-page4.txt: [('Hunts-', 'Hunts'), ('-', '')]
WMH19060425-V04-17-page1.txt: [('-in', 'in'), ('build-', 'build'), ('-', '')]
WMH19060425-V04-17-page2.txt: [('--FLORENCE', '-FLORENCE'), ('right-', 'right')]
WMH19060425-V04-17-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('SABBATH-', 'SABBATH')]
WMH19060425-V04-17-page4.txt: [('AD-', 'AD'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060502-V04-18-page1.txt: [('-', ''), ('suf-', 'suf'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-')]
WMH19060502-V04-18-page3.txt: [('excel-', 'excel'), ('-lent', 'lent'), ('ARBEI-', 'ARBEI'), ('arrange-', 'arrange'), ('in-', 'in')]
WMH19060502-V04-18-page4.txt: [('-to', 'to'), ('-church', 'church')]
WMH19060509-V04-19-page1.txt: [('-', ''), ('GATHERED-', 'GATHERED'), ('"---', '"--')]
WMH19060509-V04-19-page2.txt: [('-I', 'I'), ('-', ''), ('-', ''), ('-.God.', '.God.'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('PROPH-', 'PROPH'), ('PROPH-', 'PROPH'), ('PROPH-', 'PROPH'), ('Lakeview-', 'Lakeview')]
WMH19060509-V04-19-page3.txt: [('-', ''), ('-', ''), ('Rogers-', 'Rogers'), ('-', ''), ('-ho', 'ho'), ('near-', 'near'), ('be-', 'be'), ('mat-', 'mat'), ('-', '')]
WMH19060523-V04-20-page2.txt: [('-', ''), ('-man', 'man')]
WMH19060523-V04-20-page3.txt: [('-', ''), ('-', '')]
WMH19060530-V04-21-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-school', 'school'), ('-', ''), ('-', ''), ('-', '')]
WMH19060530-V04-21-page2.txt: [('-', ''), ('-', ''), ('denomi-', 'denomi')]
WMH19060530-V04-21-page3.txt: [('im-', 'im')]
WMH19060530-V04-21-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060606-V04-22-page1.txt: [('..-', '..')]
WMH19060606-V04-22-page2.txt: [('-', '')]
WMH19060606-V04-22-page3.txt: [('-Sec.', 'Sec.'), ('.-', '.'), ('SUP-', 'SUP'), ('publishers.compliment-', 'publishers.compliment')]
WMH19060606-V04-22-page4.txt: [('-Literary', 'Literary'), ('Michi-', 'Michi'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060613-V04-23-page1.txt: [('-Sabbath-school', 'Sabbath-school'), ('-Kalamazoo', 'Kalamazoo')]
WMH19060613-V04-23-page3.txt: [('-', ''), ('-', ''), ('mail-', 'mail'), ('-', '')]
WMH19060613-V04-23-page4.txt: [('ES-', 'ES'), ('Missis.-', 'Missis.'), ('-', '')]
WMH19060620-V04-24-page1.txt: [('-', ''), ('-', ''), ('-utmost', 'utmost')]
WMH19060620-V04-24-page2.txt: [('REPENT-', 'REPENT'), ('RE-', 'RE'), ('REPENT-', 'REPENT'), ('-', '')]
WMH19060620-V04-24-page3.txt: [('-obedience', 'obedience'), ('-', '')]
WMH19060620-V04-24-page4.txt: [('returning-', 'returning'), ('ut-', 'ut')]
WMH19060627-V04-25-page1.txt: [('viz.--', 'viz.-'), ('-four-page', 'four-page'), ('-', ''), ('Im-', 'Im')]
WMH19060627-V04-25-page2.txt: [('-RST', 'RST'), ('sympaths-', 'sympaths')]
WMH19060627-V04-25-page4.txt: [('-', '')]
WMH19060704-V04-26-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('nee-', 'nee')]
WMH19060704-V04-26-page2.txt: [('-the', 'the'), ('some-', 'some')]
WMH19060704-V04-26-page3.txt: [('per-', 'per'), ('--Success.', '-Success.')]
WMH19060704-V04-26-page4.txt: [('-', '')]
WMH19060711-V04-27-page1.txt: [('in-', 'in')]
WMH19060711-V04-27-page2.txt: [('-', ''), ('Offerings-', 'Offerings'), ('-', '')]
WMH19060711-V04-27-page3.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19060711-V04-27-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('employ-', 'employ'), ('Contro-', 'Contro')]
WMH19060718-V04-28-page1.txt: [('-', ''), ('lir-', 'lir'), ('-', ''), ('-school', 'school'), ('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', '')]
WMH19060718-V04-28-page2.txt: [('-', ''), ('ends--', 'ends-'), ('sup-', 'sup')]
WMH19060718-V04-28-page3.txt: [('Con-', 'Con')]
WMH19060718-V04-28-page4.txt: [('--', '-'), ('Pennsyl-', 'Pennsyl')]
WMH19060725-V04-29-page1.txt: [('-', ''), ('REAPETHGATHRETI-', 'REAPETHGATHRETI'), ('\'"--', '\'"-')]
WMH19060725-V04-29-page2.txt: [('es-', 'es'), ('re-', 're')]
WMH19060725-V04-29-page3.txt: [('COL-', 'COL')]
WMH19060725-V04-29-page4.txt: [('-', ''), ('Ed-', 'Ed'), ('Healing--', 'Healing-'), ('"Left-', '"Left'), ('Safe-', 'Safe')]
WMH19060801-V04-30-page1.txt: [('-', '')]
WMH19060801-V04-30-page2.txt: [('us-', 'us')]
WMH19060801-V04-30-page3.txt: [('"A"-', '"A"')]
WMH19060808-V04-31-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('APPRO-', 'APPRO'), ('CAN-', 'CAN'), ('REA-', 'REA')]
WMH19060808-V04-31-page3.txt: [('I-', 'I'), ('-', ''), ('re-', 're')]
WMH19060808-V04-31-page4.txt: [('.-', '.'), ('con-', 'con'), ('-followers', 'followers'), ('-', '')]
WMH19060822-V04-32-page1.txt: [('a-', 'a'), ('r-', 'r'), ('-', ''), ('-study', 'study'), ('-', '')]
WMH19060822-V04-32-page2.txt: [('-', '')]
WMH19060822-V04-32-page3.txt: [('-', ''), ('-', ''), ('to-day--', 'to-day-'), ('political--', 'political-')]
WMH19060822-V04-32-page4.txt: [('-had', 'had')]
WMH19060829-V04-33-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060829-V04-33-page2.txt: [('-the', 'the'), ('-one', 'one'), ('-', '')]
WMH19060829-V04-33-page3.txt: [('-', ''), ('-', '')]
WMH19060829-V04-33-page4.txt: [('-', '')]
WMH19060905-V04-34-page1.txt: [('-', ''), ('--Isaac', '-Isaac'), ('-', ''), ('so.-', 'so.')]
WMH19060905-V04-34-page2.txt: [('SPE-', 'SPE'), ('-', '')]
WMH19060905-V04-34-page3.txt: [('lle-', 'lle'), ('Confer-', 'Confer')]
WMH19060912-V04-35-page1.txt: [('pro-', 'pro'), ('-', '')]
WMH19060912-V04-35-page2.txt: [('-', ''), ('-six', 'six')]
WMH19060912-V04-35-page3.txt: [('re-', 're'), ('-Children', 'Children')]
WMH19060919-V04-36-page1.txt: [('-', ''), ('-by', 'by'), ('-', ''), ('-heart', 'heart'), ('-Exclaims', 'Exclaims'), ('uncorrupti-', 'uncorrupti')]
WMH19060919-V04-36-page2.txt: [('-', ''), ('-', ''), ('mem-', 'mem')]
WMH19060919-V04-36-page3.txt: [('-breaking', 'breaking')]
WMH19060919-V04-36-page4.txt: [('-and', 'and'), ('-', '')]
WMH19060926-V04-37-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('o-', 'o'), ('-', ''), ('-', ''), ('Mes-', 'Mes'), ('-', ''), ('-', ''), ('-', '')]
WMH19060926-V04-37-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19060926-V04-37-page3.txt: [('schools.--', 'schools.-')]
WMH19060926-V04-37-page4.txt: [('"sulpherbag-', '"sulpherbag'), ('exalt-', 'exalt')]
WMH19061003-V04-38-page1.txt: [('-', ''), ('----', '---'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.')]
WMH19061003-V04-38-page4.txt: [('-', ''), ('-Sam', 'Sam'), ('-', '')]
WMH19061010-V04-39-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061010-V04-39-page2.txt: [('-', ''), ('-', '')]
WMH19061010-V04-39-page3.txt: [('possible-', 'possible')]
WMH19061017-V04-40-page1.txt: [('EAST-', 'EAST'), ('SEND-', 'SEND'), ('TAK-', 'TAK'), ('-', ''), ('--', '-'), ('GATf.-', 'GATf.'), ('-', ''), ('-', ''), ('-', ''), ('con-', 'con')]
WMH19061017-V04-40-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Pub-', 'Pub'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('im-', 'im'), ('every-', 'every'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-variety', 'variety'), ('ad-', 'ad'), ('how-', 'how'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061017-V04-40-page3.txt: [('intelligently-', 'intelligently'), ('es-', 'es'), ('SWAHN-', 'SWAHN'), ('sec-', 'sec')]
WMH19061017-V04-40-page4.txt: [('RyDER-', 'RyDER'), ('-with', 'with')]
WMH19061024-V04-41-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19061024-V04-41-page3.txt: [('--Education.', '-Education.')]
WMH19061024-V04-41-page4.txt: [('-the', 'the')]
WMH19061031-V04-42-page1.txt: [('-wholly', 'wholly'), ('pur-', 'pur'), ('-', ''), ('-', ''), ('-', ''), ('INTERNA-', 'INTERNA'), ('INTER-', 'INTER')]
WMH19061031-V04-42-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('P-', 'P'), ('-', '')]
WMH19061031-V04-42-page3.txt: [('de-', 'de'), ('--', '-'), ('-----', '----'), ('hav-', 'hav')]
WMH19061107-V04-43-page1.txt: [('-', ''), ('-', ''), ('--This', '-This'), ('--', '-')]
WMH19061107-V04-43-page2.txt: [('---I', '--I')]
WMH19061107-V04-43-page3.txt: [('Gener-', 'Gener')]
WMH19061107-V04-43-page4.txt: [('-', '')]
WMH19061114-V04-44-page1.txt: [('-', ''), ('-', ''), ('--upmuscle', '-upmuscle'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--I', '-I')]
WMH19061114-V04-44-page2.txt: [('--I', '-I'), ('child-', 'child')]
WMH19061114-V04-44-page3.txt: [('submerg-', 'submerg')]
WMH19061114-V04-44-page4.txt: [('-the', 'the'), ('De-', 'De'), ('-', ''), ('-', '')]
WMH19061121-V04-45-page1.txt: [('-', ''), ('-that', 'that'), ('--I', '-I'), ('--labored', '-labored'), ('--Well', '-Well'), ('-', '')]
WMH19061121-V04-45-page2.txt: [('--Chr', '-Chr')]
WMH19061121-V04-45-page3.txt: [('-to', 'to'), ('doubt-', 'doubt'), ('in-', 'in')]
WMH19061121-V04-45-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061128-V04-46-page1.txt: [('qmir-', 'qmir'), ('-', ''), ('work.-', 'work.')]
WMH19061128-V04-46-page2.txt: [('min-', 'min'), ('dis-', 'dis')]
WMH19061128-V04-46-page3.txt: [('re-', 're'), ('-', ''), ('let-', 'let')]
WMH19061128-V04-46-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061205-V04-47-page1.txt: [('--Selected.', '-Selected.'), ('Sunday-', 'Sunday'), ('moun-', 'moun'), ('-', ''), ('-', '')]
WMH19061205-V04-47-page2.txt: [('-', '')]
WMH19061205-V04-47-page3.txt: [('-', ''), ('-', '')]
WMH19061205-V04-47-page4.txt: [('occasion.-', 'occasion.')]
WMH19061212-V04-48-page1.txt: [('-', ''), ('-Let', 'Let'), ('-the', 'the'), ('Wednes-', 'Wednes'), ('-', ''), ('-', ''), ('-', ''), ('work-', 'work'), ('-', ''), ('-', '')]
WMH19061212-V04-48-page2.txt: [('-', ''), ('AD-', 'AD')]
WMH19061212-V04-48-page3.txt: [('mani-', 'mani')]
WMH19061212-V04-48-page4.txt: [('-', ''), ('-', '')]
WMH19061219-V04-49-page1.txt: [('-', ''), ('-not', 'not'), ('--', '-')]
WMH19061219-V04-49-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19061226-V04-50-page1.txt: [('-', ''), ('--', '-'), ('-', ''), ('Sunday-', 'Sunday'), ('-', '')]
WMH19061226-V04-50-page3.txt: [('..-', '..'), ('-paper', 'paper'), ('-"Jesus', '"Jesus')]
WMH19061226-V04-50-page4.txt: [('--toe', '-toe')]
WMH19070102-V05-01-page1.txt: [('-that', 'that'), ('-on', 'on'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070102-V05-01-page2.txt: [('educa-', 'educa'), ('-', ''), ('-and', 'and'), ('Sunday-', 'Sunday')]
WMH19070102-V05-01-page4.txt: [('-of', 'of'), ('I-', 'I'), ('-', '')]
WMH19070109-V05-02-page1.txt: [('glar--', 'glar-'), ('ATI-', 'ATI'), ('-', '')]
WMH19070109-V05-02-page2.txt: [('the-', 'the'), ('Pres.-', 'Pres.')]
WMH19070109-V05-02-page3.txt: [('outpeo-', 'outpeo')]
WMH19070109-V05-02-page4.txt: [('-', ''), ('-', ''), ('De-', 'De'), ('-', '')]
WMH19070116-V05-03-page1.txt: [('-.', '.'), ('-', ''), ('-', ''), ('-take', 'take'), ('-the', 'the'), ('-come', 'come')]
WMH19070116-V05-03-page2.txt: [('or-', 'or'), ('-der', 'der')]
WMH19070116-V05-03-page3.txt: [('Fiske-', 'Fiske')]
WMH19070123-V05-04-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('W.J-', 'W.J'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070123-V05-04-page2.txt: [('-simple', 'simple'), ('ex-', 'ex'), ('-', '')]
WMH19070123-V05-04-page3.txt: [('-', ''), ('con-', 'con'), ('-of', 'of')]
WMH19070123-V05-04-page4.txt: [('opposi-', 'opposi'), ('-shed', 'shed')]
WMH19070130-V05-05-page1.txt: [('accord-', 'accord'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070130-V05-05-page4.txt: [('-', ''), ('RE-', 'RE')]
WMH19070206-V05-06-page1.txt: [('-', ''), ('----', '---'), ('-would', 'would'), ('camp-meet-', 'camp-meet'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070206-V05-06-page2.txt: [('Mt.-', 'Mt.'), ('-A', 'A'), ('-', ''), ('Barretr"-', 'Barretr"'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070206-V05-06-page3.txt: [('-denomination', 'denomination'), ('-', '')]
WMH19070206-V05-06-page4.txt: [('-is', 'is'), ('-similar', 'similar'), ('-', '')]
WMH19070213-V05-07-page1.txt: [('ac-', 'ac'), ('Depart-', 'Depart')]
WMH19070213-V05-07-page2.txt: [('-', '')]
WMH19070213-V05-07-page4.txt: [('.seal-', '.seal'), ('-and', 'and')]
WMH19070220-V05-08-page1.txt: [('-to', 'to')]
WMH19070220-V05-08-page2.txt: [('OFFER-', 'OFFER'), ('-', '')]
WMH19070220-V05-08-page3.txt: [('-', ''), ('-', '')]
WMH19070220-V05-08-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070227-V05-09-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Confer-', 'Confer'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070227-V05-09-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070227-V05-09-page3.txt: [('Dis-', 'Dis'), ('-to', 'to'), ('-colporter', 'colporter'), ('truth.-', 'truth.')]
WMH19070227-V05-09-page4.txt: [('HERALD.-', 'HERALD.'), ('-', ''), ('-i', 'i'), ('--', '-'), ('-', ''), ('-', ''), ('-than', 'than'), ('Re-', 'Re')]
WMH19070306-V05-10-page1.txt: [('-stand', 'stand')]
WMH19070306-V05-10-page2.txt: [('SOLD-', 'SOLD'), ('-', '')]
WMH19070306-V05-10-page3.txt: [('LIBER-', 'LIBER')]
WMH19070306-V05-10-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19070313-V05-11-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-plain', 'plain'), ('--"And', '-"And'), ('--met', '-met')]
WMH19070313-V05-11-page2.txt: [('-', '')]
WMH19070313-V05-11-page3.txt: [('--practical', '-practical')]
WMH19070313-V05-11-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('or-', 'or'), ('Consti-', 'Consti'), ('-', '')]
WMH19070320-V05-12-page2.txt: [('-', '')]
WMH19070320-V05-12-page3.txt: [('-r', 'r'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070327-V05-13-page3.txt: [('associat-', 'associat'), ('consci-', 'consci'), ('--', '-')]
WMH19070327-V05-13-page4.txt: [('--', '-'), ('RE-', 'RE'), ('de-', 'de'), ('-', '')]
WMH19070403-V05-14-page1.txt: [('-', ''), ('II-', 'II')]
WMH19070403-V05-14-page2.txt: [('-', ''), ('con-', 'con')]
WMH19070403-V05-14-page3.txt: [('-know', 'know')]
WMH19070403-V05-14-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070410-V05-15-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070410-V05-15-page2.txt: [('--', '-'), ('-', '')]
WMH19070410-V05-15-page4.txt: [('-', '')]
WMH19070417-V05-16-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('cor-', 'cor'), ('-', ''), ('-', ''), ('REAPETI-', 'REAPETI'), ('be-', 'be')]
WMH19070417-V05-16-page2.txt: [('Danish-', 'Danish')]
WMH19070417-V05-16-page3.txt: [('-', ''), ('--a', '-a')]
WMH19070417-V05-16-page4.txt: [('Pil-', 'Pil')]
WMH19070424-V05-17-page1.txt: [('GATHERED-', 'GATHERED'), ('-', ''), ('-', ''), ('corn-', 'corn'), ('-', ''), ('-', ''), ('-', '')]
WMH19070424-V05-17-page2.txt: [('the-', 'the'), ('IN-', 'IN'), ('IN-', 'IN'), ('-', '')]
WMH19070424-V05-17-page3.txt: [('oforthog-', 'oforthog')]
WMH19070424-V05-17-page4.txt: [('-', '')]
WMH19070501-V05-18-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('wasevery-', 'wasevery'), ('chap-', 'chap'), ('them-', 'them'), ('-', ''), ('-', ''), ('-', '')]
WMH19070501-V05-18-page2.txt: [('confede-', 'confede')]
WMH19070501-V05-18-page3.txt: [('devotedcanvas-', 'devotedcanvas')]
WMH19070501-V05-18-page4.txt: [('interested-', 'interested'), ('-', '')]
WMH19070508-V05-19-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('NOW.-', 'NOW.'), ('plain-', 'plain')]
WMH19070508-V05-19-page2.txt: [('-', ''), ('pur-', 'pur')]
WMH19070508-V05-19-page3.txt: [('-', ''), ('understand.--', 'understand.-')]
WMH19070508-V05-19-page4.txt: [('-', '')]
WMH19070515-V05-20-page1.txt: [('-', ''), ('-', ''), ('ac-', 'ac'), ('--', '-')]
WMH19070515-V05-20-page3.txt: [('Attor-', 'Attor'), ('-', ''), ('-', ''), ('Church-', 'Church'), ('-----', '----'), ('-', ''), ('-', '')]
WMH19070515-V05-20-page4.txt: [('--he', '-he'), ('Conference-', 'Conference'), ('WATCH-', 'WATCH'), ('-', '')]
WMH19070522-V05-21-page1.txt: [('-es', 'es'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070522-V05-21-page3.txt: [('-', '')]
WMH19070522-V05-21-page4.txt: [('Seventh-', 'Seventh'), ('--praise', '-praise'), ('-', ''), ('-', ''), ('-', ''), ('-more', 'more')]
WMH19070529-V05-22-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070529-V05-22-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('af-', 'af')]
WMH19070529-V05-22-page3.txt: [('--', '-')]
WMH19070529-V05-22-page4.txt: [('-', ''), ('-A.', 'A.'), ('-', ''), ('-', ''), ('-', ''), ('refresh-', 'refresh')]
WMH19070605-V05-23-page1.txt: [('--', '-'), ('--every', '-every'), ('-', ''), ('-', ''), ('-', '')]
WMH19070605-V05-23-page2.txt: [('waiting--', 'waiting-')]
WMH19070605-V05-23-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('pub-', 'pub'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070612-V05-24-page1.txt: [('-intercessor.', 'intercessor.'), ('-', '')]
WMH19070612-V05-24-page3.txt: [('-', ''), ('experience--', 'experience-'), ('Sabbath-', 'Sabbath')]
WMH19070619-V05-25-page1.txt: [('con-', 'con'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070619-V05-25-page2.txt: [('-', ''), ('--In', '-In'), ('-', ''), ('-', ''), ('-', '')]
WMH19070619-V05-25-page3.txt: [('-', ''), ('Sabbath-', 'Sabbath'), ('Sabbath-', 'Sabbath'), ('-of', 'of')]
WMH19070619-V05-25-page4.txt: [('-', '')]
WMH19070626-V05-26-page1.txt: [('-', ''), ('adorn-', 'adorn'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070626-V05-26-page2.txt: [('--instructs', '-instructs')]
WMH19070626-V05-26-page3.txt: [('HER-', 'HER')]
WMH19070626-V05-26-page4.txt: [('them-', 'them'), ('--affirmed.', '-affirmed.')]
WMH19070703-V05-27-page1.txt: [('-', '')]
WMH19070703-V05-27-page4.txt: [('-', ''), ('-', '')]
WMH19070710-V05-28-page1.txt: [('GATHERETI-', 'GATHERETI'), ('--', '-'), ('Sabbath-', 'Sabbath'), ('-training.', 'training.'), ('--not', '-not')]
WMH19070710-V05-28-page2.txt: [('oc-', 'oc')]
WMH19070710-V05-28-page4.txt: [('-', ''), ('Camp-', 'Camp'), ('ques-', 'ques')]
WMH19070717-V05-29-page1.txt: [('-', ''), ('-', ''), ('de-', 'de'), ('-----', '----'), ('-school', 'school'), ('-', ''), ('-', ''), ('-', '')]
WMH19070717-V05-29-page2.txt: [('FATH-', 'FATH'), ('righteous-', 'righteous')]
WMH19070724-V05-30-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('E-', 'E'), ('-.', '.'), ('-', '')]
WMH19070724-V05-30-page2.txt: [('MORN-', 'MORN')]
WMH19070731-V05-31-page1.txt: [('re-', 're')]
WMH19070731-V05-31-page2.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19070731-V05-31-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070807-V05-32-page1.txt: [('--', '-'), ('-', ''), ('-"', '"'), ('-notify', 'notify')]
WMH19070807-V05-32-page2.txt: [('Camp--', 'Camp-'), ('-', '')]
WMH19070807-V05-32-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('--health', '-health'), ('Seventh-', 'Seventh'), ('-', '')]
WMH19070807-V05-32-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19070814-V05-33-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-dispensation.', 'dispensation.')]
WMH19070814-V05-33-page2.txt: [('-', '')]
WMH19070814-V05-33-page3.txt: [('mis-', 'mis')]
WMH19070814-V05-33-page4.txt: [('-', ''), ('per-', 'per')]
WMH19070828-V05-34-page1.txt: [('-', '')]
WMH19070828-V05-34-page2.txt: [('-a', 'a'), ('-', ''), ('-', ''), ('prov-', 'prov')]
WMH19070828-V05-34-page3.txt: [('-', '')]
WMH19070828-V05-34-page4.txt: [('-', ''), ('.ASSOCIA-', '.ASSOCIA'), ('-page', 'page')]
WMH19070904-V05-35-page1.txt: [('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070904-V05-35-page2.txt: [('de-', 'de')]
WMH19070904-V05-35-page3.txt: [('-', '')]
WMH19070911-V05-36-page1.txt: [('-', ''), ('GATHRETI-', 'GATHRETI'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070911-V05-36-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070918-V05-37-page1.txt: [('Dr.-', 'Dr.'), ("'-", "'"), ('GATLiERETI-', 'GATLiERETI')]
WMH19070918-V05-37-page2.txt: [('-coming', 'coming'), ('SERIES-', 'SERIES'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070918-V05-37-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19070918-V05-37-page4.txt: [('LAN-', 'LAN'), ('-', ''), ('es-', 'es'), ('-', ''), ('-', ''), ('-', '')]
WMH19070925-V05-38-page1.txt: [('re-', 're'), ('hear-', 'hear'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('GATHERETI-', 'GATHERETI')]
WMH19070925-V05-38-page3.txt: [('-', ''), ('-', ''), ('---', '--'), ('-Covert', 'Covert')]
WMH19070925-V05-38-page4.txt: [('beau-', 'beau')]
WMH19071002-V05-39-page1.txt: [('-', ''), ('insti-', 'insti'), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', '')]
WMH19071002-V05-39-page2.txt: [('-', ''), ('-', '')]
WMH19071002-V05-39-page3.txt: [('-on', 'on')]
WMH19071009-V05-40-page1.txt: [('-reined', 'reined'), ('mot-', 'mot'), ('-', ''), ('-This', 'This'), ('begin-', 'begin'), ('"-----', '"----'), ('-', ''), ('-', '')]
WMH19071009-V05-40-page2.txt: [('par-', 'par')]
WMH19071009-V05-40-page3.txt: [('-', ''), ('-', ''), ('.-', '.'), ('-', '')]
WMH19071009-V05-40-page4.txt: [('-', ''), ('-Doctor', 'Doctor'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071016-V05-41-page1.txt: [('-Last', 'Last'), ('con-', 'con'), ('-', ''), ('-', ''), ('-', ''), ('-HE', 'HE'), ('-', ''), ('al-', 'al'), ('para-', 'para'), ('un-', 'un'), ('self-', 'self')]
WMH19071016-V05-41-page2.txt: [('en-', 'en'), ('-joy', 'joy'), ('per-', 'per')]
WMH19071016-V05-41-page3.txt: [('-a', 'a'), ('GENERA-', 'GENERA')]
WMH19071016-V05-41-page4.txt: [('-', ''), ('WORK-', 'WORK')]
WMH19071023-V05-42-page1.txt: [('com-', 'com'), ('-', ''), ('-vil', 'vil'), ('-', ''), ('confirm-', 'confirm'), ('question-', 'question')]
WMH19071023-V05-42-page2.txt: [('-', ''), ('re-', 're'), ('-', '')]
WMH19071023-V05-42-page3.txt: [('-field.', 'field.'), ('-cometogether.', 'cometogether.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071023-V05-42-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071030-V05-43-page1.txt: [('-', '')]
WMH19071030-V05-43-page2.txt: [('Mich-', 'Mich')]
WMH19071030-V05-43-page3.txt: [('.-', '.'), ('-', ''), ('-', ''), ('-Ps.', 'Ps.')]
WMH19071030-V05-43-page4.txt: [('-both', 'both'), ('-', '')]
WMH19071106-V05-44-page1.txt: [("'--", "'-"), ('GOV-', 'GOV'), ('SUPER-', 'SUPER'), ('SAB-', 'SAB')]
WMH19071106-V05-44-page2.txt: [('DISESTAB-', 'DISESTAB'), ('--', '-'), ('-', '')]
WMH19071106-V05-44-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('w.-', 'w.'), ('-bara', 'bara'), ('-', '')]
WMH19071106-V05-44-page4.txt: [('-Note', 'Note'), ('-', ''), ('-The', 'The')]
WMH19071113-V05-45-page1.txt: [('instruct-', 'instruct'), ('GATOERETI-', 'GATOERETI')]
WMH19071113-V05-45-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071113-V05-45-page4.txt: [('-', '')]
WMH19071120-V05-46-page1.txt: [('eIRV-', 'eIRV'), ('GATHERETI-', 'GATHERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071120-V05-46-page2.txt: [('-with', 'with'), ('--aneyear', '-aneyear'), ('-members', 'members'), ('SERIES-', 'SERIES')]
WMH19071120-V05-46-page3.txt: [('--', '-'), ('-', ''), ('Ending-', 'Ending'), ('-', '')]
WMH19071127-V05-47-page1.txt: [('-', ''), ('-', ''), ('Vr-', 'Vr'), ('ans-', 'ans'), ('pre-', 'pre'), ('-and', 'and'), ('-nothingness', 'nothingness'), ('im-', 'im'), ('be-', 'be')]
WMH19071127-V05-47-page2.txt: [("-widow's", "widow's"), ('-a', 'a'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-i', 'i'), ('-', ''), ('-', ''), ('-ix.', 'ix.'), ('-', '')]
WMH19071127-V05-47-page3.txt: [('-', ''), ('RE-', 'RE'), ('DE-', 'DE'), ('"sur-', '"sur')]
WMH19071127-V05-47-page4.txt: [('-', '')]
WMH19071204-V05-48-page1.txt: [('-', ''), ('faith-', 'faith')]
WMH19071204-V05-48-page2.txt: [('-', '')]
WMH19071204-V05-48-page3.txt: [('-', ''), ('-Their', 'Their')]
WMH19071204-V05-48-page4.txt: [('-', ''), ('reg-', 'reg')]
WMH19071211-V05-49-page1.txt: [('-', ''), ('-lad', 'lad'), ('-disease', 'disease'), ('-', '')]
WMH19071211-V05-49-page2.txt: [('-', ''), ('-', ''), ('the-', 'the')]
WMH19071211-V05-49-page3.txt: [('-', ''), ('Carr-', 'Carr'), ('Le-', 'Le')]
WMH19071211-V05-49-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19071218-V05-50-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('temp-', 'temp'), ('-', ''), ('ser-', 'ser')]
WMH19071218-V05-50-page2.txt: [('-Lord.', 'Lord.'), ('in-', 'in')]
WMH19071218-V05-50-page3.txt: [('-', ''), ('criti-', 'criti'), ('-', '')]
WMH19071218-V05-50-page4.txt: [('-', ''), ('Ed-', 'Ed'), ('-', ''), ('--', '-'), ('-', ''), ('-', '')]
WMH19080101-V06-01-page2.txt: [('r-', 'r'), ('WATCH-', 'WATCH'), ('--', '-'), ('-ceed.', 'ceed.'), ('success."-', 'success."'), ('-', ''), ('-', '')]
WMH19080101-V06-01-page3.txt: [('-', ''), ('sub-', 'sub'), ('-been', 'been')]
WMH19080101-V06-01-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-the', 'the'), ('consecrat-', 'consecrat'), ("-'Our", "'Our"), ('Con-', 'Con'), ('-from', 'from')]
WMH19080108-V06-02-page1.txt: [('mic.-', 'mic.'), ('--Right', '-Right'), ('-', ''), ('-exciteme', 'exciteme'), ('-The', 'The'), ('gath-', 'gath'), ('-public', 'public'), ('informa-', 'informa')]
WMH19080108-V06-02-page2.txt: [('-done', 'done'), ('Grand-', 'Grand'), ('-', '')]
WMH19080108-V06-02-page3.txt: [('-', ''), ('-II.', 'II.'), ('-man.', 'man.'), ('-', '')]
WMH19080108-V06-02-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080115-V06-03-page1.txt: [('WATCH-', 'WATCH')]
WMH19080115-V06-03-page2.txt: [('-Our', 'Our'), ('WATCH-', 'WATCH'), ('-', '')]
WMH19080115-V06-03-page3.txt: [('distri-', 'distri'), ('-company', 'company'), ('worle-', 'worle')]
WMH19080115-V06-03-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080122-V06-04-page1.txt: [('-the', 'the'), ('pic-', 'pic'), ('faith-', 'faith')]
WMH19080122-V06-04-page2.txt: [('-and', 'and'), ('fashion-', 'fashion')]
WMH19080122-V06-04-page3.txt: [('-', '')]
WMH19080122-V06-04-page4.txt: [('--Wellspri', '-Wellspri'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080129-V06-05-page1.txt: [('fte-', 'fte'), ('GATHERETI-', 'GATHERETI')]
WMH19080129-V06-05-page2.txt: [('-', '')]
WMH19080129-V06-05-page3.txt: [('And-', 'And'), ('-This', 'This'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080129-V06-05-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('BET-', 'BET'), ('-', ''), ('-', ''), ('-', '')]
WMH19080205-V06-06-page1.txt: [('-..t..Pft', '..t..Pft'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-is', 'is')]
WMH19080205-V06-06-page2.txt: [('church-', 'church')]
WMH19080205-V06-06-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080212-V06-07-page1.txt: [('Thous-', 'Thous'), ('-', ''), ('-', ''), ('-', ''), ('Mission-', 'Mission'), ('-', ''), ('-', ''), ('Michigan-', 'Michigan'), ('Ex-', 'Ex'), ('-', ''), ('Nash-', 'Nash'), ('-', '')]
WMH19080212-V06-07-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Conf.-', 'Conf.'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080212-V06-07-page3.txt: [('-present', 'present'), ('-loss', 'loss'), ('.-', '.'), ('-', ''), ('--', '-'), ('-', ''), ('-new', 'new'), ('-', '')]
WMH19080212-V06-07-page4.txt: [('-', ''), ('Se-', 'Se'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080219-V06-08-page1.txt: [('-', ''), ('-', ''), ('Permits-', 'Permits'), ('Credentials-', 'Credentials'), ('Total-', 'Total'), ('employed--', 'employed-'), ('Educa-', 'Educa')]
WMH19080219-V06-08-page3.txt: [('-our', 'our'), ('-', ''), ('-', '')]
WMH19080219-V06-08-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080226-V06-09-page1.txt: [('-', ''), ('-', ''), ('February-', 'February'), ('-', ''), ('-', ''), ('min-', 'min')]
WMH19080226-V06-09-page2.txt: [('De-', 'De'), ('peo-', 'peo')]
WMH19080226-V06-09-page3.txt: [('-', ''), ('plan-', 'plan')]
WMH19080226-V06-09-page4.txt: [('-', '')]
WMH19080304-V06-10-page1.txt: [('-', ''), ('-Manager', 'Manager')]
WMH19080304-V06-10-page2.txt: [('year-', 'year')]
WMH19080304-V06-10-page4.txt: [('.-', '.')]
WMH19080311-V06-11-page1.txt: [('-z', 'z'), ('-second', 'second'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('famil-', 'famil'), ('GATHERETI-', 'GATHERETI'), ('--', '-')]
WMH19080311-V06-11-page3.txt: [('-and', 'and'), ('con-', 'con')]
WMH19080311-V06-11-page4.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19080318-V06-12-page1.txt: [('-.', '.'), ('ERETI-', 'ERETI'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080318-V06-12-page3.txt: [('-', ''), ('-R.', 'R.'), ('-', '')]
WMH19080318-V06-12-page4.txt: [('-', '')]
WMH19080325-V06-13-page1.txt: [('Treas-', 'Treas'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-delegates', 'delegates'), ('the-', 'the'), ('-', ''), ('-', ''), ('GATMERETI-', 'GATMERETI')]
WMH19080325-V06-13-page2.txt: [('-John', 'John'), ('-so', 'so'), ('-', ''), ('March-', 'March')]
WMH19080325-V06-13-page4.txt: [("-cardinal'", "cardinal'"), ('-Nebr.', 'Nebr.'), ('WATCHMAN.-', 'WATCHMAN.')]
WMH19080401-V06-14-page1.txt: [('unad-', 'unad'), ('-', ''), ('-', ''), ('GATRERETI-', 'GATRERETI'), ('-', '')]
WMH19080401-V06-14-page2.txt: [('super-', 'super'), ('-', '')]
WMH19080401-V06-14-page4.txt: [('Years-', 'Years'), ('-June', 'June'), ('-', '')]
WMH19080408-V06-15-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080408-V06-15-page4.txt: [('-We', 'We')]
WMH19080415-V06-16-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Mes-', 'Mes'), ('-', ''), ('-', ''), ('-', ''), ('Mc-', 'Mc'), ('-', '')]
WMH19080415-V06-16-page2.txt: [('re-', 're')]
WMH19080415-V06-16-page3.txt: [('-', ''), ('-Fitch', 'Fitch'), ('-the', 'the'), ('-we', 'we'), ('Seventh-', 'Seventh'), ('-our', 'our'), ('-', '')]
WMH19080422-V06-17-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19080422-V06-17-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080422-V06-17-page4.txt: [('diet--', 'diet-')]
WMH19080429-V06-18-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', ''), ('Seventh-', 'Seventh')]
WMH19080429-V06-18-page2.txt: [('-', ''), ('-', '')]
WMH19080429-V06-18-page3.txt: [('busi-', 'busi'), ('-', ''), ('weep-', 'weep'), ('Seventh-', 'Seventh'), ('-reading', 'reading'), ('inform-', 'inform')]
WMH19080429-V06-18-page4.txt: [('-', '')]
WMH19080506-V06-19-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Ir-', 'Ir')]
WMH19080506-V06-19-page2.txt: [('peo.-', 'peo.'), ('-c.', 'c.')]
WMH19080506-V06-19-page3.txt: [('-', ''), ('-', ''), ('child-', 'child'), ('du-', 'du')]
WMH19080506-V06-19-page4.txt: [('---', '--'), ('-', ''), ('-', ''), ('-', '')]
WMH19080513-V06-20-page1.txt: [('laborer-', 'laborer'), ('uper-', 'uper'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('GATHERETI-', 'GATHERETI'), ('z-', 'z')]
WMH19080513-V06-20-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080513-V06-20-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-on', 'on'), ('-', '')]
WMH19080520-V06-21-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Hof-', 'Hof'), ('-', ''), ('-', ''), ('-', ''), ('-they', 'they'), ('-', ''), ('sifsisev-', 'sifsisev')]
WMH19080520-V06-21-page2.txt: [('Center--', 'Center-'), ('-that', 'that'), ('-', ''), ('Ed-', 'Ed')]
WMH19080520-V06-21-page3.txt: [('-will', 'will'), ('-', ''), ('-', ''), ('-', '')]
WMH19080520-V06-21-page4.txt: [('-', '')]
WMH19080527-V06-22-page1.txt: [('-', ''), ('-', ''), ('rush-', 'rush'), ('-', ''), ('-', ''), ('to-', 'to')]
WMH19080527-V06-22-page2.txt: [('-flavors', 'flavors'), ('-', ''), ('-Two', 'Two')]
WMH19080527-V06-22-page4.txt: [('-', ''), ('PRO-', 'PRO')]
WMH19080603-V06-23-page1.txt: [('it-', 'it'), ('-', ''), ('mechani-', 'mechani'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080603-V06-23-page2.txt: [('-minds', 'minds')]
WMH19080603-V06-23-page3.txt: [('-', '')]
WMH19080603-V06-23-page4.txt: [('-', ''), ('hold-', 'hold')]
WMH19080610-V06-24-page1.txt: [('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('FIELDSECRETARYr-', 'FIELDSECRETARYr')]
WMH19080610-V06-24-page2.txt: [('-', ''), ('corn-', 'corn'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080610-V06-24-page3.txt: [('-we', 'we')]
WMH19080610-V06-24-page4.txt: [('-', ''), ('-James', 'James'), ('HER-', 'HER')]
WMH19080617-V06-25-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('flush-', 'flush')]
WMH19080617-V06-25-page2.txt: [('praise."--', 'praise."-'), ('-', '')]
WMH19080617-V06-25-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------'), ('-', '')]
WMH19080617-V06-25-page4.txt: [('-', ''), ('-to', 'to'), ('ap-', 'ap'), ('-', '')]
WMH19080624-V06-26-page1.txt: [('"Bless-', '"Bless'), ('-', ''), ('-', ''), ('I-', 'I')]
WMH19080624-V06-26-page2.txt: [('-', '')]
WMH19080624-V06-26-page3.txt: [('-to', 'to'), ('Frank-', 'Frank')]
WMH19080624-V06-26-page4.txt: [('-', ''), ('ap-', 'ap'), ('-', ''), ('-the', 'the')]
WMH19080701-V06-27-page1.txt: [('-', ''), ('-', ''), ('Lexi-', 'Lexi'), ('-', ''), ('-', '')]
WMH19080701-V06-27-page2.txt: [('camp-', 'camp'), ('-', ''), ('be-', 'be'), ('-', '')]
WMH19080701-V06-27-page3.txt: [('-know', 'know')]
WMH19080701-V06-27-page4.txt: [('-the', 'the'), ('WATCH-', 'WATCH'), ('-', '')]
WMH19080708-V06-28-page1.txt: [('-', ''), ('-', ''), ('-', '')]
WMH19080708-V06-28-page3.txt: [('-', ''), ('suffering-', 'suffering'), ('-', '')]
WMH19080708-V06-28-page4.txt: [('-', ''), ('-the', 'the'), ('-', ''), ('-', ''), ('as-', 'as')]
WMH19080715-V06-29-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-and', 'and'), ('-', '')]
WMH19080715-V06-29-page2.txt: [('-ddaayy', 'ddaayy'), ('-', ''), ('-and', 'and'), ('-', '')]
WMH19080715-V06-29-page3.txt: [('-church', 'church'), ('-', '')]
WMH19080715-V06-29-page4.txt: [('-', ''), ('-', '')]
WMH19080722-V06-30-page1.txt: [('Seventh-', 'Seventh'), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080722-V06-30-page2.txt: [('-issue', 'issue'), ('LIB-', 'LIB'), ('the-', 'the'), ('-', '')]
WMH19080722-V06-30-page4.txt: [('-convinced', 'convinced'), ('-great', 'great'), ('-', ''), ('-church', 'church'), ('--James', '-James')]
WMH19080729-V06-31-page1.txt: [('-', ''), ('-', ''), ('Hof.-', 'Hof.')]
WMH19080729-V06-31-page2.txt: [('-', '')]
WMH19080729-V06-31-page3.txt: [('depart-', 'depart'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Park', 'Park'), ('-', ''), ('let-', 'let')]
WMH19080729-V06-31-page4.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080805-V06-32-page1.txt: [('be-', 'be'), ('-', ''), ('-', ''), ('-', ''), ('COM-', 'COM'), ('sub-', 'sub')]
WMH19080805-V06-32-page2.txt: [('ox-', 'ox'), ('-', '')]
WMH19080805-V06-32-page3.txt: [('-to', 'to'), ('na-', 'na')]
WMH19080805-V06-32-page4.txt: [('-', ''), ('-the', 'the')]
WMH19080812-V06-33-page1.txt: [('-', ''), ('-', ''), ('af-', 'af'), ('-', ''), ('-', ''), ('-', '')]
WMH19080812-V06-33-page2.txt: [('-', '')]
WMH19080812-V06-33-page3.txt: [('GENER-', 'GENER'), ('ex-', 'ex')]
WMH19080812-V06-33-page4.txt: [('Publish-', 'Publish')]
WMH19080826-V06-34-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19080826-V06-34-page2.txt: [('Anti-', 'Anti')]
WMH19080826-V06-34-page3.txt: [('class-', 'class'), ('-', ''), ('-made', 'made')]
WMH19080826-V06-34-page4.txt: [('-', '')]
WMH19080902-V06-35-page1.txt: [('-our', 'our'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--SELECTED.', '-SELECTED.'), ('-', '')]
WMH19080902-V06-35-page2.txt: [('-opportune', 'opportune'), ('followed-', 'followed')]
WMH19080902-V06-35-page3.txt: [('-', '')]
WMH19080902-V06-35-page4.txt: [('-', '')]
WMH19080909-V06-36-page1.txt: [('-I', 'I'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('night--', 'night-'), ('-', '')]
WMH19080909-V06-36-page2.txt: [('auspi-', 'auspi')]
WMH19080909-V06-36-page3.txt: [('-', ''), ('-', '')]
WMH19080909-V06-36-page4.txt: [('-', '')]
WMH19080916-V06-37-page1.txt: [('-', ''), ('Michigan.-', 'Michigan.'), ('r-', 'r'), ('-', ''), ('-', ''), ('-', ''), ('-arise', 'arise'), ('-holy', 'holy'), ('con-', 'con')]
WMH19080916-V06-37-page2.txt: [('-', ''), ('-', ''), ('-neither', 'neither'), ('the-', 'the'), ('-for', 'for'), ('-pay', 'pay')]
WMH19080916-V06-37-page3.txt: [('shad-', 'shad'), ('-consummation.', 'consummation.'), ('-', '')]
WMH19080916-V06-37-page4.txt: [('Place-', 'Place'), ('-', ''), ('-', ''), ('the-', 'the'), ('Almeda-', 'Almeda')]
WMH19080923-V06-38-page1.txt: [('-', ''), ('-', ''), ('-A--udi.t', 'A--udi.t'), ('-', ''), ('-', ''), ('-', ''), ('the-', 'the')]
WMH19080923-V06-38-page2.txt: [('dif-', 'dif'), ('-', ''), ('-', ''), ('-', '')]
WMH19080923-V06-38-page3.txt: [('G.-', 'G.'), ('-', ''), ('un-', 'un')]
WMH19080923-V06-38-page4.txt: [('-you', 'you'), ('-should', 'should')]
WMH19080930-V06-39-page1.txt: [('-', ''), ('-and', 'and'), ('go-', 'go'), ('-', ''), ('-', '')]
WMH19080930-V06-39-page2.txt: [('-the', 'the')]
WMH19080930-V06-39-page3.txt: [('-', ''), ('-', ''), ('influ.-', 'influ.'), ('-and', 'and'), ('-', '')]
WMH19080930-V06-39-page4.txt: [('-', ''), ('Conference.-', 'Conference.'), ('-', ''), ('--Mrs.', '-Mrs.'), ('-many', 'many'), ('Van-', 'Van')]
WMH19081007-V06-40-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Hof-', 'Hof')]
WMH19081007-V06-40-page2.txt: [('--Ans.', '-Ans.')]
WMH19081007-V06-40-page3.txt: [('-Business', 'Business'), ('Mateo-', 'Mateo'), ('ENLIGHTEN-', 'ENLIGHTEN')]
WMH19081007-V06-40-page4.txt: [('-heard', 'heard'), ('mis-', 'mis'), ('mes-', 'mes'), ('-', ''), ('-', '')]
WMH19081014-V06-41-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('Hof--', 'Hof-'), ('-', ''), ('--', '-'), ('cL-', 'cL')]
WMH19081014-V06-41-page2.txt: [('-', ''), ('-', ''), ('-Labor', 'Labor'), ('re-', 're'), ('-the', 'the'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19081014-V06-41-page3.txt: [('commit-', 'commit'), ('develop-', 'develop'), ('corn--', 'corn-')]
WMH19081014-V06-41-page4.txt: [('advo-', 'advo'), ('be-', 'be'), ('life.-', 'life.'), ('illus-', 'illus'), ('-to', 'to')]
WMH19081021-V06-42-page1.txt: [('especi-', 'especi'), ('-', ''), ('-', ''), ('-', '')]
WMH19081021-V06-42-page2.txt: [('for-', 'for')]
WMH19081021-V06-42-page3.txt: [('truth-', 'truth'), ('-', ''), ('-continue', 'continue'), ('Pool-', 'Pool')]
WMH19081028-V06-43-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('con.-', 'con.'), ('-fulfilled', 'fulfilled')]
WMH19081028-V06-43-page2.txt: [('RE-', 'RE'), ('-', '')]
WMH19081028-V06-43-page3.txt: [('-this', 'this'), ('-', ''), ('-So', 'So'), ('-Soon', 'Soon')]
WMH19081028-V06-43-page4.txt: [('-Growth', 'Growth'), ('-Philippians.', 'Philippians.'), ('-page', 'page')]
WMH19081104-V06-44-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19081104-V06-44-page2.txt: [('-obtain', 'obtain'), ('-to', 'to')]
WMH19081104-V06-44-page3.txt: [('.-', '.'), ('-', ''), ('quar-', 'quar'), ('Sab-', 'Sab'), ('-', '')]
WMH19081104-V06-44-page4.txt: [('Danish-', 'Danish'), ('Danish-', 'Danish'), ('Danish-', 'Danish'), ('MICHI-', 'MICHI')]
WMH19081111-V06-45-page1.txt: [('na-', 'na'), ('-', ''), ('-C.', 'C.'), ('-', ''), ('-', ''), ('P-', 'P')]
WMH19081111-V06-45-page2.txt: [('Harbor-', 'Harbor'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')]
WMH19081111-V06-45-page3.txt: [('-', ''), ('firmame-', 'firmame'), ('-', ''), ('-bare', 'bare'), ('-', ''), ('-', '')]
WMH19081111-V06-45-page5.txt: [('LJesY-', 'LJesY')]
In [30]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction3 Average verified rate: 0.9762576375579471 Average of error rates: 0.02556951102588687 Total token count: 915705
In [31]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[31]:
[('m', 1749),
('w', 1503),
('g', 1437),
('e', 1313),
('d', 1278),
('r', 688),
('n', 645),
("'", 511),
('f', 444),
('t', 382),
('th', 283),
('co', 172),
('oo', 171),
('sabbathschool', 163),
('io', 120),
('mt', 108),
('k', 107),
('ro', 96),
('wm', 82),
('numbess', 75),
('re', 71),
('u', 69),
("'field", 67),
("canvassers'", 58),
('x', 46),
("'the", 44),
('horr', 39),
("the'", 38),
('rd', 33),
('blendon', 32),
('ex', 32),
('brower', 31),
('harnden', 30),
("f'd", 30),
('mchugh', 29),
('seventhday', 28),
('nd', 28),
('cleora', 27),
('tion', 25),
('nunica', 23),
('sabbathschools', 23),
('q', 23),
("'to", 22),
('-', 21),
('vowyla', 21),
('al', 21),
('z', 20),
('loth', 20),
('fd', 20),
('michi', 20)]
Correction 4 -- Remove extra quotation marks¶
In [33]:
# %load shared_elements/remove_extra_quotation_marks.py
prev = cycle
cycle = "correction4"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
corrections = []
for token in tokens:
token_list = list(token)
last_char = token_list[-1]
if last_char is "'":
if len(token) > 1:
if token_list[-2] is 's' or 'S':
pass
else:
corrections.append((token, re.sub(r"'", r"", token)))
else:
pass
elif token[0] is "'":
corrections.append((token, re.sub(r"'", r"", token)))
else:
pass
if len(corrections) > 0:
print('{}: {}'.format(filename, corrections))
for correction in corrections:
content = clean.replace_pair(correction, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
WMH19030513-V01-19-page1.txt: [("'Alarmed", 'Alarmed')]
WMH19030520-V01-20-page4.txt: [("'born", 'born')]
WMH19030527-V01-21-page4.txt: [("'An", 'An')]
WMH19030610-V01-23-page1.txt: [("'Evangelical", 'Evangelical')]
WMH19030610-V01-23-page2.txt: [("'promotes", 'promotes')]
WMH19030610-V01-23-page4.txt: [("'Rem", 'Rem'), ("'Rotes", 'Rotes')]
WMH19030624-V01-25-page4.txt: [("'dews", 'dews'), ("'notes", 'notes')]
WMH19030701-V01-26-page4.txt: [("'news", 'news'), ("'Pews", 'Pews'), ("'Rotes", 'Rotes')]
WMH19030708-V01-27-page2.txt: [("'Department", 'Department')]
WMH19030708-V01-27-page4.txt: [("'school", 'school'), ("'Mews", 'Mews'), ("'Rotes", 'Rotes')]
WMH19030715-V01-28-page1.txt: [("'Tis", 'Tis')]
WMH19030715-V01-28-page3.txt: [("'.Died", '.Died')]
WMH19030715-V01-28-page4.txt: [("'Flews", 'Flews'), ("'Hews", 'Hews')]
WMH19030722-V01-29-page2.txt: [("'and", 'and')]
WMH19030722-V01-29-page4.txt: [("'Flews", 'Flews'), ("'Motes", 'Motes'), ("'Pews", 'Pews')]
WMH19030930-V01-39-page3.txt: [("'pecan", 'pecan')]
WMH19030930-V01-39-page4.txt: [("'news", 'news'), ("'Notes", 'Notes')]
WMH19031028-V01-43-page1.txt: [("'self", 'self')]
WMH19031028-V01-43-page3.txt: [("'all", 'all')]
WMH19031028-V01-43-page4.txt: [("'Hews", 'Hews'), ("'by", 'by'), ("'we", 'we'), ("'Hews", 'Hews')]
WMH19031118-V01-46-page4.txt: [("'Flews", 'Flews'), ("'notes", 'notes'), ("'each", 'each')]
WMH19040106-V02-02-page1.txt: [("'the", 'the')]
WMH19040113-V02-03-page3.txt: [("'We", 'We')]
WMH19040113-V02-03-page4.txt: [("'Battle", 'Battle')]
WMH19040210-V02-06-page2.txt: [("'treatments", 'treatments'), ("'summer", 'summer'), ("'all", 'all')]
WMH19040210-V02-06-page3.txt: [("'Creek", 'Creek')]
WMH19040210-V02-06-page4.txt: [("'dress", 'dress')]
WMH19040217-V02-07-page1.txt: [("'traverse", 'traverse'), ("'Joseph", 'Joseph'), ("'loved", 'loved'), ("'book", 'book'), ("'the", 'the')]
WMH19040224-V02-08-page2.txt: [("'nurses", 'nurses')]
WMH19040302-V02-09-page2.txt: [("'present", 'present')]
WMH19040302-V02-09-page3.txt: [("'stairs", 'stairs')]
WMH19040309-V02-10-page2.txt: [("'Another", 'Another')]
WMH19040309-V02-10-page3.txt: [("'persons", 'persons')]
WMH19040309-V02-10-page4.txt: [("'I", 'I'), ("'My", 'My')]
WMH19040316-V02-11-page1.txt: [("'Michigan", 'Michigan')]
WMH19040316-V02-11-page2.txt: [("'the", 'the'), ("'More", 'More')]
WMH19040316-V02-11-page3.txt: [("'great", 'great')]
WMH19040323-V02-12-page1.txt: [("'we", 'we')]
WMH19040323-V02-12-page2.txt: [("'book", 'book')]
WMH19040323-V02-12-page3.txt: [("'to", 'to')]
WMH19040330-V02-13-page3.txt: [("'unable", 'unable')]
WMH19040406-V02-14-page2.txt: [("'the", 'the'), ("'in", 'in'), ("'eight", 'eight')]
WMH19040406-V02-14-page3.txt: [("'lungs", 'lungs')]
WMH19040420-V02-16-page1.txt: [("'made", 'made'), ("'Not", 'Not')]
WMH19040420-V02-16-page2.txt: [("'been", 'been'), ("'With", 'With')]
WMH19040420-V02-16-page4.txt: [("'Conference.", 'Conference.')]
WMH19040427-V02-17-page1.txt: [("'Michigan", 'Michigan'), ("'branch", 'branch')]
WMH19040427-V02-17-page2.txt: [("'occassionally.", 'occassionally.'), ("'of", 'of')]
WMH19040427-V02-17-page3.txt: [("'Thoburn", 'Thoburn')]
WMH19040504-V02-18-page2.txt: [("'it", 'it'), ("'evil", 'evil')]
WMH19040504-V02-18-page3.txt: [("'A", 'A'), ("'meal", 'meal')]
WMH19040511-V02-19-page1.txt: [("'I", 'I'), ("'that", 'that'), ("'tanner.", 'tanner.')]
WMH19040511-V02-19-page2.txt: [("'up", 'up')]
WMH19040511-V02-19-page3.txt: [("'Freas.", 'Freas.'), ("'Financial.", 'Financial.')]
WMH19040518-V02-20-page1.txt: [("'Was", 'Was'), ("'warm", 'warm'), ("'this", 'this'), ("'others", 'others'), ("'influence", 'influence')]
WMH19040518-V02-20-page2.txt: [("'go", 'go')]
WMH19040601-V02-22-page2.txt: [("'that", 'that')]
WMH19040601-V02-22-page4.txt: [("'Kenyon", 'Kenyon')]
WMH19040608-V02-23-page1.txt: [("'Michigan", 'Michigan'), ("'large", 'large'), ("'and", 'and')]
WMH19040608-V02-23-page3.txt: [("'called", 'called'), ("'to", 'to'), ("'the", 'the')]
WMH19040608-V02-23-page4.txt: [("'was", 'was')]
WMH19040622-V02-24-page1.txt: [("'pay", 'pay')]
WMH19040622-V02-24-page4.txt: [("'and", 'and')]
WMH19040629-V02-25-page1.txt: [("'the", 'the')]
WMH19040629-V02-25-page4.txt: [("'Conference", 'Conference')]
WMH19040706-V02-26-page4.txt: [("'in", 'in'), ("'HERALD", 'HERALD')]
WMH19040713-V02-27-page2.txt: [("'he", 'he')]
WMH19040713-V02-27-page3.txt: [("'session", 'session'), ("'to", 'to')]
WMH19040720-V02-28-page1.txt: [("'Reading", 'Reading')]
WMH19040720-V02-28-page2.txt: [("'when", 'when')]
WMH19040720-V02-28-page3.txt: [("'of", 'of'), ("'teacher", 'teacher'), ("'and", 'and')]
WMH19040720-V02-28-page4.txt: [("'subscriptions", 'subscriptions'), ("'I'.", 'I.')]
WMH19040727-V02-29-page2.txt: [("'favor", 'favor')]
WMH19040810-V02-31-page1.txt: [("'Much", 'Much')]
WMH19040810-V02-31-page2.txt: [("'s", 's')]
WMH19040810-V02-31-page3.txt: [("'disease.", 'disease.'), ("'state", 'state')]
WMH19040817-V02-32-page2.txt: [("'and", 'and'), ("'reaching", 'reaching')]
WMH19040817-V02-32-page4.txt: [("'We", 'We')]
WMH19040831-V02-33-page1.txt: [("'the", 'the')]
WMH19040831-V02-33-page2.txt: [("'as", 'as')]
WMH19040831-V02-33-page3.txt: [("'Breads", 'Breads')]
WMH19040914-V02-34-page1.txt: [("'Michigan", 'Michigan'), ("'poverty.", 'poverty.')]
WMH19040914-V02-34-page2.txt: [("'great", 'great')]
WMH19040914-V02-34-page3.txt: [("'they", 'they'), ("'a", 'a')]
WMH19040914-V02-34-page4.txt: [("'opened", 'opened'), ("'school", 'school')]
WMH19040928-V02-35-page3.txt: [("'twelve", 'twelve')]
WMH19041005-V02-36-page2.txt: [("'c.", 'c.')]
WMH19041005-V02-36-page3.txt: [("'I'REAMENT.", 'IREAMENT.'), ("'a", 'a')]
WMH19041005-V02-36-page4.txt: [("'September.", 'September.')]
WMH19041012-V02-37-page1.txt: [("'aostile", 'aostile'), ("'brief", 'brief')]
WMH19041012-V02-37-page2.txt: [("'of", 'of')]
WMH19041019-V02-38-page3.txt: [("'devoted", 'devoted'), ("'These", 'These'), ("'to", 'to'), ("'for", 'for')]
WMH19041026-V02-39-page1.txt: [("'new", 'new')]
WMH19041026-V02-39-page3.txt: [("'that", 'that'), ("'part", 'part')]
WMH19041026-V02-39-page4.txt: [("'Missionary", 'Missionary'), ("'education", 'education')]
WMH19041102-V02-40-page1.txt: [("'magnitude", 'magnitude')]
WMH19041102-V02-40-page2.txt: [("'work.", 'work.')]
WMH19041102-V02-40-page3.txt: [("'when", 'when')]
WMH19041102-V02-40-page4.txt: [("'doing", 'doing'), ("'The", 'The')]
WMH19041109-V02-41-page2.txt: [("'Sabbath-keepers", 'Sabbath-keepers')]
WMH19041109-V02-41-page4.txt: [("'rule", 'rule')]
WMH19041116-V02-42-page3.txt: [("'the", 'the')]
WMH19041123-V02-43-page1.txt: [("'of", 'of'), ("'prayer", 'prayer')]
WMH19041123-V02-43-page3.txt: [("'The", 'The')]
WMH19041123-V02-43-page4.txt: [("'the", 'the')]
WMH19041130-V02-44-page1.txt: [("'to", 'to')]
WMH19041130-V02-44-page2.txt: [("'a", 'a'), ("'association", 'association')]
WMH19041130-V02-44-page3.txt: [("'scold", 'scold')]
WMH19041207-V02-45-page2.txt: [("'us", 'us'), ("'RECORD.", 'RECORD.')]
WMH19041207-V02-45-page3.txt: [("'services", 'services')]
WMH19041214-V02-46-page3.txt: [("'Food", 'Food'), ("'remove", 'remove'), ("'be", 'be')]
WMH19041214-V02-46-page4.txt: [("'Jesus", 'Jesus')]
WMH19041221-V02-47-page2.txt: [("'When", 'When'), ("'I", 'I'), ("'Each", 'Each')]
WMH19041221-V02-47-page4.txt: [("'message", 'message'), ("'fourth", 'fourth'), ("'up", 'up')]
WMH19041228-V02-48-page1.txt: [("'Wealthy", 'Wealthy')]
WMH19041228-V02-48-page3.txt: [("'you", 'you')]
WMH19050104-V03-01-page2.txt: [("'the", 'the'), ("'NOW.", 'NOW.')]
WMH19050104-V03-01-page4.txt: [("'A", 'A')]
WMH19050118-V03-03-page1.txt: [("'matter", 'matter')]
WMH19050118-V03-03-page2.txt: [("'courage", 'courage'), ("'Christ's", 'Christs'), ("'The", 'The'), ("'Object", 'Object'), ("'Object", 'Object'), ("'Object", 'Object')]
WMH19050118-V03-03-page3.txt: [("'Object", 'Object'), ("'Christ's", 'Christs'), ("'Object", 'Object'), ("'Object", 'Object'), ("'Object", 'Object')]
WMH19050208-V03-05-page1.txt: [("'kind", 'kind')]
WMH19050208-V03-05-page2.txt: [("'seventeen", 'seventeen')]
WMH19050215-V03-06-page3.txt: [("'Hesperia", 'Hesperia'), ("'A", 'A')]
WMH19050215-V03-06-page4.txt: [("'William", 'William')]
WMH19050222-V03-07-page1.txt: [("'The", 'The')]
WMH19050222-V03-07-page3.txt: [("'It", 'It'), ("'work.", 'work.')]
WMH19050301-V03-08-page1.txt: [("'That", 'That')]
WMH19050301-V03-08-page2.txt: [("'W.", 'W.')]
WMH19050315-V03-10-page2.txt: [("'F", 'F'), ("'Southern", 'Southern')]
WMH19050315-V03-10-page4.txt: [("'to", 'to')]
WMH19050322-V03-11-page1.txt: [("'and", 'and')]
WMH19050322-V03-11-page5.txt: [("'WI", 'WI')]
WMH19050329-V03-12-page3.txt: [("'feature", 'feature'), ("'field.", 'field.')]
WMH19050405-V03-13-page1.txt: [("'have", 'have')]
WMH19050405-V03-13-page2.txt: [("'to", 'to'), ("'intensely", 'intensely')]
WMH19050413-V03-14-page1.txt: [("'Sept.", 'Sept.')]
WMH19050413-V03-14-page2.txt: [("'officers", 'officers')]
WMH19050413-V03-14-page3.txt: [("'The", 'The')]
WMH19050419-V03-15-page2.txt: [("'to", 'to')]
WMH19050419-V03-15-page3.txt: [("'on", 'on')]
WMH19050419-V03-15-page4.txt: [("'Cedar", 'Cedar')]
WMH19050426-V03-16-page1.txt: [("'increase", 'increase'), ("'effect", 'effect'), ("'that", 'that')]
WMH19050426-V03-16-page4.txt: [("'Tis", 'Tis'), ("'patients.", 'patients.')]
WMH19050503-V03-17-page1.txt: [("'Fhat", 'Fhat'), ("'of", 'of')]
WMH19050503-V03-17-page2.txt: [("'educational", 'educational'), ("'In", 'In')]
WMH19050510-V03-18-page3.txt: [("'is", 'is'), ("'task", 'task')]
WMH19050510-V03-18-page4.txt: [("'having", 'having'), ("'Come", 'Come'), ("'Here", 'Here')]
WMH19050517-V03-19-page1.txt: [("'work", 'work'), ("'weightiest", 'weightiest')]
WMH19050517-V03-19-page4.txt: [("'Conference", 'Conference')]
WMH19050524-V03-20-page1.txt: [("'and", 'and')]
WMH19050524-V03-20-page2.txt: [("'Christ's", 'Christs')]
WMH19050524-V03-20-page3.txt: [("'Field.", 'Field.')]
WMH19050531-V03-21-page1.txt: [("'leprosy", 'leprosy')]
WMH19050531-V03-21-page2.txt: [("'but", 'but'), ("'Field.", 'Field.'), ("'Now", 'Now')]
WMH19050531-V03-21-page4.txt: [("'Twice", 'Twice'), ("'already", 'already')]
WMH19050607-V03-22-page1.txt: [("'and", 'and')]
WMH19050607-V03-22-page3.txt: [("'EDITH", 'EDITH')]
WMH19050607-V03-22-page4.txt: [("'.", '.')]
WMH19050614-V03-23-page1.txt: [("'Surely", 'Surely')]
WMH19050614-V03-23-page4.txt: [("'He", 'He')]
WMH19050621-V03-24-page1.txt: [("'class", 'class')]
WMH19050621-V03-24-page2.txt: [("'Bring", 'Bring')]
WMH19050621-V03-24-page3.txt: [("'the", 'the'), ("'Sing", 'Sing'), ("'The", 'The'), ("'They", 'They'), ("'As", 'As'), ("'For", 'For')]
WMH19050621-V03-24-page4.txt: [("'body", 'body')]
WMH19050628-V03-25-page1.txt: [("'and", 'and'), ("'RESOLVED", 'RESOLVED')]
WMH19050628-V03-25-page2.txt: [("'Desire", 'Desire'), ("'Desire", 'Desire'), ("'prominent.", 'prominent.')]
WMH19050705-V03-26-page1.txt: [("'twer", 'twer')]
WMH19050705-V03-26-page2.txt: [("'are", 'are')]
WMH19050705-V03-26-page3.txt: [("'Field.", 'Field.'), ("'Hours", 'Hours'), ("'greater", 'greater')]
WMH19050705-V03-26-page4.txt: [("'Be", 'Be')]
WMH19050712-V03-27-page2.txt: [("'should", 'should')]
WMH19050712-V03-27-page3.txt: [("'financial.", 'financial.')]
WMH19050719-V03-28-page3.txt: [("'Field.", 'Field.')]
WMH19050719-V03-28-page4.txt: [("'The", 'The')]
WMH19050726-V03-29-page1.txt: [("'August", 'August')]
WMH19050726-V03-29-page2.txt: [("'Field.", 'Field.'), ("'The", 'The')]
WMH19050802-V03-30-page1.txt: [("'Hume.", 'Hume.')]
WMH19050802-V03-30-page2.txt: [("'Practical", 'Practical')]
WMH19050802-V03-30-page3.txt: [("'school", 'school'), ("'Field.", 'Field.')]
WMH19050802-V03-30-page4.txt: [("'God", 'God')]
WMH19050809-V03-31-page2.txt: [("'for", 'for'), ("'Field.", 'Field.'), ("'upon", 'upon'), ("'Glenwood", 'Glenwood')]
WMH19050809-V03-31-page3.txt: [("'to", 'to')]
WMH19050816-V03-32-page1.txt: [("'in", 'in')]
WMH19050816-V03-32-page3.txt: [("'Jews", 'Jews')]
WMH19050816-V03-32-page4.txt: [("'of", 'of'), ("'Sanctify", 'Sanctify')]
WMH19050830-V03-33-page1.txt: [("'largest", 'largest'), ("'the", 'the')]
WMH19050830-V03-33-page4.txt: [("'Follow", 'Follow')]
WMH19050906-V03-34-page1.txt: [("'schools", 'schools')]
WMH19050906-V03-34-page3.txt: [("'Financial.", 'Financial.')]
WMH19050906-V03-34-page4.txt: [("'young", 'young'), ("'tis", 'tis'), ("'tis", 'tis')]
WMH19050913-V03-35-page2.txt: [("'lead", 'lead')]
WMH19050913-V03-35-page4.txt: [("'last", 'last')]
WMH19050920-V03-36-page1.txt: [("'field.", 'field.')]
WMH19050920-V03-36-page2.txt: [("'quiet", 'quiet'), ("'to", 'to'), ("'to", 'to')]
WMH19050927-V03-37-page1.txt: [("'at", 'at'), ("'those", 'those')]
WMH19050927-V03-37-page2.txt: [("'were", 'were')]
WMH19050927-V03-37-page3.txt: [("'come", 'come'), ("'lifting", 'lifting')]
WMH19051004-V03-38-page3.txt: [("'tis", 'tis')]
WMH19051004-V03-38-page4.txt: [("'Edward", 'Edward')]
WMH19051011-V03-39-page1.txt: [("'the", 'the'), ("'the", 'the')]
WMH19051011-V03-39-page3.txt: [("'done", 'done')]
WMH19051011-V03-39-page4.txt: [("'This", 'This')]
WMH19051018-V03-40-page1.txt: [("'went", 'went')]
WMH19051018-V03-40-page2.txt: [("'Field.", 'Field.')]
WMH19051018-V03-40-page3.txt: [("'great", 'great')]
WMH19051018-V03-40-page4.txt: [("'one", 'one'), ("'WEST", 'WEST')]
WMH19051025-V03-41-page1.txt: [("'him", 'him')]
WMH19051025-V03-41-page2.txt: [("'of", 'of')]
WMH19051025-V03-41-page3.txt: [("'twill", 'twill'), ("'Field.", 'Field.')]
WMH19051101-V03-42-page2.txt: [("'field.", 'field.')]
WMH19051101-V03-42-page3.txt: [("'ferers", 'ferers')]
WMH19051108-V03-43-page1.txt: [("'Field.", 'Field.')]
WMH19051108-V03-43-page3.txt: [("'be", 'be')]
WMH19051115-V03-44-page4.txt: [("'school", 'school')]
WMH19051122-V03-45-page1.txt: [("'Seeking", 'Seeking')]
WMH19051122-V03-45-page3.txt: [("'Week", 'Week')]
WMH19051129-V03-46-page2.txt: [("'at", 'at'), ("'Enter", 'Enter'), ("'Field.", 'Field.')]
WMH19051129-V03-46-page3.txt: [("'Word.", 'Word.')]
WMH19051206-V03-47-page2.txt: [("'are", 'are')]
WMH19051213-V03-48-page1.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis')]
WMH19051213-V03-48-page2.txt: [("'Field.", 'Field.')]
WMH19051213-V03-48-page4.txt: [("'voiced", 'voiced'), ("'three", 'three'), ("'for", 'for')]
WMH19051220-V03-49-page1.txt: [("'tis", 'tis'), ("'All", 'All')]
WMH19051220-V03-49-page2.txt: [("'.", '.'), ("'.", '.')]
WMH19051220-V03-49-page3.txt: [("'Field.", 'Field.')]
WMH19051227-V03-50-page1.txt: [("'study", 'study')]
WMH19051227-V03-50-page3.txt: [("'margin.", 'margin.')]
WMH19051227-V03-50-page4.txt: [('\'"', '"')]
WMH19060103-V04-01-page1.txt: [("'field.", 'field.')]
WMH19060103-V04-01-page4.txt: [("'Christ", 'Christ')]
WMH19060110-V04-02-page2.txt: [("'for", 'for'), ('\'strength."', 'strength."'), ("'Financial.", 'Financial.')]
WMH19060110-V04-02-page3.txt: [("'that", 'that'), ("'that", 'that')]
WMH19060110-V04-02-page4.txt: [("'paper", 'paper')]
WMH19060117-V04-03-page4.txt: [("'o", 'o'), ("'o", 'o')]
WMH19060124-V04-04-page1.txt: [("'I", 'I'), ("'ministers", 'ministers')]
WMH19060124-V04-04-page2.txt: [("'for", 'for'), ("'in", 'in')]
WMH19060131-V04-05-page2.txt: [("'Statement", 'Statement'), ("'o", 'o'), ("'o", 'o'), ("'o", 'o')]
WMH19060131-V04-05-page3.txt: [("'late", 'late'), ("'field.", 'field.')]
WMH19060131-V04-05-page4.txt: [("'E.", 'E.')]
WMH19060207-V04-06-page3.txt: [("'Field.", 'Field.')]
WMH19060214-V04-07-page1.txt: [("'for", 'for')]
WMH19060214-V04-07-page2.txt: [("'should", 'should')]
WMH19060214-V04-07-page3.txt: [("'that", 'that')]
WMH19060221-V04-08-page1.txt: [("'church", 'church')]
WMH19060221-V04-08-page2.txt: [("'.ed", '.ed')]
WMH19060221-V04-08-page3.txt: [("'operation", 'operation')]
WMH19060221-V04-08-page4.txt: [("'or", 'or')]
WMH19060228-V04-09-page3.txt: [("'Field.", 'Field.'), ("'Turn", 'Turn')]
WMH19060228-V04-09-page4.txt: [("'U.", 'U.')]
WMH19060307-V04-10-page2.txt: [("'a", 'a'), ("'Field.", 'Field.')]
WMH19060307-V04-10-page3.txt: [("'are", 'are'), ("'we", 'we')]
WMH19060314-V04-11-page2.txt: [("'C.", 'C.')]
WMH19060314-V04-11-page3.txt: [("'Hosanna", 'Hosanna')]
WMH19060321-V04-12-page1.txt: [("'worker", 'worker')]
WMH19060321-V04-12-page2.txt: [("'Field.", 'Field.'), ("'whom", 'whom')]
WMH19060321-V04-12-page3.txt: [("'the", 'the')]
WMH19060328-V04-13-page2.txt: [("'Field.", 'Field.')]
WMH19060411-V04-15-page1.txt: [("'Union", 'Union')]
WMH19060411-V04-15-page2.txt: [("'Financial.", 'Financial.'), ("'Field.", 'Field.')]
WMH19060411-V04-15-page3.txt: [("'so", 'so')]
WMH19060411-V04-15-page4.txt: [("'amount", 'amount'), ("'Dietetics", 'Dietetics')]
WMH19060425-V04-17-page1.txt: [("'for", 'for')]
WMH19060425-V04-17-page3.txt: [("'of", 'of'), ("'crowned", 'crowned')]
WMH19060425-V04-17-page4.txt: [("'straw", 'straw')]
WMH19060502-V04-18-page3.txt: [("'goo.", 'goo.'), ("'a", 'a'), ("'will", 'will'), ("'small", 'small')]
WMH19060502-V04-18-page4.txt: [("'.Wells", '.Wells')]
WMH19060509-V04-19-page1.txt: [("'i", 'i')]
WMH19060509-V04-19-page3.txt: [("'Field.", 'Field.'), ("'to", 'to')]
WMH19060509-V04-19-page4.txt: [("'the", 'the')]
WMH19060523-V04-20-page3.txt: [("'Field.", 'Field.')]
WMH19060530-V04-21-page2.txt: [("'this", 'this')]
WMH19060530-V04-21-page3.txt: [("'greater", 'greater'), ("'Field.", 'Field.')]
WMH19060613-V04-23-page3.txt: [("'To", 'To')]
WMH19060613-V04-23-page4.txt: [("'new", 'new')]
WMH19060620-V04-24-page1.txt: [("'been", 'been')]
WMH19060620-V04-24-page3.txt: [("'them", 'them')]
WMH19060627-V04-25-page2.txt: [("'Field.", 'Field.')]
WMH19060627-V04-25-page4.txt: [("'up", 'up')]
WMH19060704-V04-26-page1.txt: [("'Bible", 'Bible'), ("'SEGO", 'SEGO')]
WMH19060704-V04-26-page3.txt: [("'Tis", 'Tis')]
WMH19060704-V04-26-page4.txt: [("'the", 'the'), ("'at", 'at'), ("'in", 'in')]
WMH19060711-V04-27-page1.txt: [("'tis", 'tis')]
WMH19060711-V04-27-page3.txt: [("'lath", 'lath'), ('\'Times"', 'Times"')]
WMH19060711-V04-27-page4.txt: [("'Field.", 'Field.')]
WMH19060718-V04-28-page1.txt: [("'ilm", 'ilm')]
WMH19060725-V04-29-page1.txt: [("'Field.", 'Field.'), ("'tiff", 'tiff'), ('\'"', '"')]
WMH19060725-V04-29-page3.txt: [("'prayer", 'prayer'), ("'and", 'and')]
WMH19060725-V04-29-page4.txt: [("'EZRA", 'EZRA')]
WMH19060801-V04-30-page1.txt: [("'great", 'great'), ("'We", 'We')]
WMH19060801-V04-30-page2.txt: [("'loose", 'loose'), ("'Ye", 'Ye'), ("'us", 'us'), ("'victory", 'victory')]
WMH19060801-V04-30-page3.txt: [("'Field.", 'Field.')]
WMH19060801-V04-30-page4.txt: [("'When", 'When'), ("'And", 'And')]
WMH19060808-V04-31-page3.txt: [("'Cedar", 'Cedar'), ("'Field.", 'Field.')]
WMH19060822-V04-32-page2.txt: [("'perfect", 'perfect'), ("'text", 'text'), ("'may", 'may'), ("'be", 'be'), ("'SUNDAY", 'SUNDAY')]
WMH19060822-V04-32-page3.txt: [("'Good", 'Good'), ("'that", 'that'), ("'that", 'that'), ("'.", '.')]
WMH19060829-V04-33-page4.txt: [("'twas", 'twas'), ("'.", '.'), ("'Twill", 'Twill'), ("'keep", 'keep'), ("'Tis", 'Tis'), ("'tis", 'tis'), ("'keep", 'keep')]
WMH19060905-V04-34-page1.txt: [("'Christ.", 'Christ.'), ("'made", 'made')]
WMH19060905-V04-34-page2.txt: [("'work", 'work')]
WMH19060905-V04-34-page4.txt: [("'Without", 'Without')]
WMH19060912-V04-35-page1.txt: [("'T", 'T')]
WMH19060912-V04-35-page2.txt: [("'train", 'train')]
WMH19060912-V04-35-page3.txt: [("'before", 'before')]
WMH19060912-V04-35-page4.txt: [("'Field.", 'Field.')]
WMH19060919-V04-36-page1.txt: [("'Till", 'Till'), ("'Twill", 'Twill'), ("'Twill", 'Twill'), ("'people.", 'people.')]
WMH19060919-V04-36-page4.txt: [("'be", 'be')]
WMH19060926-V04-37-page1.txt: [("'of", 'of')]
WMH19060926-V04-37-page2.txt: [("'Why", 'Why'), ("'I", 'I')]
WMH19060926-V04-37-page3.txt: [("'them", 'them')]
WMH19061003-V04-38-page2.txt: [("'they", 'they')]
WMH19061003-V04-38-page3.txt: [("'being", 'being'), ("'variety", 'variety'), ("'cloth", 'cloth')]
WMH19061003-V04-38-page4.txt: [("'Bertha", 'Bertha')]
WMH19061010-V04-39-page2.txt: [("'s", 's'), ("'is", 'is')]
WMH19061010-V04-39-page3.txt: [("'Field.", 'Field.')]
WMH19061010-V04-39-page4.txt: [("'reports", 'reports'), ("'twixt", 'twixt'), ("'tis", 'tis'), ("'twixt", 'twixt'), ("'and", 'and')]
WMH19061017-V04-40-page1.txt: [("'the", 'the'), ("'.", '.'), ("'field.", 'field.')]
WMH19061017-V04-40-page3.txt: [("'followed", 'followed')]
WMH19061024-V04-41-page2.txt: [("'the", 'the')]
WMH19061024-V04-41-page3.txt: [("'field.", 'field.'), ("'look", 'look')]
WMH19061031-V04-42-page1.txt: [("'W.", 'W.')]
WMH19061031-V04-42-page2.txt: [("'disposition", 'disposition'), ("'We", 'We')]
WMH19061031-V04-42-page3.txt: [("'Field.", 'Field.')]
WMH19061031-V04-42-page4.txt: [("'But", 'But')]
WMH19061107-V04-43-page1.txt: [("'well", 'well')]
WMH19061107-V04-43-page2.txt: [("'root", 'root'), ("'Christ's", 'Christs')]
WMH19061107-V04-43-page3.txt: [("'Field.", 'Field.')]
WMH19061107-V04-43-page4.txt: [("'of", 'of')]
WMH19061114-V04-44-page1.txt: [("'stumps", 'stumps'), ("'illl", 'illl')]
WMH19061114-V04-44-page2.txt: [("'W.", 'W.')]
WMH19061114-V04-44-page3.txt: [("'Field.", 'Field.'), ("'for", 'for')]
WMH19061114-V04-44-page4.txt: [("'Young", 'Young')]
WMH19061121-V04-45-page1.txt: [("'Ole", 'Ole'), ("'and", 'and')]
WMH19061121-V04-45-page2.txt: [("'first", 'first')]
WMH19061121-V04-45-page3.txt: [("'and", 'and')]
WMH19061121-V04-45-page4.txt: [("'now", 'now')]
WMH19061128-V04-46-page1.txt: [("'Nur", 'Nur'), ("'SEGO", 'SEGO')]
WMH19061128-V04-46-page3.txt: [("'has", 'has')]
WMH19061205-V04-47-page2.txt: [("'desire", 'desire'), ("'Your", 'Your')]
WMH19061205-V04-47-page3.txt: [("'West", 'West'), ("'Field.", 'Field.')]
WMH19061212-V04-48-page2.txt: [("'field.", 'field.')]
WMH19061219-V04-49-page1.txt: [("'....", '....'), ("'necessities", 'necessities')]
WMH19061226-V04-50-page2.txt: [("'Field.", 'Field.')]
WMH19070102-V05-01-page3.txt: [("'are", 'are')]
WMH19070102-V05-01-page4.txt: [("'power.", 'power.')]
WMH19070109-V05-02-page1.txt: [("'but", 'but')]
WMH19070116-V05-03-page3.txt: [("'God", 'God')]
WMH19070116-V05-03-page4.txt: [("'of", 'of'), ("'field.", 'field.')]
WMH19070123-V05-04-page2.txt: [("'rrufant", 'rrufant'), ("'never", 'never'), ("'any", 'any')]
WMH19070130-V05-05-page1.txt: [("'as", 'as')]
WMH19070130-V05-05-page2.txt: [("'educational", 'educational')]
WMH19070130-V05-05-page3.txt: [("'us", 'us'), ("'that", 'that')]
WMH19070206-V05-06-page1.txt: [('\'"', '"'), ("'last", 'last')]
WMH19070206-V05-06-page2.txt: [("'Financial.", 'Financial.')]
WMH19070206-V05-06-page4.txt: [("'as", 'as')]
WMH19070213-V05-07-page2.txt: [("'dollars", 'dollars')]
WMH19070220-V05-08-page1.txt: [("'orders", 'orders')]
WMH19070220-V05-08-page3.txt: [("'for", 'for')]
WMH19070227-V05-09-page1.txt: [("'rent", 'rent'), ("'o", 'o')]
WMH19070227-V05-09-page2.txt: [("'o", 'o')]
WMH19070306-V05-10-page1.txt: [("'Woe", 'Woe')]
WMH19070320-V05-12-page2.txt: [("'In", 'In')]
WMH19070320-V05-12-page3.txt: [("'Financial.", 'Financial.'), ("'Battle", 'Battle')]
WMH19070327-V05-13-page1.txt: [("'what", 'what')]
WMH19070327-V05-13-page2.txt: [("'endorsed", 'endorsed'), ("'cOi", 'cOi'), ("'Field.", 'Field.'), ("'Come", 'Come')]
WMH19070327-V05-13-page3.txt: [("'March", 'March'), ("'consideration", 'consideration')]
WMH19070327-V05-13-page4.txt: [("'Freemont", 'Freemont')]
WMH19070417-V05-16-page2.txt: [("'the", 'the'), ("'that", 'that')]
WMH19070417-V05-16-page3.txt: [("'Field.", 'Field.')]
WMH19070424-V05-17-page1.txt: [("'enemy", 'enemy')]
WMH19070424-V05-17-page2.txt: [("'foi", 'foi')]
WMH19070424-V05-17-page3.txt: [("'or", 'or'), ("'field.", 'field.')]
WMH19070424-V05-17-page4.txt: [("'for", 'for')]
WMH19070501-V05-18-page1.txt: [("'Wm", 'Wm')]
WMH19070501-V05-18-page2.txt: [("'knew", 'knew')]
WMH19070501-V05-18-page3.txt: [("'say", 'say'), ("'the", 'the'), ("'field.", 'field.')]
WMH19070501-V05-18-page4.txt: [("'Twould", 'Twould'), ("'read", 'read'), ("'I", 'I')]
WMH19070508-V05-19-page1.txt: [("'the", 'the')]
WMH19070515-V05-20-page1.txt: [("'vat", 'vat'), ("'Twill", 'Twill'), ("'twill", 'twill')]
WMH19070515-V05-20-page2.txt: [("'God", 'God'), ("'Third", 'Third')]
WMH19070522-V05-21-page3.txt: [("'Field.", 'Field.')]
WMH19070529-V05-22-page1.txt: [("'Tis", 'Tis')]
WMH19070529-V05-22-page2.txt: [("'Field.", 'Field.')]
WMH19070529-V05-22-page4.txt: [("'Jet", 'Jet')]
WMH19070605-V05-23-page1.txt: [("'y", 'y')]
WMH19070605-V05-23-page4.txt: [("'field.", 'field.')]
WMH19070612-V05-24-page2.txt: [("'the", 'the'), ("'field.", 'field.')]
WMH19070619-V05-25-page1.txt: [("'handle", 'handle'), ("'E.", 'E.')]
WMH19070619-V05-25-page3.txt: [("'Field.", 'Field.')]
WMH19070626-V05-26-page3.txt: [("'Field.", 'Field.'), ("'prevails.", 'prevails.')]
WMH19070703-V05-27-page4.txt: [("'depot", 'depot')]
WMH19070717-V05-29-page4.txt: [("'after", 'after'), ("'the", 'the'), ("'opened", 'opened')]
WMH19070724-V05-30-page1.txt: [("'m", 'm')]
WMH19070724-V05-30-page3.txt: [("'vas", 'vas'), ("'appear", 'appear'), ("'inspiring", 'inspiring')]
WMH19070731-V05-31-page1.txt: [("'Y", 'Y'), ("'children", 'children')]
WMH19070731-V05-31-page3.txt: [("'Ontario", 'Ontario')]
WMH19070807-V05-32-page2.txt: [("'the", 'the')]
WMH19070807-V05-32-page3.txt: [("'a", 'a')]
WMH19070807-V05-32-page4.txt: [("'cello", 'cello'), ("'Field.", 'Field.')]
WMH19070814-V05-33-page3.txt: [("'friend", 'friend')]
WMH19070828-V05-34-page2.txt: [("'men", 'men')]
WMH19070828-V05-34-page3.txt: [("'Field.", 'Field.'), ("'A", 'A'), ("'appreciate", 'appreciate')]
WMH19070904-V05-35-page1.txt: [("'how", 'how'), ("'employ", 'employ'), ("'our", 'our'), ("'a", 'a')]
WMH19070904-V05-35-page2.txt: [("'the", 'the'), ("'the", 'the'), ("'Do", 'Do')]
WMH19070911-V05-36-page1.txt: [("'Rrimr", 'Rrimr'), ("'classes", 'classes')]
WMH19070911-V05-36-page2.txt: [("'any", 'any')]
WMH19070911-V05-36-page4.txt: [("'the", 'the'), ("'Financial.", 'Financial.'), ("'Iowa", 'Iowa')]
WMH19070918-V05-37-page4.txt: [("'would", 'would'), ("'except", 'except')]
WMH19070925-V05-38-page2.txt: [("'dollars", 'dollars')]
WMH19071002-V05-39-page2.txt: [("'s", 's')]
WMH19071002-V05-39-page4.txt: [("'twixt", 'twixt'), ("'tis", 'tis'), ("'twixt", 'twixt')]
WMH19071009-V05-40-page2.txt: [("'Through", 'Through'), ("'emit", 'emit')]
WMH19071016-V05-41-page1.txt: [("'.", '.')]
WMH19071016-V05-41-page3.txt: [("'faith", 'faith')]
WMH19071023-V05-42-page1.txt: [("'''o", 'o')]
WMH19071030-V05-43-page1.txt: [("'to", 'to')]
WMH19071030-V05-43-page2.txt: [("'Reading", 'Reading')]
WMH19071030-V05-43-page3.txt: [("'until", 'until')]
WMH19071106-V05-44-page1.txt: [("'-", '-')]
WMH19071106-V05-44-page3.txt: [("'beginning", 'beginning'), ("'created", 'created'), ("'form", 'form'), ("'void", 'void'), ("'firmament", 'firmament'), ("'Let", 'Let'), ("'fruit", 'fruit'), ("'signs", 'signs'), ("'seasons", 'seasons')]
WMH19071113-V05-45-page2.txt: [("'There", 'There')]
WMH19071113-V05-45-page3.txt: [("'Trunk's", 'Trunks')]
WMH19071113-V05-45-page4.txt: [("'phones", 'phones'), ("'phone", 'phone')]
WMH19071120-V05-46-page1.txt: [("'Kings", 'Kings')]
WMH19071127-V05-47-page1.txt: [("'The", 'The'), ("'It", 'It')]
WMH19071127-V05-47-page3.txt: [("'now", 'now'), ("'people", 'people'), ("'it", 'it')]
WMH19071204-V05-48-page1.txt: [("'s", 's')]
WMH19071211-V05-49-page1.txt: [("'plEit", 'plEit'), ("'air.", 'air.')]
WMH19071211-V05-49-page2.txt: [("'especially", 'especially'), ("'made", 'made')]
WMH19080101-V06-01-page1.txt: [("'.", '.'), ("'Lis", 'Lis'), ("'Tls", 'Tls')]
WMH19080101-V06-01-page3.txt: [("'field", 'field')]
WMH19080101-V06-01-page4.txt: [("'SIGNS", 'SIGNS'), ("'fifty", 'fifty'), ("'Our", 'Our')]
WMH19080108-V06-02-page2.txt: [("'Field.", 'Field.')]
WMH19080108-V06-02-page3.txt: [("'shall", 'shall'), ("'be", 'be'), ("'sick", 'sick')]
WMH19080115-V06-03-page3.txt: [("'Field.", 'Field.'), ("'o", 'o'), ("'Michigan", 'Michigan')]
WMH19080122-V06-04-page1.txt: [("'Field.", 'Field.'), ("'III", 'III')]
WMH19080122-V06-04-page2.txt: [("'disease", 'disease')]
WMH19080129-V06-05-page1.txt: [("'AdVq", 'AdVq'), ("'tis", 'tis'), ("'greater", 'greater')]
WMH19080129-V06-05-page2.txt: [("'Field.", 'Field.'), ("'see", 'see')]
WMH19080129-V06-05-page3.txt: [("'Christian", 'Christian')]
WMH19080205-V06-06-page1.txt: [("'financial.", 'financial.'), ("'I", 'I')]
WMH19080205-V06-06-page4.txt: [("'once", 'once'), ("'success", 'success')]
WMH19080212-V06-07-page1.txt: [("'Arehk", 'Arehk'), ("'Atlanta", 'Atlanta'), ("'Iowa", 'Iowa')]
WMH19080212-V06-07-page3.txt: [("'Vaunt", 'Vaunt')]
WMH19080212-V06-07-page4.txt: [("'as", 'as'), ("'tis", 'tis')]
WMH19080219-V06-08-page2.txt: [("'RUSSELL.", 'RUSSELL.')]
WMH19080219-V06-08-page4.txt: [("'this", 'this')]
WMH19080226-V06-09-page3.txt: [("'without", 'without')]
WMH19080226-V06-09-page4.txt: [("'to", 'to')]
WMH19080304-V06-10-page1.txt: [("'gave", 'gave')]
WMH19080304-V06-10-page2.txt: [("'Field.", 'Field.')]
WMH19080304-V06-10-page3.txt: [("'a", 'a')]
WMH19080311-V06-11-page1.txt: [("'reflected", 'reflected'), ("'be", 'be')]
WMH19080311-V06-11-page3.txt: [("'Field.", 'Field.')]
WMH19080318-V06-12-page1.txt: [("'.", '.')]
WMH19080318-V06-12-page2.txt: [("'and", 'and')]
WMH19080325-V06-13-page2.txt: [("'spoke", 'spoke'), ("'our", 'our'), ("'minds", 'minds')]
WMH19080325-V06-13-page3.txt: [("'Field.", 'Field.'), ("'earnestly", 'earnestly')]
WMH19080325-V06-13-page4.txt: [("'papers", 'papers')]
WMH19080401-V06-14-page1.txt: [("'in", 'in'), ("'I", 'I')]
WMH19080408-V06-15-page1.txt: [("'the", 'the'), ("'President", 'President')]
WMH19080408-V06-15-page3.txt: [("'something", 'something')]
WMH19080415-V06-16-page1.txt: [("'with", 'with'), ("'E.", 'E.')]
WMH19080415-V06-16-page3.txt: [("'the", 'the'), ("'ii", 'ii')]
WMH19080422-V06-17-page1.txt: [("'a", 'a'), ("'at", 'at')]
WMH19080422-V06-17-page2.txt: [("'the", 'the'), ("'Financial", 'Financial')]
WMH19080422-V06-17-page3.txt: [("'Field.", 'Field.')]
WMH19080429-V06-18-page1.txt: [("'a", 'a')]
WMH19080429-V06-18-page2.txt: [("'race", 'race')]
WMH19080429-V06-18-page3.txt: [("'Field.", 'Field.'), ("'to", 'to'), ("'tention", 'tention'), ("'cause", 'cause')]
WMH19080506-V06-19-page1.txt: [("'III", 'III')]
WMH19080506-V06-19-page2.txt: [("'more", 'more')]
WMH19080506-V06-19-page3.txt: [("'the", 'the')]
WMH19080513-V06-20-page3.txt: [("'be", 'be')]
WMH19080520-V06-21-page1.txt: [("'A", 'A'), ("'in", 'in'), ("'s", 's')]
WMH19080520-V06-21-page3.txt: [("'circle", 'circle'), ("'by", 'by')]
WMH19080520-V06-21-page4.txt: [("'them", 'them')]
WMH19080527-V06-22-page2.txt: [("'to", 'to'), ("'and", 'and'), ("'therefore", 'therefore')]
WMH19080527-V06-22-page3.txt: [("'Field.", 'Field.'), ("'occupying", 'occupying'), ("'our", 'our'), ("'thus", 'thus')]
WMH19080603-V06-23-page1.txt: [("'these", 'these'), ("'do", 'do')]
WMH19080603-V06-23-page2.txt: [("'Field.", 'Field.'), ("'Essay", 'Essay'), ("'hand", 'hand')]
WMH19080603-V06-23-page3.txt: [("'to", 'to')]
WMH19080603-V06-23-page4.txt: [("'so", 'so'), ("'Walter", 'Walter')]
WMH19080610-V06-24-page1.txt: [("'d", 'd')]
WMH19080610-V06-24-page2.txt: [("'They", 'They')]
WMH19080610-V06-24-page3.txt: [("'consecrating", 'consecrating'), ("'for", 'for')]
WMH19080610-V06-24-page4.txt: [("'of", 'of')]
WMH19080617-V06-25-page1.txt: [("'.i", '.i')]
WMH19080617-V06-25-page2.txt: [("'We", 'We')]
WMH19080617-V06-25-page3.txt: [("'in", 'in'), ("'Shelby", 'Shelby')]
WMH19080617-V06-25-page4.txt: [("'been", 'been'), ("'Liberty", 'Liberty')]
WMH19080624-V06-26-page1.txt: [("'much", 'much')]
WMH19080624-V06-26-page2.txt: [("'to", 'to'), ("'o", 'o')]
WMH19080624-V06-26-page3.txt: [("'G.", 'G.')]
WMH19080624-V06-26-page4.txt: [("'Canadian", 'Canadian')]
WMH19080701-V06-27-page2.txt: [("'case", 'case'), ("'I", 'I'), ("'on", 'on'), ("'hindrance", 'hindrance'), ("'tis", 'tis'), ("'and", 'and')]
WMH19080701-V06-27-page3.txt: [("'Fake", 'Fake'), ("'Under", 'Under')]
WMH19080708-V06-28-page1.txt: [("'vt", 'vt'), ("'God's", 'Gods'), ("'funds", 'funds'), ("'bath", 'bath')]
WMH19080708-V06-28-page2.txt: [("'month", 'month')]
WMH19080715-V06-29-page1.txt: [("'to", 'to'), ("'it", 'it')]
WMH19080715-V06-29-page2.txt: [("'Yes", 'Yes'), ("'Why", 'Why'), ("'the", 'the'), ("'I", 'I')]
WMH19080715-V06-29-page3.txt: [("'Some", 'Some'), ("'congregation", 'congregation')]
WMH19080722-V06-30-page1.txt: [("'E.", 'E.')]
WMH19080722-V06-30-page2.txt: [("'this", 'this')]
WMH19080722-V06-30-page3.txt: [("'shown", 'shown')]
WMH19080729-V06-31-page2.txt: [("'patients", 'patients'), ("'will", 'will')]
WMH19080729-V06-31-page4.txt: [("'I", 'I'), ("'Sound", 'Sound'), ("'Elder", 'Elder')]
WMH19080805-V06-32-page1.txt: [("'of", 'of')]
WMH19080805-V06-32-page4.txt: [("'city", 'city'), ("'exercises", 'exercises'), ("'in", 'in'), ("'On", 'On')]
WMH19080812-V06-33-page2.txt: [("'years", 'years')]
WMH19080812-V06-33-page3.txt: [("'Hew", 'Hew'), ("'Take", 'Take')]
WMH19080826-V06-34-page1.txt: [("'an", 'an'), ("'decree", 'decree'), ("'round", 'round'), ("'Twas", 'Twas')]
WMH19080826-V06-34-page4.txt: [("'get", 'get'), ("'prepare", 'prepare')]
WMH19080902-V06-35-page1.txt: [("'them", 'them'), ("'must", 'must'), ("'study", 'study'), ("'Neath", 'Neath')]
WMH19080902-V06-35-page2.txt: [("'entire", 'entire'), ("'o", 'o'), ("'became", 'became')]
WMH19080902-V06-35-page4.txt: [("'happiness.", 'happiness.')]
WMH19080909-V06-36-page1.txt: [("'kingdom", 'kingdom')]
WMH19080909-V06-36-page3.txt: [("'Financial", 'Financial')]
WMH19080916-V06-37-page2.txt: [("'company", 'company')]
WMH19080916-V06-37-page3.txt: [("'Men", 'Men'), ("'so", 'so')]
WMH19080916-V06-37-page4.txt: [("'icopies", 'icopies'), ("'is", 'is')]
WMH19080923-V06-38-page1.txt: [("'acknowledge", 'acknowledge')]
WMH19080923-V06-38-page3.txt: [("'there", 'there')]
WMH19080923-V06-38-page4.txt: [("'It", 'It')]
WMH19080930-V06-39-page1.txt: [("'SABBATH", 'SABBATH')]
WMH19081007-V06-40-page1.txt: [("'OW", 'OW')]
WMH19081007-V06-40-page2.txt: [("'Tis", 'Tis'), ("'of", 'of')]
WMH19081007-V06-40-page3.txt: [("'a", 'a'), ("'o", 'o'), ("'Well", 'Well'), ("'I", 'I')]
WMH19081014-V06-41-page2.txt: [("'and", 'and'), ("'who", 'who')]
WMH19081021-V06-42-page1.txt: [("'unless", 'unless'), ("'missions.", 'missions.')]
WMH19081021-V06-42-page2.txt: [("'that", 'that'), ("'Abstain", 'Abstain')]
WMH19081021-V06-42-page3.txt: [("'years", 'years')]
WMH19081021-V06-42-page4.txt: [("'toward", 'toward')]
WMH19081028-V06-43-page1.txt: [("'was", 'was')]
WMH19081028-V06-43-page3.txt: [("'there", 'there')]
WMH19081028-V06-43-page4.txt: [("'subscribers", 'subscribers')]
WMH19081104-V06-44-page1.txt: [("'tithe", 'tithe'), ("'the", 'the'), ("'graves", 'graves')]
WMH19081104-V06-44-page2.txt: [("'.be", '.be'), ("'Falls", 'Falls'), ("'to", 'to'), ("'is", 'is')]
WMH19081104-V06-44-page4.txt: [("'liberally", 'liberally')]
WMH19081111-V06-45-page1.txt: [("'tives", 'tives'), ("'summer", 'summer')]
WMH19081111-V06-45-page3.txt: [("'Genesis", 'Genesis'), ("'separated", 'separated'), ("'lights", 'lights'), ("'beginning", 'beginning'), ("'created", 'created'), ("'signs", 'signs'), ("'form", 'form'), ("'firmament", 'firmament'), ("'Heaven.", 'Heaven.'), ("'seasons", 'seasons')]
In [36]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction4 Average verified rate: 0.9771874993174814 Average of error rates: 0.024615532118887822 Total token count: 915726
In [37]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[37]:
[('m', 1750),
('w', 1506),
('g', 1439),
('e', 1317),
('d', 1279),
('r', 688),
('n', 645),
("'", 490),
('f', 446),
('t', 384),
('th', 283),
('co', 172),
('oo', 171),
('sabbathschool', 163),
('io', 120),
('mt', 108),
('k', 107),
('ro', 96),
('wm', 83),
('numbess', 75),
('re', 71),
('u', 70),
("canvassers'", 58),
('x', 46),
('horr', 39),
("the'", 38),
('rd', 33),
('blendon', 32),
('ex', 32),
('brower', 31),
('harnden', 30),
("f'd", 30),
('mchugh', 29),
('seventhday', 28),
('nd', 28),
('cleora', 27),
('tion', 25),
('nunica', 23),
('sabbathschools', 23),
('q', 23),
('-', 22),
('vowyla', 21),
('al', 21),
('loth', 20),
('z', 20),
('fd', 20),
('michi', 20),
('psa', 20),
('ti', 20),
('ne', 19)]
Correction 5 -- Rejoin Split Words¶
In [39]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction5"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
errors = reports.identify_errors(tokens, spelling_dictionary)
replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_split_words(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
WMH19030128-V01-04-page4.txt: [('co', 'operate')]
WMH19030415-V01-15-page2.txt: [('IMPRES', 'SIONS')]
WMH19030415-V01-15-page4.txt: [('Verm', 'on')]
WMH19030506-V01-18-page3.txt: [('sugg', 'estion')]
WMH19030513-V01-19-page3.txt: [('th', 'in')]
WMH19030520-V01-20-page3.txt: [('co', 'operating')]
WMH19030603-V01-22-page1.txt: [('TES', 'TAMENT')]
WMH19030603-V01-22-page3.txt: [('AB', 'OLISHED')]
WMH19030603-V01-22-page4.txt: [('th', 'at'), ('co', 'laborers')]
WMH19030610-V01-23-page1.txt: [('pre', 'eminently')]
WMH19030610-V01-23-page3.txt: [('unscript', 'ural')]
WMH19030624-V01-25-page4.txt: [('Mc', 'Bride')]
WMH19030715-V01-28-page1.txt: [('mo', 'at')]
WMH19030715-V01-28-page2.txt: [('th', 'e')]
WMH19030715-V01-28-page3.txt: [('wa', 's'), ('developmen', 't')]
WMH19030722-V01-29-page3.txt: [('Kellog', 'g')]
WMH19031028-V01-43-page1.txt: [('ca', 'use')]
WMH19031118-V01-46-page1.txt: [('co', 'operate')]
WMH19031118-V01-46-page3.txt: [('lig', 'and')]
WMH19040113-V02-03-page3.txt: [('AC', 'CEPT')]
WMH19040203-V02-05-page3.txt: [('GENER', 'AL')]
WMH19040203-V02-05-page4.txt: [("Sailor'", 's')]
WMH19040210-V02-06-page3.txt: [('co', 'operation')]
WMH19040210-V02-06-page4.txt: [('Mc', 'Allister')]
WMH19040309-V02-10-page1.txt: [('CO', 'OPERATE')]
WMH19040323-V02-12-page1.txt: [('conven', 'tions')]
WMH19040330-V02-13-page1.txt: [('RE', 'QUISITE'), ('ac', 'es')]
WMH19040330-V02-13-page3.txt: [('bili', 'Ousness')]
WMH19040406-V02-14-page3.txt: [('co', 'operate')]
WMH19040413-V02-15-page3.txt: [('Dimonda', 'le')]
WMH19040420-V02-16-page1.txt: [('treasur', 'ers')]
WMH19040427-V02-17-page3.txt: [('co', 'workers')]
WMH19040427-V02-17-page4.txt: [('ti', 'the')]
WMH19040608-V02-23-page3.txt: [('co', 'operation')]
WMH19040608-V02-23-page4.txt: [('re', 'employ'), ('Scandina', 'vian')]
WMH19040629-V02-25-page3.txt: [('co', 'workers')]
WMH19040629-V02-25-page4.txt: [('th', 'at')]
WMH19040706-V02-26-page2.txt: [('institu', 'tion')]
WMH19040706-V02-26-page3.txt: [('re', 'turn')]
WMH19040713-V02-27-page1.txt: [('CIRCUM', 'STANCES'), ('DEFI', 'NITELY'), ('Vo', 'L')]
WMH19040720-V02-28-page1.txt: [('re', 'a')]
WMH19040727-V02-29-page1.txt: [('re', 'checking')]
WMH19040803-V02-30-page2.txt: [('re', 'hired')]
WMH19040803-V02-30-page3.txt: [('re', 'engaged')]
WMH19040817-V02-32-page2.txt: [('io', 'n')]
WMH19040914-V02-34-page1.txt: [('co', 'laborers')]
WMH19040914-V02-34-page3.txt: [('re', 'elected')]
WMH19040921-V02-34a-page1.txt: [('Io', 'was')]
WMH19040921-V02-34a-page2.txt: [('Responsib', 'ility')]
WMH19040928-V02-35-page1.txt: [('inacces', 'sible')]
WMH19040928-V02-35-page4.txt: [('re', 'opens')]
WMH19041005-V02-36-page4.txt: [('corre', 'late')]
WMH19041019-V02-38-page4.txt: [('Sabbath-', 'school')]
WMH19041026-V02-39-page2.txt: [('PEO', 'PLE')]
WMH19041026-V02-39-page3.txt: [('PRE', 'SENT')]
WMH19041026-V02-39-page4.txt: [('co', 'laborer')]
WMH19041102-V02-40-page3.txt: [('connec', 'tion'), ('co', 'workers')]
WMH19041102-V02-40-page4.txt: [('Gei', 'sel')]
WMH19041123-V02-43-page1.txt: [('sca', 't')]
WMH19041123-V02-43-page4.txt: [('ti', 'e'), ('co', 'operate')]
WMH19041130-V02-44-page3.txt: [('co', 'operation')]
WMH19041207-V02-45-page4.txt: [('reabsorp', 'tion')]
WMH19041221-V02-47-page3.txt: [('EDI', 'TION'), ('NEC', 'ESSARILY')]
WMH19041228-V02-48-page1.txt: [('re', 'counting')]
WMH19041228-V02-48-page2.txt: [('re', 'consecrated')]
WMH19050104-V03-01-page2.txt: [('exul', 'tantly')]
WMH19050111-V03-02-page2.txt: [('co', 'operate')]
WMH19050118-V03-03-page3.txt: [('Ob', 'ject')]
WMH19050201-V03-04-page2.txt: [('ble', 'ssed')]
WMH19050201-V03-04-page4.txt: [('al', 'a')]
WMH19050208-V03-05-page1.txt: [('Cre', 'W')]
WMH19050215-V03-06-page4.txt: [('re', 'mains')]
WMH19050222-V03-07-page2.txt: [('co', 'operation')]
WMH19050222-V03-07-page3.txt: [('co', 'operate')]
WMH19050301-V03-08-page1.txt: [('Mc', 'Curdy')]
WMH19050322-V03-11-page3.txt: [('greate', 'r')]
WMH19050322-V03-11-page4.txt: [('church-s', 'chool')]
WMH19050405-V03-13-page2.txt: [('humani', 'ty')]
WMH19050413-V03-14-page3.txt: [('GIV', 'EN')]
WMH19050419-V03-15-page1.txt: [('co', 'operation')]
WMH19050503-V03-17-page4.txt: [('un', 'able')]
WMH19050510-V03-18-page4.txt: [('increas', 'ing'), ('ro', 'o')]
WMH19050517-V03-19-page1.txt: [('co', 'operation')]
WMH19050517-V03-19-page3.txt: [('Vermontvi', 'lle')]
WMH19050524-V03-20-page4.txt: [('glor', 'ious')]
WMH19050531-V03-21-page3.txt: [('HOFST', 'RA')]
WMH19050531-V03-21-page4.txt: [('vis', 'ited'), ('se', 'cure')]
WMH19050607-V03-22-page3.txt: [('pu', 'pils'), ('co', 'operation')]
WMH19050614-V03-23-page1.txt: [('ex', 'penses')]
WMH19050614-V03-23-page3.txt: [('Educa', 'tion')]
WMH19050614-V03-23-page4.txt: [('co', 'operation')]
WMH19050621-V03-24-page3.txt: [('re', 'echoed')]
WMH19050628-V03-25-page1.txt: [('re', 'established')]
WMH19050705-V03-26-page2.txt: [('co', 'operation')]
WMH19050705-V03-26-page3.txt: [('soci', 'eties')]
WMH19050712-V03-27-page1.txt: [('re', 'No')]
WMH19050712-V03-27-page4.txt: [('Michi', 'gan')]
WMH19050719-V03-28-page3.txt: [('fi', 'st')]
WMH19050726-V03-29-page1.txt: [('ro', 'o')]
WMH19050802-V03-30-page4.txt: [('co', 'operate')]
WMH19050809-V03-31-page2.txt: [('co', 'operate')]
WMH19050816-V03-32-page1.txt: [('ob', 'ject')]
WMH19050830-V03-33-page3.txt: [('ap', 'plicable')]
WMH19050906-V03-34-page3.txt: [('pl', 'acidity')]
WMH19050906-V03-34-page4.txt: [('Wednes', 'day')]
WMH19050920-V03-36-page2.txt: [('th', 'under'), ('io', 'was')]
WMH19050920-V03-36-page4.txt: [('re', 'elected')]
WMH19051004-V03-38-page3.txt: [('coun', 'sel')]
WMH19051018-V03-40-page2.txt: [('ac', 'cepted')]
WMH19051101-V03-42-page1.txt: [('TI', 'e')]
WMH19051101-V03-42-page3.txt: [('suf', 'ferers')]
WMH19051101-V03-42-page4.txt: [('MESSEN', 'GER'), ('Ne', 'braska')]
WMH19051108-V03-43-page1.txt: [('ro', 'o')]
WMH19051108-V03-43-page3.txt: [('re', 'port')]
WMH19051122-V03-45-page2.txt: [('co', 'operate')]
WMH19051129-V03-46-page2.txt: [('co', 'operation')]
WMH19051206-V03-47-page1.txt: [('io', 'n')]
WMH19051206-V03-47-page2.txt: [('ment', 'on')]
WMH19051206-V03-47-page4.txt: [('re', 'organized')]
WMH19051213-V03-48-page2.txt: [('th', 'a')]
WMH19051213-V03-48-page3.txt: [('Ti', 'the'), ('re', 'vived'), ('ac', 'complished')]
WMH19051220-V03-49-page1.txt: [('swi', 'ft'), ('co', 'operation')]
WMH19051220-V03-49-page2.txt: [('G.', ''), ('co', 'operation'), ('CO', 'OPERATION')]
WMH19051220-V03-49-page3.txt: [('peo', 'ple')]
WMH19051227-V03-50-page1.txt: [('io', 'n')]
WMH19051227-V03-50-page2.txt: [('co', 'operation')]
WMH19060103-V04-01-page4.txt: [('pa', 'per'), ('giv', 'ing')]
WMH19060110-V04-02-page1.txt: [('re', 'acting')]
WMH19060110-V04-02-page2.txt: [('ro', 'o')]
WMH19060124-V04-04-page2.txt: [('INCORPO', 'RATED')]
WMH19060124-V04-04-page3.txt: [('co', 'operating'), ('remem', 'bereth')]
WMH19060131-V04-05-page2.txt: [('Mis', 'o'), ('ro', 'o')]
WMH19060207-V04-06-page1.txt: [('Pr', 'esident'), ('COLPORTE', 'UR'), ('ac', 'cepted')]
WMH19060214-V04-07-page2.txt: [('Smi', 'th'), ('ro', 'o')]
WMH19060221-V04-08-page1.txt: [('co', 'operation')]
WMH19060221-V04-08-page2.txt: [('Treasur', 'er')]
WMH19060228-V04-09-page1.txt: [('Janu', 'ary'), ('co', 'operation')]
WMH19060228-V04-09-page2.txt: [('soci', 'eties')]
WMH19060307-V04-10-page3.txt: [('co', 'operation')]
WMH19060314-V04-11-page1.txt: [('co', 'operation'), ('ite', 'm')]
WMH19060314-V04-11-page3.txt: [('co', 'operation'), ('Pa', 'w')]
WMH19060314-V04-11-page4.txt: [('MICHI', 'GAN')]
WMH19060321-V04-12-page1.txt: [('temporari', 'ly')]
WMH19060321-V04-12-page3.txt: [('Pa', 'w')]
WMH19060328-V04-13-page1.txt: [('co', 'operation')]
WMH19060328-V04-13-page4.txt: [('suf', 'fering')]
WMH19060411-V04-15-page1.txt: [('co', 'operation')]
WMH19060411-V04-15-page2.txt: [('ro', 'o')]
WMH19060425-V04-17-page1.txt: [('institu', 'tions')]
WMH19060502-V04-18-page1.txt: [("Sec'", 'y')]
WMH19060502-V04-18-page3.txt: [('ARBEI', 'TER')]
WMH19060509-V04-19-page2.txt: [('PROPH', 'ECY')]
WMH19060523-V04-20-page2.txt: [('appropria', 'tion')]
WMH19060523-V04-20-page3.txt: [('th', 'a')]
WMH19060606-V04-22-page2.txt: [('re', 'establish')]
WMH19060613-V04-23-page3.txt: [('fl', 'o')]
WMH19060613-V04-23-page4.txt: [('ES', 'SENTIAL')]
WMH19060620-V04-24-page2.txt: [('RE', 'PENTED')]
WMH19060620-V04-24-page4.txt: [('ut', 'A'), ('recitati', 'on')]
WMH19060627-V04-25-page4.txt: [('ro', 'o')]
WMH19060704-V04-26-page1.txt: [('ex', 'ample'), ('li', 'i')]
WMH19060711-V04-27-page2.txt: [('ro', 'Jo')]
WMH19060711-V04-27-page3.txt: [('Re', 'populated'), ('re', 'populated')]
WMH19060718-V04-28-page1.txt: [('mo', 'I')]
WMH19060718-V04-28-page2.txt: [('co', 'operation')]
WMH19060718-V04-28-page4.txt: [('appe', 'tites')]
WMH19060801-V04-30-page3.txt: [('ap', 'plications')]
WMH19060808-V04-31-page1.txt: [('APPRO', 'PRIATED'), ('gra', 'ger')]
WMH19060808-V04-31-page2.txt: [('th', 'at'), ('co', 'operation')]
WMH19060822-V04-32-page3.txt: [('re', 'assure')]
WMH19060905-V04-34-page2.txt: [('SPE', 'CIFIC')]
WMH19060912-V04-35-page3.txt: [('re', 'turn')]
WMH19061010-V04-39-page1.txt: [('re', 'locate')]
WMH19061017-V04-40-page1.txt: [('TA', 'RE')]
WMH19061017-V04-40-page3.txt: [('es', 'sential')]
WMH19061024-V04-41-page3.txt: [('hov', 'els')]
WMH19061031-V04-42-page1.txt: [('INTERNA', 'TIONAL')]
WMH19061107-V04-43-page1.txt: [('Wr', 'IST')]
WMH19061114-V04-44-page3.txt: [('submerg', 'ed')]
WMH19061128-V04-46-page1.txt: [('whi', 'le'), ('co', 'operation')]
WMH19061128-V04-46-page4.txt: [('co', 'operate')]
WMH19061205-V04-47-page1.txt: [('moun', 'tains')]
WMH19061226-V04-50-page1.txt: [('municipali', 'ty')]
WMH19061226-V04-50-page3.txt: [('consi', 'dered')]
WMH19070102-V05-01-page1.txt: [('confere', 'e')]
WMH19070102-V05-01-page2.txt: [('Scandi', 'navian'), ('educa', 'tional')]
WMH19070109-V05-02-page1.txt: [('co', 'operate'), ('desti', 'tute')]
WMH19070109-V05-02-page3.txt: [('Meri', 'dian')]
WMH19070116-V05-03-page2.txt: [('resum', 'ing')]
WMH19070116-V05-03-page3.txt: [('founda', 'tion')]
WMH19070123-V05-04-page1.txt: [('Ti', 'the')]
WMH19070123-V05-04-page3.txt: [('secur', 'ing'), ('Co', 'operation'), ('co', 'operation'), ('includ', 'ing')]
WMH19070130-V05-05-page4.txt: [('RE', 'PORTS')]
WMH19070206-V05-06-page4.txt: [('co', 'operate')]
WMH19070213-V05-07-page2.txt: [('co', 'operate')]
WMH19070227-V05-09-page2.txt: [('Ti', 'the')]
WMH19070306-V05-10-page2.txt: [('requisi', 'tes')]
WMH19070306-V05-10-page3.txt: [('thi', 'nking'), ('pre', 'sented')]
WMH19070313-V05-11-page4.txt: [('es', 't')]
WMH19070327-V05-13-page2.txt: [('disci', 'pline')]
WMH19070327-V05-13-page3.txt: [('consci', 'entious')]
WMH19070327-V05-13-page4.txt: [('RE', 'CEIVED')]
WMH19070410-V05-15-page1.txt: [('Stockda', 'le'), ('identit', 'y')]
WMH19070410-V05-15-page2.txt: [('Legis', 'lature')]
WMH19070410-V05-15-page3.txt: [('co', 'operation')]
WMH19070410-V05-15-page4.txt: [('Pottervi', 'lle')]
WMH19070417-V05-16-page2.txt: [('re', 'quire')]
WMH19070424-V05-17-page1.txt: [('peo', 'ple')]
WMH19070424-V05-17-page3.txt: [('es', 'to')]
WMH19070501-V05-18-page3.txt: [('accompl', 'ished')]
WMH19070508-V05-19-page1.txt: [('Co', 'operate'), ('co', 'operate')]
WMH19070515-V05-20-page2.txt: [('th', 'a')]
WMH19070529-V05-22-page2.txt: [('ele', 'vated')]
WMH19070612-V05-24-page1.txt: [('pA', 'w')]
WMH19070619-V05-25-page3.txt: [('co', 'operation')]
WMH19070703-V05-27-page3.txt: [('ca', 'm')]
WMH19070703-V05-27-page4.txt: [('th', 'a')]
WMH19070710-V05-28-page4.txt: [('th', 'e')]
WMH19070731-V05-31-page1.txt: [('re', 'No')]
WMH19070731-V05-31-page2.txt: [('re', 'opened')]
WMH19070814-V05-33-page3.txt: [('mis', 'pronounces')]
WMH19070918-V05-37-page4.txt: [('LAN', 'GUAGE')]
WMH19070925-V05-38-page1.txt: [('re', 'No')]
WMH19071002-V05-39-page1.txt: [('Sanitari', 'um'), ('re', 'locate'), ('larg', 'ely'), ('M.', '')]
WMH19071002-V05-39-page4.txt: [('Whi', 'tmarsh')]
WMH19071009-V05-40-page2.txt: [('enf', 'orced')]
WMH19071016-V05-41-page1.txt: [('un', 'Christian')]
WMH19071016-V05-41-page2.txt: [('attendi', 'ng')]
WMH19071023-V05-42-page3.txt: [('co', 'worker')]
WMH19071106-V05-44-page1.txt: [('co', 'operation')]
WMH19071106-V05-44-page2.txt: [('DISESTAB', 'LISHED')]
WMH19071106-V05-44-page4.txt: [('transla', 'tions')]
WMH19071120-V05-46-page3.txt: [('re', 'enlisted')]
WMH19071120-V05-46-page4.txt: [('bi', 'nary')]
WMH19071127-V05-47-page3.txt: [('RE', 'VIEW')]
WMH19071211-V05-49-page1.txt: [('Sa', 'tan')]
WMH19071211-V05-49-page4.txt: [('Ti', 'the')]
WMH19071218-V05-50-page1.txt: [('Notwith', 'standing')]
WMH19071218-V05-50-page3.txt: [('criti', 'cising'), ('recom', 'mendation')]
WMH19080101-V06-01-page4.txt: [('Otseg', 'o')]
WMH19080108-V06-02-page2.txt: [('AMA', 'DON')]
WMH19080108-V06-02-page4.txt: [('th', 'at')]
WMH19080115-V06-03-page2.txt: [('re', 'enact')]
WMH19080115-V06-03-page3.txt: [('distri', 'bution'), ('ro', 'o')]
WMH19080122-V06-04-page4.txt: [('Wellspri', 'ng')]
WMH19080129-V06-05-page3.txt: [('co', 'operates')]
WMH19080205-V06-06-page3.txt: [('counte', 'nance'), ('Brot', 'her')]
WMH19080212-V06-07-page1.txt: [('Sa', 'bbath')]
WMH19080212-V06-07-page4.txt: [('Se', 'Lected')]
WMH19080219-V06-08-page2.txt: [('Ay', 'ars')]
WMH19080226-V06-09-page1.txt: [('co', 'operate')]
WMH19080226-V06-09-page2.txt: [('commi', 'ttees')]
WMH19080304-V06-10-page1.txt: [('tA', 'M')]
WMH19080304-V06-10-page4.txt: [('discipl', 'ine'), ('un', 'planned')]
WMH19080311-V06-11-page1.txt: [('famil', 'iar')]
WMH19080311-V06-11-page4.txt: [('co', 'operation')]
WMH19080325-V06-13-page1.txt: [('re', 'read'), ('Ev', 'a')]
WMH19080415-V06-16-page1.txt: [('EDUCATIONA', 'L'), ('Mc', 'Reynolds')]
WMH19080415-V06-16-page4.txt: [('th', 'or')]
WMH19080422-V06-17-page2.txt: [('Pottervi', 'lle')]
WMH19080429-V06-18-page1.txt: [('co', 'operate')]
WMH19080506-V06-19-page4.txt: [('ren', 'a')]
WMH19080513-V06-20-page1.txt: [('mo', 'rA'), ('co', 'operate')]
WMH19080513-V06-20-page2.txt: [('co', 'operation'), ('sor', 'a')]
WMH19080603-V06-23-page1.txt: [('mechani', 'Cal')]
WMH19080603-V06-23-page3.txt: [('co', 'operative')]
WMH19080617-V06-25-page2.txt: [('includ', 'ing')]
WMH19080701-V06-27-page1.txt: [('ex', 'pressive')]
WMH19080701-V06-27-page2.txt: [('re', 'union')]
WMH19080708-V06-28-page1.txt: [('FI', 'ELD')]
WMH19080708-V06-28-page3.txt: [('retur', 'ned')]
WMH19080715-V06-29-page1.txt: [('co', 'operation')]
WMH19080715-V06-29-page3.txt: [('th', 'at')]
WMH19080722-V06-30-page3.txt: [('ob', 'jections')]
WMH19080722-V06-30-page4.txt: [('DEYOU', 'NG')]
WMH19080729-V06-31-page1.txt: [('al', 'ways')]
WMH19080805-V06-32-page2.txt: [('re', 'sided')]
WMH19080812-V06-33-page3.txt: [('GENER', 'AL')]
WMH19080909-V06-36-page2.txt: [('auspi', 'ces')]
WMH19080916-V06-37-page2.txt: [('co', 'workers')]
WMH19080923-V06-38-page2.txt: [('Educa', 'tional')]
WMH19080930-V06-39-page1.txt: [('gi', 'a')]
WMH19080930-V06-39-page2.txt: [('prepar', 'ed')]
WMH19081007-V06-40-page4.txt: [('Al', 'ma')]
WMH19081028-V06-43-page1.txt: [('gos', 'pel')]
WMH19081028-V06-43-page2.txt: [('RE', 'QUIRED')]
WMH19081104-V06-44-page4.txt: [('MICHI', 'GAN')]
WMH19081111-V06-45-page1.txt: [('na', 'tives')]
In [42]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction5 Average verified rate: 0.9775893748620843 Average of error rates: 0.02419558964525408 Total token count: 915414
In [43]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[43]:
[('m', 1749),
('w', 1503),
('g', 1438),
('e', 1316),
('d', 1279),
('r', 687),
('n', 642),
("'", 490),
('f', 446),
('t', 381),
('th', 275),
('oo', 171),
('sabbathschool', 163),
('io', 115),
('mt', 108),
('k', 107),
('co', 101),
('ro', 96),
('wm', 83),
('numbess', 75),
('u', 70),
("canvassers'", 58),
('x', 46),
('horr', 39),
("the'", 38),
('rd', 33),
('blendon', 32),
('brower', 31),
('ex', 30),
('harnden', 30),
("f'd", 30),
('mchugh', 29),
('re', 29),
('seventhday', 28),
('nd', 28),
('cleora', 27),
('q', 23),
('nunica', 23),
('sabbathschools', 23),
('-', 22),
('tion', 21),
('vowyla', 21),
('fd', 20),
('psa', 20),
('z', 20),
('loth', 20),
('numbeps', 19),
('ioo', 19),
('drury', 18),
('hoffstra', 18)]
Correction 6 -- Rejoin Split Words II¶
In [45]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction6"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
errors = reports.identify_errors(tokens, spelling_dictionary)
replacements = clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_split_words(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
WMH19030415-V01-15-page4.txt: [('Confer', 'ence'), ('Gene', 'al'), ('Vermon', 'tville')]
WMH19030520-V01-20-page4.txt: [('depart', 'ment')]
WMH19030603-V01-22-page2.txt: [('CON', 'FERENCE')]
WMH19030701-V01-26-page1.txt: [('DEPART', 'MENT')]
WMH19030722-V01-29-page1.txt: [('r', 'esented')]
WMH19030722-V01-29-page4.txt: [('cam', 'pmeeting')]
WMH19031118-V01-46-page1.txt: [('mission', 'ary')]
WMH19031118-V01-46-page3.txt: [('in', 'stil')]
WMH19040127-V02-04-page4.txt: [('improve', 'ments')]
WMH19040203-V02-05-page4.txt: [('at', 'tention'), ('INSTRUCT', 'ORS')]
WMH19040210-V02-06-page4.txt: [('San', 'itarium')]
WMH19040224-V02-08-page1.txt: [('priv', 'ilege')]
WMH19040224-V02-08-page4.txt: [('S', 'hool')]
WMH19040302-V02-09-page2.txt: [('atone', 'ment')]
WMH19040323-V02-12-page1.txt: [('in', 'terest')]
WMH19040330-V02-13-page4.txt: [('maili', 'ng')]
WMH19040413-V02-15-page3.txt: [('an', 'nouncing')]
WMH19040413-V02-15-page4.txt: [('SOUTH', 'ERN')]
WMH19040420-V02-16-page1.txt: [('treasur', 'ers')]
WMH19040504-V02-18-page2.txt: [('m', 'ost')]
WMH19040504-V02-18-page4.txt: [('kin', 'gdom')]
WMH19040518-V02-20-page4.txt: [('per', 'sonal')]
WMH19040608-V02-23-page4.txt: [('Sec', 'retary'), ('Comm', 'ittee')]
WMH19040629-V02-25-page4.txt: [('T', 'oo')]
WMH19040706-V02-26-page3.txt: [('world', 'liness')]
WMH19040706-V02-26-page4.txt: [('Ed', 'itor')]
WMH19040720-V02-28-page1.txt: [('rea', 'ppear'), ('re', 'ligious')]
WMH19040803-V02-30-page4.txt: [('n', 'ay')]
WMH19040810-V02-31-page2.txt: [('r', 'emove')]
WMH19040817-V02-32-page1.txt: [('ha', 'lf')]
WMH19040914-V02-34-page3.txt: [('Bes', 'sie')]
WMH19041005-V02-36-page3.txt: [('THANK', 'FUL')]
WMH19041005-V02-36-page4.txt: [('r', 'oo')]
WMH19041019-V02-38-page4.txt: [('Bat', 'tle')]
WMH19041026-V02-39-page2.txt: [('EN', 'GAGED')]
WMH19041102-V02-40-page3.txt: [('connec', 'tion')]
WMH19041109-V02-41-page1.txt: [('San', 'itarium')]
WMH19041130-V02-44-page2.txt: [('a', 'ssociation')]
WMH19041130-V02-44-page3.txt: [('depart', 'ment')]
WMH19041207-V02-45-page2.txt: [('confer', 'ence')]
WMH19041207-V02-45-page4.txt: [('elim', 'inated'), ('reabsorp', 'tion')]
WMH19041221-V02-47-page3.txt: [('REG', 'ULAR')]
WMH19041228-V02-48-page1.txt: [('and', 're')]
WMH19050104-V03-01-page1.txt: [('to', 'ft')]
WMH19050104-V03-01-page4.txt: [('t', 'wo')]
WMH19050111-V03-02-page2.txt: [('Wash', 'ington')]
WMH19050111-V03-02-page4.txt: [('faith', 'ful')]
WMH19050201-V03-04-page3.txt: [('K', 'inderhook')]
WMH19050222-V03-07-page3.txt: [('CAN', 'VASSERS')]
WMH19050322-V03-11-page6.txt: [('A', 'nna'), ('vi', 'ne')]
WMH19050405-V03-13-page1.txt: [('CON', 'SIDER')]
WMH19050419-V03-15-page1.txt: [('confer', 'ence')]
WMH19050419-V03-15-page3.txt: [('So', 'ciety')]
WMH19050503-V03-17-page3.txt: [('par', 'ents')]
WMH19050510-V03-18-page1.txt: [('teach', 'ers')]
WMH19050517-V03-19-page4.txt: [('Confer', 'ence')]
WMH19050531-V03-21-page3.txt: [('MICH', 'IGAN')]
WMH19050614-V03-23-page1.txt: [('ex', 'penses')]
WMH19050621-V03-24-page4.txt: [('E', 'ndureth')]
WMH19050726-V03-29-page1.txt: [('Publ', 'ic')]
WMH19050802-V03-30-page2.txt: [('deliver', 'ance')]
WMH19050830-V03-33-page3.txt: [('ap', 'plicable')]
WMH19050920-V03-36-page1.txt: [('V', 'ideto')]
WMH19051004-V03-38-page1.txt: [('pro', 'phetic')]
WMH19051004-V03-38-page3.txt: [('A', 'nd')]
WMH19051025-V03-41-page1.txt: [('H', 'artwell')]
WMH19051025-V03-41-page3.txt: [('pro', 'vides')]
WMH19051025-V03-41-page4.txt: [('M', 'adison')]
WMH19051129-V03-46-page1.txt: [('Ed', 'uc')]
WMH19051129-V03-46-page2.txt: [('The', 're')]
WMH19051129-V03-46-page3.txt: [('sol', 'emn')]
WMH19051129-V03-46-page4.txt: [('LIT', 'TLE')]
WMH19051213-V03-48-page2.txt: [('POT', 'TERVILLE')]
WMH19051213-V03-48-page3.txt: [('re', 'vived')]
WMH19060103-V04-01-page1.txt: [('The', 're')]
WMH19060117-V04-03-page1.txt: [('Mon', 'tcalm')]
WMH19060117-V04-03-page2.txt: [('CONFER', 'ENCE')]
WMH19060124-V04-04-page1.txt: [('Bar', 'ry')]
WMH19060131-V04-05-page1.txt: [('g', 'oo')]
WMH19060131-V04-05-page2.txt: [('of', 'ficer')]
WMH19060131-V04-05-page4.txt: [('so', 'journ')]
WMH19060214-V04-07-page2.txt: [('o', 'ff')]
WMH19060221-V04-08-page2.txt: [('Mar', 'garet')]
WMH19060228-V04-09-page1.txt: [('to', 'co'), ('Janu', 'ary')]
WMH19060307-V04-10-page1.txt: [('Com', 'mittee')]
WMH19060307-V04-10-page3.txt: [('teach', "ers'")]
WMH19060314-V04-11-page1.txt: [('to', 'co')]
WMH19060321-V04-12-page1.txt: [('temporari', 'ly'), ('con', 'ference')]
WMH19060411-V04-15-page1.txt: [('to', 'co'), ('GATH', 'ERETH')]
WMH19060411-V04-15-page2.txt: [('o', 'ro')]
WMH19060502-V04-18-page1.txt: [('r', 'INER')]
WMH19060502-V04-18-page3.txt: [('arrange', 'ment')]
WMH19060509-V04-19-page2.txt: [('state', 'ment')]
WMH19060606-V04-22-page1.txt: [('or', 'dained')]
WMH19060627-V04-25-page1.txt: [('O', 'ft')]
WMH19060711-V04-27-page1.txt: [('r', 'Ef')]
WMH19060711-V04-27-page2.txt: [('to', 'ro')]
WMH19060711-V04-27-page4.txt: [('t', 'oo')]
WMH19060725-V04-29-page2.txt: [('we', 're')]
WMH19060725-V04-29-page3.txt: [('COL', 'LEGE')]
WMH19060808-V04-31-page1.txt: [('CAN', 'VASSERS')]
WMH19060822-V04-32-page3.txt: [('Mes', 'siah')]
WMH19060905-V04-34-page3.txt: [('Confer', 'ence')]
WMH19060912-V04-35-page3.txt: [('to', 're')]
WMH19060926-V04-37-page2.txt: [('wei', 'ght')]
WMH19061017-V04-40-page1.txt: [('EAST', 'ERN')]
WMH19061017-V04-40-page3.txt: [('es', 'sential')]
WMH19061031-V04-42-page3.txt: [('de', 'cide')]
WMH19061114-V04-44-page4.txt: [('A', 'ncient'), ('to', 'ro')]
WMH19061205-V04-47-page2.txt: [('wrest', 'lers')]
WMH19061205-V04-47-page4.txt: [('D', 'ecember')]
WMH19070102-V05-01-page1.txt: [('g', 'oo')]
WMH19070102-V05-01-page3.txt: [('t', 'hrough')]
WMH19070109-V05-02-page1.txt: [('A', 'KA')]
WMH19070109-V05-02-page2.txt: [('prom', 'ised')]
WMH19070116-V05-03-page3.txt: [('founda', 'tion')]
WMH19070306-V05-10-page3.txt: [('thi', 'nking')]
WMH19070327-V05-13-page3.txt: [('consci', 'entious')]
WMH19070417-V05-16-page3.txt: [('u', 'nfailing')]
WMH19070424-V05-17-page1.txt: [('peo', 'ple')]
WMH19070424-V05-17-page2.txt: [('IN', 'STRUCTOR')]
WMH19070515-V05-20-page2.txt: [('di', 'fficulties')]
WMH19070605-V05-23-page2.txt: [('ha', 'th')]
WMH19070814-V05-33-page4.txt: [('Mon', 'tcalm')]
WMH19070828-V05-34-page3.txt: [('rat', 'es')]
WMH19070911-V05-36-page1.txt: [('d', 'ay'), ('to', 'ri')]
WMH19071002-V05-39-page1.txt: [('M', 'ichigan')]
WMH19071016-V05-41-page4.txt: [('go', 'od')]
WMH19071106-V05-44-page1.txt: [('SUPER', 'IOR')]
WMH19071106-V05-44-page2.txt: [('the', 'ist')]
WMH19071106-V05-44-page4.txt: [('t', 'Aro')]
WMH19071120-V05-46-page1.txt: [('ans', 'wer')]
WMH19071120-V05-46-page3.txt: [('w', 'hich')]
WMH19071211-V05-49-page2.txt: [('t', 'oo')]
WMH19071218-V05-50-page2.txt: [('a', 'nd')]
WMH19071218-V05-50-page3.txt: [('recom', 'mendation')]
WMH19080101-V06-01-page4.txt: [('the', 'ft')]
WMH19080115-V06-03-page3.txt: [('distri', 'bution')]
WMH19080122-V06-04-page3.txt: [('r', 'eligious')]
WMH19080129-V06-05-page4.txt: [('black', 'smithing')]
WMH19080212-V06-07-page2.txt: [('o', 'ro')]
WMH19080219-V06-08-page4.txt: [('pres', 'ent')]
WMH19080304-V06-10-page2.txt: [('period', 'icals')]
WMH19080311-V06-11-page1.txt: [('famil', 'iar')]
WMH19080401-V06-14-page2.txt: [('Sec', 'retaries')]
WMH19080401-V06-14-page3.txt: [('an', 'ther')]
WMH19080422-V06-17-page3.txt: [('for', 'th')]
WMH19080429-V06-18-page4.txt: [('Stu', "dents'")]
WMH19080506-V06-19-page1.txt: [('con', 'vention')]
WMH19080513-V06-20-page1.txt: [('con', 'tributions'), ('mo', 'rA')]
WMH19080513-V06-20-page2.txt: [('num', 'ber')]
WMH19080520-V06-21-page1.txt: [('e', 'rr'), ('A', 'MO')]
WMH19080520-V06-21-page2.txt: [('den', 'ial')]
WMH19080520-V06-21-page3.txt: [('o', 'ro')]
WMH19080603-V06-23-page1.txt: [('M', 'ICHIGAN')]
WMH19080610-V06-24-page4.txt: [('Adv', 'ent')]
WMH19080708-V06-28-page1.txt: [('k', 'AW')]
WMH19080708-V06-28-page3.txt: [('Bap', 'tist'), ('in', 'vited')]
WMH19080715-V06-29-page2.txt: [('A', 'dventists')]
WMH19080722-V06-30-page2.txt: [('LIB', 'ERTY'), ('John', 'ston')]
WMH19080805-V06-32-page2.txt: [('near', 'ly')]
WMH19080826-V06-34-page3.txt: [('class', 'es')]
WMH19080909-V06-36-page2.txt: [('auspi', 'ces')]
WMH19080916-V06-37-page3.txt: [('shad', 'ow')]
WMH19080916-V06-37-page4.txt: [('and', 're')]
WMH19080923-V06-38-page1.txt: [('t', 'IE')]
WMH19080923-V06-38-page4.txt: [('Pro', 'fessor')]
WMH19080930-V06-39-page1.txt: [('abo', 'ut')]
WMH19081007-V06-40-page4.txt: [('min', 'isters')]
WMH19081014-V06-41-page1.txt: [('i', 'ri')]
WMH19081014-V06-41-page2.txt: [('o', 'ro')]
WMH19081028-V06-43-page1.txt: [('gos', 'pel')]
WMH19081104-V06-44-page4.txt: [('a', 're')]
WMH19081111-V06-45-page5.txt: [('He', 'ra')]
In [48]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction6 Average verified rate: 0.977728102578558 Average of error rates: 0.024010546500479388 Total token count: 915279
In [49]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[49]:
[('m', 1746),
('w', 1502),
('g', 1438),
('e', 1316),
('d', 1278),
('r', 684),
('n', 641),
("'", 490),
('f', 446),
('t', 378),
('th', 273),
('oo', 171),
('sabbathschool', 163),
('io', 115),
('mt', 108),
('k', 105),
('co', 98),
('ro', 94),
('wm', 83),
('numbess', 75),
('u', 70),
("canvassers'", 58),
('x', 46),
('horr', 39),
("the'", 38),
('rd', 33),
('blendon', 32),
('brower', 31),
('ex', 30),
('harnden', 30),
("f'd", 30),
('mchugh', 29),
('seventhday', 28),
('cleora', 27),
('nd', 26),
('q', 23),
('nunica', 23),
('sabbathschools', 23),
('-', 22),
('tion', 21),
('vowyla', 21),
('re', 21),
('fd', 20),
('psa', 20),
('z', 20),
('loth', 20),
('numbeps', 19),
('ioo', 19),
('drury', 18),
('hoffstra', 18)]
Review Remaining Errors¶
In [50]:
reports.docs_with_high_error_rate(summary)
Out[50]:
[('WMH19081111-V06-45-page5.txt', 0.455)]
In [52]:
# %load shared_elements/high_error_rates.py
doc_keys = [x[0] for x in reports.docs_with_high_error_rate(summary) if x[1] > 0.2]
utilities.open_original_docs(doc_keys, directories['cycle'])
Opened files: WMH19081111-V06-45-page5.txt
High error document is handwritten.
In [54]:
reports.long_errors(errors_summary, min_length=15)
Out[54]:
(['heaven-appointed', 'tionofourownpeople', 'niialtioiaaavaliwailio', 'unimpressionable', 'enjoyable-service', 'carefully-arranged', 'disconnectedfrom', 'csuperintendents', 'sabbath-meetings', 'distinguishingbetween', 'great-grandchildren', 'iqiiiidiiiniinii', 'stick-to-it-iveness', 'juippliympamtuuju', 'ponderousdocument', 'influentiarwriters', 'blackstring-around-the-neck', 'self-examination', 'congregegational', 'securingappointments', 'dwellingconveniences', 'all--sabbath-school', 'thought-producing', 'danish-norwegian', 'sleepfsfuicffeicse', 'counter-campaign', 'stoop-shouldered', 'disfeliowshipped', 'desire--expression', 'soul-encouraging', 'abundantresources', 'hethatreapethgatherethfruituntolifeeternal', 'instrumentterial', 'fruituntolifeeternal', 'ceremonieswholly', 'responstbilities', 'commandment-keeping', 'encouragingteachers', 'bookslavebeenhandledmostly', 'relief-of-schools', 'great-responsibility', 'trailting-school', 'irrilirrimiiimiptimpiiir', 'eleven-twentieths', 'self-gratification', "under'compulsion", 'twatmanypersonswouldgivea', 'christianindividuals', 'southernlllinois', 'comparativelysmall', 'weddingring-i-ub', 'seventy-thousand', 'arithmetic--decimal', 'reapetagattiereth', "government--smith's", "fifteen-minutes'", 'solemnmea-ningt---a-', 'northmichigancamp-meetingat', 'receiptrnargaret', 'concerningemmarrual', 'self-satisfaction', 'wanted--assurance', 'unproductiveness', 'never-tobe-omitted', 'the-sabbath-school', 'imiminiiiiimicermin', 'over-development', 'overly-sensitive', 'stumbling-blocks', 'self-opinionated', "hartwelljn'behalf", 'onion-in-the-pocket', 'withindifference', 'selfconsciousness', 'annakemstraannddolivercrumb', 'ntalliscomparativelyquiet', 'drills--penmanship', 'cut-price-combination', 'over-enthusiastic', "iiiiiiiiiniiffffr'", 'iiiiiiviiiiiimunimmi'], 15)
Correction 7 -- Remove Long Error Tokens¶
In [57]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = cycle
cycle = "correction7"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = utilities.tokenize_text(text)
replacements = []
replacements.append(clean.check_for_repeating_characters(tokens, "i|I"))
replacements = [item for sublist in replacements for item in sublist]
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
WMH19050104-V03-01-page4.txt: [('IIIIIIVIIIIIIMUNIMMI', ' ')]
WMH19060321-V04-12-page1.txt: [("IIIIIIIIINIIffffr'", ' ')]
WMH19080729-V06-31-page1.txt: [('IQIIIIdIIINIINII', ' ')]
Correction 8 -- Separate Squashed Words¶
In [59]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log
prev = cycle
cycle = "correction8"
directories = utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
verified_tokens = []
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)
tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]
sorted_list_of_words = list(words_sorted_short['token'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = utilities.readfile(directories['prev'], filename)
text = utilities.strip_punct(content)
tokens = utilities.tokenize_text(text)
wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
maxword = max(len(x) for x in sorted_list_of_words)
replacements = []
for token in tokens:
if not token.lower() in spelling_dictionary:
if len(token) > 17:
if re.search(r"[\-\-\'\"]", token):
pass
else:
split_string = clean.infer_spaces(token, wordcost, maxword)
list_split_string = split_string.split()
if clean.verify_split_string(list_split_string, spelling_dictionary):
replacements.append((token, split_string))
else:
pass
else:
pass
else:
pass
if len(replacements) > 0:
print("{}: {}".format(filename, replacements))
for replacement in replacements:
content = clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
WMH19030603-V01-22-page3.txt: [('distinguishingbetween', 'distinguish ing between')]
WMH19040106-V02-02-page1.txt: [('Christianindividuals', 'Christian individuals')]
WMH19040928-V02-35-page2.txt: [('NIIALTIOIAAAVALIWAILIO', 'N I I A L T I O I A A A V A L I W A I L I O')]
WMH19050201-V03-04-page2.txt: [('irrilirrIMIIIMIPTIMPIIIR', 'ir r i l i r r I M I I I M I P T I M P I I I R')]
WMH19060117-V04-03-page1.txt: [('HETHATREAPETHGATHERETHFRUITUNTOLIFEETERNAL', 'HE THAT REAPETH GATHERETH FRUIT UNTO LIFE ETERNAL')]
WMH19060718-V04-28-page2.txt: [('encouragingteachers', 'encouraging teachers')]
WMH19060725-V04-29-page1.txt: [('FRUITUNTOLIFEETERNAL', 'FRUIT UNTO LIFE ETERNAL')]
WMH19060919-V04-36-page1.txt: [('comparativelysmall', 'comparatively small')]
WMH19080408-V06-15-page1.txt: [('bookslavebeenhandledmostly', 'book slave been handled mostly')]
WMH19080722-V06-30-page2.txt: [('securingappointments', 'securing appointments')]
WMH19080909-V06-36-page1.txt: [('HETHATREAPETHGATHERETHFRUITUNTOLIFEETERNAL', 'HE THAT REAPETH GATHERETH FRUIT UNTO LIFE ETERNAL')]
In [62]:
# %load shared_elements/summary.py
summary = reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/WMH/correction8 Average verified rate: 0.9777330102496761 Average of error rates: 0.024005752636625122 Total token count: 915346
In [63]:
# %load shared_elements/top_errors.py
errors_summary = reports.get_errors_summary( summary )
reports.top_errors( errors_summary, 10 )[:50]
Out[63]:
[('m', 1749),
('w', 1503),
('g', 1438),
('e', 1316),
('d', 1278),
('r', 688),
('n', 642),
("'", 490),
('f', 446),
('t', 380),
('th', 273),
('oo', 171),
('sabbathschool', 163),
('io', 115),
('mt', 108),
('k', 105),
('co', 98),
('ro', 94),
('wm', 83),
('numbess', 75),
('u', 70),
("canvassers'", 58),
('x', 46),
('horr', 39),
("the'", 38),
('rd', 33),
('blendon', 32),
('brower', 31),
('ex', 30),
('harnden', 30),
("f'd", 30),
('mchugh', 29),
('seventhday', 28),
('cleora', 27),
('nd', 26),
('q', 23),
('nunica', 23),
('sabbathschools', 23),
('-', 22),
('tion', 21),
('vowyla', 21),
('re', 21),
('fd', 20),
('psa', 20),
('z', 20),
('loth', 20),
('numbeps', 19),
('ioo', 19),
('drury', 18),
('hoffstra', 18)]
In [ ]: