PHJ-OCR-Evaluation-and-Correction
In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from text2topics import reports
from text2topics import utilities
from text2topics import clean
import re
import os
from os import listdir
from os.path import isfile, join
import collections
In [4]:
%matplotlib inline
In [5]:
wordlist_dir = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists"
wordlists = ["2016-12-07-SDA-last-names.txt",
"2016-12-07-SDA-place-names.txt",
"2016-12-08-SDA-Vocabulary.txt",
"2017-01-03-place-names.txt",
"2017-02-14-Base-Word-List-SCOWL&KJV.txt",
"2017-02-14-Roman-Numerals.txt",
"2017-03-01-Additional-Approved-Words.txt"
]
In [6]:
spelling_dictionary = GoH.utilities.create_spelling_dictionary(wordlists, wordlist_dir)
In [7]:
title = "PHJ"
In [8]:
base_dir = "/Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/{}/".format(title)
Baseline¶
In [9]:
cycle = 'baseline'
In [10]:
stats = GoH.reports.overview_report(join(base_dir, cycle), spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/baseline Average verified rate: 0.9485069283287639 Average of error rates: 0.06036458564763386 Total token count: 2897885
In [11]:
errors_summary = GoH.reports.get_errors_summary( stats )
GoH.reports.top_errors( errors_summary, 200 )
Out[11]:
[('-', 3311), ('m', 2671), ('d', 2283), ('¥', 2135), ("'", 1992), ('ñ', 1991), ('tion', 1419), ('con-', 1357), ('in-', 1275), ('e', 1273), ('re-', 1243), ('r', 1143), ('w', 1134), ('co', 1092), ('t', 1026), ('n', 969), (')', 878), ('be-', 795), ('ex-', 740), ('g', 726), ('f', 721), ('de-', 691), ('com-', 656), ('ment', 649), ('dis-', 642), ('lb', 585), ('pro-', 507), ('x', 490), ('im-', 452), ('per-', 408), ('un-', 407), ('(', 404), ('tions', 379), ('pre-', 347), ('ful', 334), ('sub-', 329), ('ad-', 328), ('ñthe', 326), ('en-', 307), ('ments', 272), ('al-', 257), ('th', 255), ('*', 250), ('ap-', 249), ('ac-', 248), ('/', 238), ('oo', 229), ('ñselected', 228), ('ñsel', 227), ('ous', 225), ('mo', 225), ('pp', 223), ('sup-', 209), ('%', 207), ('to-', 201)]
Review Special Character Use¶
In [12]:
GoH.reports.tokens_with_special_characters(errors_summary)
Out[12]:
[('¥', 2135), ('ñ', 1991), (')', 878), ('(', 404), ('ñthe', 326), ('*', 250), ('/', 238), ('ñselected', 228), ('ñsel', 227), ('%', 207), ('_', 180), ('+', 172), ('ñthis', 168), ('¡', 158), ('ñthese', 155), (']', 154), ('•', 146), ('ña', 143), ("(misses')", 139), ("(ladies')", 124), ('ã', 123), ('\\', 111), ('¥¥', 102), ('=', 92), ('ñall', 92), ('ñby', 89), ('ñdr', 85), ('ñmrs', 82), ("(children's)", 81), ('societyñmrs', 80), ('societyñl', 78), ('(to', 74), ('ñone', 66), ('ô', 65), ('ñcomposed', 65), ('(the', 63), ('ñtake', 62), ('ñand', 55), ('ñmedical', 54), ('ñmr', 51), ('(and', 51), ('ñto', 51), ('(in', 48), ('ñex', 48), ('ñit', 47), ('ñin', 45), ('ñwe', 45), ('ñgood', 43), ('ñtwo', 41), ('ñan', 40), ('`', 39), ('ñnew', 39), ('ñboston', 39), ('(especially', 38), ('bandage)', 37), ('large)', 37), ('(monthly', 37), ('societyña', 37), ('(elastic)', 37), ('(which', 36), ('(continued', 36), ('(extra', 36), ('%x', 35), ('ñchristian', 35), ('ñmade', 35), ('ñfor', 35), ('bookñ', 34), ('(not', 34), ('(dyspeptic)', 34), ('ñslightly', 33), ('(or', 33), ('ñshortened', 33), ('ñthat', 33), ('societyñj', 33), ('(bosom)', 33), ('(poetry)', 32), ('ñj', 32), ('monthlyñdevoted', 32), ('ñmost', 31), ('societyñhattie', 31), ('o¡', 30), ('(size', 30), ('(i)', 30), ('societyñs', 30), ('[', 30), ('vermontñlizzie', 29), ('preventiveñsimple', 28), ('ñt', 28), ('societyñleroy', 28), ('zealandñinternational', 28), ('australiañecho', 28), ('corset)', 27), ('ñno', 27), ('virginiañamy', 27), ('ñmiss', 26), ('ñespecially', 26), ('[from', 26), ('societyñm', 25), ('}', 25), ('(diabetes)', 25), ('(i', 24), ('ñamerican', 24), ('islandñbernard', 24), ('(concluded', 24), ('ñed', 23), ('(as', 23), ('\\\\', 23), ('japanña', 23), ('ñst', 23), ('ñsame', 23), ('recipesñpost-paid', 23), ('¥¥¥', 22), ("dies')", 22), ('(la-', 22), ('africañinternational', 22), ('ñnot', 21), ('[the', 21), ('¥¥¥¥', 21), ('i)', 20), ('fasteningñwith', 20), ('ets)', 20), ('islandsñinternational', 20), ('ñamong', 20), ('ñe', 20), ('virginiañmrs', 20), ('(wheat)', 20), ('societyñc', 20), ('ñtroubles', 20), ('<', 19), ('(r)', 19), ('ñelder', 19), ('carolinañn', 18), ('ñwhat', 18), ('ñn', 18), ('ñbecause', 18), ('(of', 18), ('ñour', 18), ('ñdietetic', 18), ('ñif', 18), ('[supt', 17), ('ñjohn', 17), ('ñhow', 17), ('callñyour', 17), ('(a', 17), ('ñcook', 17), ('societyñjohn', 17), ('ñrev', 16), ("ñhall's", 16), ('englandñn', 16), ('ñpare', 16), ('ñis', 16), ('pennell=suydam', 16), ('ñhealth', 16), ('ñscientific', 16), ('water)', 16), ('ñfig', 16), ('ñpacific', 16), ('(if', 16), ('ñchicago', 16), ("ñladies'", 16), ('mechanicalñmassage', 15), ('ñfrom', 15), ('{no', 15), ('ñs', 15), ('societyñturner', 15), ('ñella', 15), ('ñdio', 15), ("(child'n's)", 15), ('ñherald', 15), ('movementsñmanual', 15), ('switzerlandñimprimerie', 15), ('ñboil', 15), ('dore)', 15), ('ñwhen', 14), ("¥'", 14), ('ñphiladelphia', 14), ('{', 14), ('louisianaña', 14), ('ñthree', 14), ('ñm', 14), ('ñjournal', 14), ('[superintendent', 14), ('ñbeat', 13), ('ñsoak', 13), ('(for', 13), ('ñwash', 13), ('(egypt)', 13), ('ñprepare', 13), ('resortñsteamers', 13), ('(b)', 13), ('societyñf', 13), ('menña', 13), ('ñhousekeeper', 13), ('(a)', 13), ('ñmary', 13), ('land)', 13), ('-¥', 12), ('(illustrated)', 12), ('(holy', 12), ('flordiañcharles', 12), ('switzerlandñ', 12), ('+r', 12), ('breathingñas', 12), ('ãã', 12), ('ñyes', 12), ('temperanceñembracing', 12), ('ñmanual', 12), ('societyñcharles', 12), ('it)', 12), ('(this', 12), ('ñsilicate', 12), ('(island', 12), ('societyñlock', 11), ('ñplease', 11), ('••', 11), ('t¥', 11), ('(mich', 11), ('floridañcharles', 11), ('germanyñl', 11), ("'¥", 11), ('columbiañu', 11), ('(from', 11), ('ñlondon', 11), ('pacificñn', 11), ('(some', 11), ('stateñj', 11), ('deming=', 11), ('ñi', 11), ('\ufeff', 11), ('to¥', 11), ('(c)', 11), ('temperance)', 11), ('ñantiseptic', 11), ('\\fully', 10), ('(t)', 10), ('ñprof', 10), ('ñpopular', 10), ('street)', 10), ('(n', 10), ('(no', 10), ('ñexchange', 10), ('#', 10), ('ñput', 10), ('(england)', 10), ('societyñclara', 10), ('—', 10), ('[physician', 10), ('¥-', 10), ('c)', 10), ('(literature', 10), ('/x', 10), ('¥the', 10), ('evilñsolitary', 10), ('ñbritish', 10), ('(incorporated)', 10), ('ñafter', 10), ('ñsanitary', 9), ('(one', 9), ('societyñeliza', 9), ('(fig', 9), ('ñf', 9), ('englandñthe', 9), ('(an', 9), ('ñc', 9), ('ñthere', 9), ('ó', 9), ('cineñpersonal', 9), ('ñof', 9), ("ñharper's", 9), ('pages)', 9), ('ñbut', 9), ('[director', 9), ('ñsubscribers', 9), ('(new', 9), ('ñsome', 9), ('ñkeep', 9), ('ñsacramento', 9), ('~~', 9), ('ñgerman', 9), ('dietñfruits', 9), ('ñfood', 9), ('crackersñthese', 9), ('`¥', 9), ('tennesseeñj', 9), ('(by', 9), ('caf\x8e', 9), ('echoñ', 9), ('ñdetroit', 9), ('itñ', 9), ('norwayñsundhedsbladet', 8), ('„', 8), ('¥a', 8), ('(that', 8), ('societyñw', 8), ('\\\\\\', 8), ('youñthat', 8), ('i¥', 8), ('**', 8), ('georgiañcharles', 8), ('ñh', 8), ('columbiañmr', 8), ('ñfirst', 8), ('(with', 8), ('senti\\el', 8), ('\\ve', 8), ('best)', 8), ('a¥', 8), ('(gen', 8), ('islandñe', 8), ('ñhe', 8), ('(dia-', 8), ('ñd', 8), ('ñen', 8), ('kentuckyñelsie', 8), ('betes)', 8), ('ñfarm', 8), ('(so', 8), ('ñprofessor', 8), ('canadañmrs', 8), ('ñl', 8), ('formsñreproductionñsexual', 8), ('englandñinternational', 8), ('ñmix', 8), ('ñmake', 8), ('(see', 8), ('£', 8), ('soeietyñs', 8), ('ñare', 8), ('(except', 8), ('societyñamelia', 8), ('¥¥¥¥¥', 8), ('societyñt', 8), ('ñthey', 8), ('ñshe', 7), ('them)', 7), ('ñmay', 7), ('ñsuch', 7), ('ñstew', 7), ('(though', 7), ('___', 7), ('teeth)', 7), ('yñeñsñbut', 7), ('ñhome', 7), ('ñr', 7), ('(signed)', 7), ('ñhall', 7), ('boysña', 7), ('ñwith', 7), ('(who', 7), ('ñsir', 7), ('lifeñthe', 7), ('societyñbox', 7), ('(h', 7), ('themñ', 7), ('ô¥', 7), ('ñabridged', 7), ('ñbread', 7), ('mothersñdiseases', 7), ('norwayñ', 7), ('ñmassage', 7), ('girlsña', 7), ('ñuse', 7), ('kentuckyñelder', 7), ('(including', 7), ('york)', 7), ('floridañlysle', 7), ('ñpeel', 7), ('-)', 6), ('lungsñprinciples', 6), ('¥and', 6), ('(such', 6), ('ñsigns', 6), ('ò', 6), ('addressñpacific', 6), ('carolinañelder', 6), ("ñpeople's", 6), ("(childrens')", 6), ('ñwhile', 6), ('(we', 6), ('(applause', 6), ('ñsix', 6), ('ñwhich', 6), ('xl%', 6), ('itña', 6), ('ñinternational', 6), ('ñrub', 6), ('societyñno', 6), ('womenña', 6), ('^', 6), ('ñselect', 6), ('the¥', 6), ('great)', 6), ('`the', 6), ('ñman', 6), ('forñ', 6), ('columbiañinternational', 6), ('ñhousehold', 6), ('ñunion', 6), ('¢', 6), ('ñjudge', 6), ('ñwhere', 6), ('womenñone', 6), ('ñmany', 6), ('stripe)', 6), ('ñnational', 6), ('ñwoman', 6), ('ñcan', 6), ('__', 6), ('ñheat', 6), ('englandñwm', 6), ('ñanon', 6), ('ñsunday', 6), ('sightñlittle', 6), ('oo¡', 6), ('[of', 6), ('miles)', 6), ('¤', 6), ('(lev', 6), ('(but', 6), ('ñforñ', 6), ('ñalcohol', 6), ('pacificñjohn', 6), ('gansñdescription', 6), ('all)', 6), ('viceña', 6), ('{service', 5), ('ñas', 5), ('ñsan', 5), ('ñwho', 5), ('protein)', 5), ('\\\\n', 5), ('alabamañc', 5), ('manhoodñdangers', 5), ('hygieneñunchastityñthe', 5), ('ñsex', 5), ('ñdress', 5), ('number_', 5), ('ñfive', 5), ('_health', 5), ('(formerly', 5), ('scotiañelder', 5), ('\\\\v', 5), ('(it', 5), ('(d)', 5), ("'`", 5), ('ñgolden', 5), ('c¥', 5), ('ñgirl', 5), ('ñlaws', 5), ('>', 5), ('¥-¥', 5), ('diseaseñ', 5), ('r¥', 5), ('years)', 5), ('post=paid', 5), ('montanañwalter', 5), ('stomachsñpoints', 5), ('manña', 5), ('parlor_', 5), ('ñjames', 5), ('ñnever', 5), ('marylandñelder', 5), ('journal)', 5), ('ñclara', 5), ('ñyou', 5), ('co¥', 5), ('ñgeorge', 5), ('{coolest', 5), ('(do', 5), ('ñatlanta', 5), ('ñmargaret', 5), ('man)', 5), ('ñcalifornia', 5), ('*the', 5), ('patientñ', 5), ('ailmentsñas', 5), ('ñtomatoes', 5), ('ñeat', 5), ('ñcongregationalist', 5), ('ñinto', 5), ('p¥', 5), ('ñcincinnati', 5), ('~', 5), ('¥/', 5), ('ñhave', 5), ('ñscience', 5), ('\\t', 5), ('(she', 5), (')(', 5), ('(cal', 5), ('(she)', 5), ('motherñ', 5), ('/a', 5), ('ñ¤ñ', 5), ('\\v', 5), ('arizonañcol', 5), ('`i', 5), ('englandñpacific', 5), ('africañcharles', 5), ('ñvegetarian', 5), ('ñp', 5), ('(e)', 5), ('in¥', 5), ('(he', 5), ('ñelizabeth', 5), ('germsñof', 5), ('(nearly', 5), ('mississippiñc', 5), ('societyñfremont', 4), ('useñdiphtheria', 4), ('healthñ', 4), ('\\\\\\\\\\', 4), ('(when', 4), ('ñprov', 4), ('or¥', 4), ('ñpresent', 4), ('stateñn', 4), ('ñcause', 4), ('ñcut', 4), ('¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥', 4), ('mississippiñelder', 4), ('guianañgeorge', 4), ('days)', 4), ("ñdon't", 4), ('better)', 4), ('¥for', 4), ('poisoningñtobacco-using', 4), ('ñillustrated', 4), ('himñ', 4), ('¥i', 4), ('ñwell', 4), ('ö', 4), ('ñthen', 4), ('l¥', 4), ('[this', 4), ('absorptionñ', 4), ('it¥', 4), ('ñannals', 4), ('answeredñalcoholic', 4), ('ñgod', 4), ('societyñh', 4), ('ñgrate', 4), ('t)', 4), ('goodñbut', 4), ('ñher', 4), ('/-', 4), ('ñold', 4), ('(about', 4), ('ñunder', 4), ('teacherñ', 4), ('woman)', 4), ('ñon', 4), ('answerñ', 4), ('a*', 4), ('ñemetic', 4), ('viceñetc', 4), ('ñjuvenile', 4), ('sabbathñschool', 4), ('coffeeñten', 4), ('i\\', 4), ('ñcrushed', 4), ('(front', 4), ('ñalcoholism', 4), ('wifeñ', 4), ('ñtemperance', 4), ('a/', 4), ("'/", 4), ('soldñnot', 4), ('tahitiñthe', 4), ('great]', 4), ('ñadvance', 4), ('age)', 4), ('(there', 4), ('ñcanadian', 4), ('ñevery', 4), ('ñphillips', 4), ('ñw', 4), ('-(no', 4), ('body)', 4), ('winter_', 4), ('notñ', 4), ('day)', 4), ('`if', 4), ('temperanceñalcohol', 4), ('offñand', 4), ('-ã', 4), ('be)', 4), ('(turner', 4), ('fairñ', 4), ('pacificñj', 4), ('t_', 4), ('andñ', 4), ('kindnessñoh', 4), ('childrenñwill', 4), ('ñphrenological', 4), ('montanañd', 4), ('againñ', 4), ('=-', 4), ('thatñ', 4), ('ñmilton', 4), ('wifeñif', 4), ('alabamañd', 4), ('want==', 4), ('too¡', 4), ('(x', 4), ('*a', 4), ('barbarismñevil', 4), ('ñbible', 4), ('ñdoes', 4), ('myñ', 4), ('ñhelen', 4), ('ñpeople', 4), ('(according', 4), ('ñedward', 4), ('of¥', 4), (')-', 4), ('-¥-', 4), ('englandñs', 4), ('ñcertainly', 4), ('was)', 4), ('intemperanceñeffects', 4), ('\\n', 4), ('lifeñ', 4), ('noticeñspecial', 4), ('¥o', 4), ('tractsocietyñc', 4), ('(after', 4), ('ñrural', 4), ('-_', 4), ('is¥', 4), ('++++++++++++++++++++++++++++++', 4), ('/heart', 4), ('ñem-', 4), ('ñah', 4), ('poison)', 4), ('patentsñ', 4), ('illustratedñjust', 4), ('ú', 4), ('wellñcayenne', 3), ('ñthose', 3), ('tableña', 3), ('familyña', 3), ('ñcanon', 3), ('societyñmiss', 3), ('ñpresbyterian', 3), ('childrenñhygiene', 3), ('(k)', 3), ('zealandñiternational', 3), ('good)', 3), ('pacificñcharles', 3), ('cultureñ', 3), ('experiencesñvoiced', 3), ('valuesñyou', 3), ('(san', 3), ('above)', 3), ('***', 3), ('(all', 3), ('(enough', 3), ('pounds)', 3), ('ñcells', 3), ('gñ', 3), ('st)-', 3), ('ñb', 3), ('/t', 3), ('ñnothing', 3), ('ñbefore', 3), ('brown=fox', 3), ('ñplace', 3), ('ñpure', 3), ('societyñspringville', 3), ('nessñtea', 3), ('¥ñ', 3), ('appl/cat/on', 3), ('etry)', 3), ('canadañs', 3), ('itñcongressional', 3), ('waterfallsñpictures', 3), ('ñlook', 3), ('ñpeaches', 3), ('so)', 3), ('moreñ', 3), ('`thou', 3), ('louisianañelder', 3), ('(f)', 3), ('¥ô', 3), ('partñ', 3), ('examinedñinventory', 3), ('summer_', 3), ('@', 3), ('aloneñand', 3), ('v¥', 3), ('blossomñalcoholism', 3), ('ñpotatoes', 3), ('*extracts', 3), ('//', 3), ('ageña', 3), ('(detroit', 3), ('ñespeciall', 3), ('masterpieceñhow', 3), ('ñlight', 3), ('ñfather', 3), ('ñlowell', 3), ('a)', 3), ('oatsñgetting', 3), ('this¥', 3), ('ñtimes', 3), ('blindnessñscience', 3), ('(eng', 3), ('ñlancet', 3), ('(provided', 3), ('(h)', 3), ('se\\ti\\el', 3), ('societyñmelissa', 3), ('ñscald', 3), ('healthñhow', 3), ('rightsñthe', 3), ('healthñcare', 3), ('ñone-half', 3), ('grandeurñpeculiar', 3), ('ñjoseph', 3), ('thisña', 3), ('space]', 3), ('centsñprobably', 3), ('allñto', 3), ('societyñmts', 3), ('girlsñ', 3), ('mississippiñoscar', 3), ('ofñ', 3), ('pacificñc', 3), ('ñmuch', 3), ('surroundingsñthe', 3), ('pñ', 3), ('/i', 3), ('+t', 3), ('û', 3), ('divineñhow', 3), ('ñfigs', 3), ('(p', 3), ('ñse/', 3), ('_nteresting', 3), ('(sometimes', 3), ('ñmeasure', 3), ('child-cultureñ', 3), ('sinceñpioneer', 3), ('developmentña', 3), ('gasoline)', 3), ('mexicoñjohn', 3), ('australiañinternational', 3), ('condition)', 3), ('intemperanceñ', 3), ('dietñthe', 3), ('significationñthe', 3), ('influencesñlaw', 3), ('houseña', 3), ('state)', 3), ('histories)', 3), ('nuisanceñthe', 3), ('marredñmodesty', 3), ('¥ã', 3), ('sweetñ', 3), ('buchananñyes', 3), ('(yes', 3), ('ñwill', 3), ('discoveredñthe', 3), ('tastersñtea', 3), ('roomña', 3), ('case)', 3), ('ñjust', 3), ('ôii', 3), ('fever)', 3), ('-*', 3), ('himself)', 3), ('handñ', 3), ('congenersña', 3), ('examinedña', 3), ('o¥', 3), ('ñshould', 3), ('marriedñthe', 3), ('voiceñ', 3), ('ñlaura', 3), ('ñjoaquin', 3), ('¥by', 3), ('ñlittle', 3), ('monthñ', 3), ('(gas', 3), ('wombñ', 3), ('tr)', 3), ('idahoñelder', 3), ('ñnellie', 3), ('beforeñ', 3), ('wayñ', 3), ('(page', 3), ('year)', 3), ("cellarñwhat's", 3), ('ñkate', 3), ('inñ', 3), ('e¥', 3), ('(on', 3), ('us)', 3), ('switzerlandñelder', 3), ('food)', 3), ('(dress', 3), ('ñat', 3), ('back)', 3), ('ôvt', 3), ('formationñaltitude', 3), ('*parlor', 3), ('con_', 3), ('%*', 3), ('people)', 3), ('subjectñone', 3), ('molars)', 3), ('(these', 3), ('flour_', 3), ('ñbuds', 3), ('itñthat', 3), ('barbarismñtobacco', 3), ('(subject', 3), ('stomachñgin', 3), ('lovelinessñthe', 3), ('coffeeñpork', 3), ('pubertyña', 3), ('(sold', 3), ('ñsunday-school', 3), ('+++', 3), ('worldñman', 3), ('comfortñmakes', 3), ('liversña', 3), ('featuresñtheo-', 3), ('ñfrank', 3), ('ñhenry', 3), ('(through', 3), ('in_', 3), ('ñsee', 3), ('ñsturdy', 3), ('(he)', 3), ('dried)', 3), ('(looking', 3), ('ñposition', 3), ('ñsouthern', 3), ('societyñnew', 3), ('_a', 3), ('itñit', 3), ('ñmince', 3), ('ñtheir', 3), ('pen)', 3), ('vitalityñchildren', 3), ('t=', 3), ('¥e', 3), ('tipplingñtobacco', 3), ('airñhow', 3), ('(chap', 3), ('ñlawyer', 3), ('favorñits', 3), ('doctrineña', 3), ('coã', 3), ('xviiñdorsal', 3), ('()', 3), ('ñcor', 3), ('medicationñ', 3), ('adñ', 3), ('motherñimportant', 3), ('i%', 3), ("'ã", 3), ('poisoningña', 3), ('oneña', 3), ('vainñthe', 3), ('(j)', 3), ('ñeben', 3), ('iñi', 3), ('beñ', 3), ('rememberñthere', 3), ('mannersñsowing', 3), ('societyñwest', 3), ('\\vhy', 3), ("(don't", 3), ('ñdip', 3), ('instance)', 3), ('ñsurely', 3), ('houseñthe', 3), ('(common', 3), ('ñrecipes', 3), ('sugar)', 3), ('physicianñ', 3), ('declineñeconomy', 3), ('(plus', 3), ('foodsñthe', 3), ('ñbabyhood', 3), ('r)', 3), ('ñmail', 3), ('inhabitantsñthe', 3), ('[san', 3), ('(let', 3), ('ñnearly', 3), ('ñofñ', 3), ('lit¥', 3), ('bodyñthe', 3), ('sinsñerrors', 3), ('societyñn', 3), ('ñbreak', 3), ('myselfñas', 3), ('frie\\d', 3), ('(luke', 3), ('thoughtñ', 3), ('deo)', 3), ('g)', 3), ('ñshake', 3), ('profit)', 3), ('[or', 3), ('ñoil', 3), ('♦', 3), ('carolinañ', 3), ('i¡', 3), ("'ñ", 3), ('w_', 3), ('(silk', 3), ('suggestionsñante-natal', 3), ('com_', 3), ('ñwhy', 3), ('ñspread', 3), ('ñmilk', 3), ("`'", 3), (')i', 3), ('sinñ', 3), ('deserveñreverently', 3), ('(babyhood)', 3), ('tuberculosisñits', 3), ('girlñ', 3), ('(unless', 3), ('poisonñtobacco', 3), ('sayñand', 3), ('ñ¥', 3), ('childñthe', 3), ('retreat)', 3), ('ñduring', 3), ('stillñ', 3), ('periodñimportant', 3), ('ex_', 3), ('•••', 3), ('(without', 3), ('ñemily', 3), ('ti)', 3), ('for_', 3), ('womenñsexual', 3), ('ciselyñyet', 3), ('topersñtea', 3), ('(white', 3), ('_-', 3), ('usingñthe', 3), ('ñ/', 3), ('ñenglish', 3), ('(-', 3), ('))', 3), ('zealandñedward', 3), ('summerñmap', 3), ('ñhouse-', 3), ('ñindeed', 3), ('nobodyñonly', 3), ('whileñ', 3), ("fasteningñcan't", 3), ('diseaseñtobacco-using', 3), ('bestñfor', 3), ('(illustrated', 3), ('ã\\', 3), ('smithñ', 2), ('tractsocietyñl', 2), ('countryñthe', 2), ('ñeliza', 2), ('sugars)', 2), ('(soft', 2), ('(they)', 2), ('entr\x8ee', 2), ('motherña', 2), ("ñwoman's", 2), ('bodyñfor', 2), ('brideñ', 2), ('ñjour-', 2), ('\\tg', 2), ('‘', 2), ('ñmaryland', 2), ('g¥', 2), ('result)', 2), ('¥s', 2), ('answerñthe', 2), ('____', 2), ('bread)', 2), ('worldlinessñwho', 2), ('dressingñin', 2), ('<<', 2), ('druggist)', 2), (')s', 2), ('lit=', 2), ('(where', 2), ('affectionñare', 2), ('\\taining', 2), ('ñsteam', 2), ('(registration', 2), ('ñharriet', 2), ('millsñ', 2), ('norwayñsundhedsbiadet', 2), ...]
Correction 1 -- Normalize Special Characters¶
In [13]:
# %load shared_elements/normalize_characters.py
prev = "baseline"
cycle = "correction1"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
# Substitute for all other dashes
content = re.sub(r"—-—–‑", r"-", content)
# Substitute formatted apostrophe
content = re.sub(r"\’\’\‘\'\‛\´", r"'", content)
# Replace all special characters with a space (as these tend to occur at the end of lines)
content = re.sub(r"[^a-zA-Z0-9\s,.!?$:;\-&\'\"]", r" ", content)
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
In [14]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction1 Average verified rate: 0.9563835150945872 Average of error rates: 0.05139457898244834 Total token count: 2895740
In [15]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[15]:
[('-', 3495), ('m', 2732), ('d', 2320), ("'", 2130), ('tion', 1424), ('e', 1371), ('con-', 1360), ('in-', 1275), ('re-', 1243), ('r', 1238), ('w', 1166), ('t', 1148), ('n', 1104), ('co', 1103), ('be-', 798), ('f', 775), ('g', 754), ('ex-', 744), ('de-', 692), ('com-', 658), ('ment', 652), ('dis-', 642), ('x', 639), ('lb', 585), ('pro-', 509), ('im-', 452), ('per-', 412), ('un-', 408), ('tions', 380), ('pre-', 347), ('ful', 334), ('ad-', 330), ('sub-', 329), ('en-', 308), ('ments', 272), ('th', 260), ('sel', 259), ('al-', 257), ('ap-', 250), ('ac-', 248), ('oo', 235), ('pp', 228), ('mo', 227), ('ous', 225), ('k', 210), ('sup-', 209), ('z', 206), ('to-', 201), ('u', 196), ('treat-', 196)]
Correction 2 -- Correct Line Endings¶
In [16]:
# %load shared_elements/correct_line_endings.py
prev = cycle
cycle = "correction2"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
content = re.sub(r"(\w+)(\-\s{1,})([a-z]+)", r"\1\3", content)
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
In [17]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction2 Average verified rate: 0.9782014724508072 Average of error rates: 0.030641413019329045 Total token count: 2851844
In [18]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[18]:
[('-', 3480), ('m', 2730), ('d', 2320), ("'", 2130), ('e', 1370), ('r', 1235), ('w', 1166), ('t', 1146), ('n', 1104), ('co', 1101), ('f', 775), ('g', 753), ('x', 639), ('lb', 585), ('th', 260), ('sel', 251), ('oo', 235), ('pp', 228), ('mo', 227), ('k', 210), ('z', 206), ('u', 196), ("an'", 192), ('--', 189), ('-page', 133), ('q', 132), ('ournal', 103), ('society-', 100), ('al', 100), ("'tis", 87), ('te', 86), ('pa', 83), ('ex', 82), ('ro', 82), ('oz', 81), ("'the", 80), ('ga', 77), ('munn', 73), ('va', 73), ('io', 72), ('ti', 70), ("''", 66), ('tion', 63), ("infants'", 61), ('---', 60), ('id', 59), ('zo', 55), ('viperance', 54), ('si', 48), ('urnal', 48)]
Correction 3 -- Remove extra dashes¶
In [19]:
# %load shared_elements/remove_extra_dashes.py
prev = cycle
cycle = "correction3"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = GoH.utilities.tokenize_text(text)
replacements = []
for token in tokens:
if token[0] is "-":
replacements.append((token, token[1:]))
elif token[-1] is "-":
replacements.append((token, token[:-1]))
else:
pass
if len(replacements) > 0:
print("{}: {}".format(filename, replacements))
for replacement in replacements:
content = GoH.clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18850601-V01-01-page1.txt: [('-MONTHLY', 'MONTHLY'), ('-', ''), ('TEMPER-', 'TEMPER')] PHJ18850601-V01-01-page10.txt: [('Temperqee-', 'Temperqee')] PHJ18850601-V01-01-page12.txt: [('-suppression', 'suppression')] PHJ18850601-V01-01-page13.txt: [('-', ''), ('"--', '"-')] PHJ18850601-V01-01-page15.txt: [('Tar-', 'Tar')] PHJ18850601-V01-01-page16.txt: [('dis-', 'dis')] PHJ18850601-V01-01-page17.txt: [('-ed', 'ed'), ('Lam-', 'Lam')] PHJ18850601-V01-01-page19.txt: [('--', '-')] PHJ18850601-V01-01-page2.txt: [('-wage', 'wage')] PHJ18850601-V01-01-page21.txt: [('symp-', 'symp')] PHJ18850601-V01-01-page23.txt: [('-a-', 'a-'), ('-s...se', 's...se'), ('-.', '.'), ('-...-', '...-'), ('----a', '---a'), ('-', ''), ('--', '-'), ('---', '--'), ('-', ''), ('-', ''), ('OTHER-', 'OTHER'), ('-', ''), ('-', '')] PHJ18850601-V01-01-page24.txt: [('.-', '.'), ('-', ''), ('----', '---'), ('CAL.--', 'CAL.-')] PHJ18850601-V01-01-page3.txt: [('care-', 'care')] PHJ18850601-V01-01-page7.txt: [('-', '')] PHJ18850601-V01-01-page9.txt: [('ap-', 'ap')] PHJ18850801-V01-02-page11.txt: [('tre-', 'tre')] PHJ18850801-V01-02-page23.txt: [('Library-', 'Library'), ('-', ''), ('-', '')] PHJ18850801-V01-02-page24.txt: [('--', '-'), ('CAL.cp----', 'CAL.cp---'), ('-', ''), ('-......', '......'), ('-', ''), ('-IFIWWW', 'IFIWWW')] PHJ18850801-V01-02-page7.txt: [('con-', 'con')] PHJ18850801-V01-02-page8.txt: [('-', '')] PHJ18851001-V01-03-page13.txt: [('--', '-')] PHJ18851001-V01-03-page18.txt: [('-a-month', 'a-month')] PHJ18851001-V01-03-page2.txt: [('-all', 'all'), ('table--', 'table-')] PHJ18851001-V01-03-page23.txt: [('G-', 'G'), ('--', '-'), ('-', ''), ('-', '')] PHJ18851001-V01-03-page24.txt: [('-', ''), ('-For', 'For'), ('iitslitrlifVit-', 'iitslitrlifVit'), ('Fig-', 'Fig')] PHJ18851001-V01-03-page3.txt: [('per-', 'per'), ('dif-', 'dif')] PHJ18851001-V01-03-page6.txt: [('sub-', 'sub'), ('Ameri-', 'Ameri')] PHJ18851201-V01-04-page1.txt: [('-', ''), ('-n-.-', 'n-.-'), ('strength-', 'strength'), ('-e', 'e')] PHJ18851201-V01-04-page10.txt: [('false-', 'false')] PHJ18851201-V01-04-page11.txt: [('discov-', 'discov')] PHJ18851201-V01-04-page13.txt: [('-', '')] PHJ18851201-V01-04-page14.txt: [('-', '')] PHJ18851201-V01-04-page21.txt: [('JOUR-', 'JOUR'), ('JOUR-', 'JOUR'), ('-', '')] PHJ18851201-V01-04-page23.txt: [('ATONENII-', 'ATONENII'), ('---o', '--o')] PHJ18851201-V01-04-page24.txt: [('ale.gboetve-', 'ale.gboetve'), ('--', '-'), ('-', ''), ('co-', 'co'), ('t-', 't'), ('-', '')] PHJ18851201-V01-04-page6.txt: [('obser-', 'obser')] PHJ18851201-V01-04-page8.txt: [('-', '')] PHJ18860201-V01-05-page1.txt: [('pERh\'"--', 'pERh\'"-')] PHJ18860201-V01-05-page11.txt: [('Inter-', 'Inter')] PHJ18860201-V01-05-page16.txt: [('mem-', 'mem')] PHJ18860201-V01-05-page17.txt: [('oh-', 'oh')] PHJ18860201-V01-05-page18.txt: [('Examina-', 'Examina')] PHJ18860201-V01-05-page21.txt: [('cer-', 'cer'), ('State-', 'State')] PHJ18860201-V01-05-page23.txt: [('PRESERVA-', 'PRESERVA'), ('P-', 'P'), ('-', ''), ('con-', 'con')] PHJ18860201-V01-05-page24.txt: [('.V.e..SIT-', '.V.e..SIT'), ('W-', 'W'), ('VALLZT-', 'VALLZT'), ('it-', 'it'), ('-', ''), ('-', ''), ('F-', 'F'), ('-..', '..')] PHJ18860201-V01-05-page8.txt: [('mem-', 'mem')] PHJ18860201-V01-05-page9.txt: [('--', '-')] PHJ18860401-V01-06-page13.txt: [('an-', 'an')] PHJ18860401-V01-06-page14.txt: [('North-', 'North')] PHJ18860401-V01-06-page16.txt: [('DIS-', 'DIS')] PHJ18860401-V01-06-page20.txt: [('-page', 'page'), ('-page', 'page')] PHJ18860401-V01-06-page21.txt: [('JOUR-', 'JOUR')] PHJ18860401-V01-06-page22.txt: [('PREVEN-', 'PREVEN')] PHJ18860401-V01-06-page23.txt: [('-mo.', 'mo.'), ('-PAGE', 'PAGE'), ('SAB-', 'SAB'), ('SEC-', 'SEC'), ('s-', 's')] PHJ18860401-V01-06-page24.txt: [('-', ''), ('-L', 'L'), ('tern-', 'tern'), ('-erg', 'erg'), ('Vit-', 'Vit'), ('-irliif', 'irliif'), ('-', '')] PHJ18860401-V01-06-page4.txt: [('knowl-', 'knowl')] PHJ18860401-V01-06-page9.txt: [('-', '')] PHJ18860601-V02-01-page12.txt: [('-', ''), ('-sons', 'sons')] PHJ18860601-V02-01-page15.txt: [('sev-', 'sev')] PHJ18860601-V02-01-page16.txt: [('-', '')] PHJ18860601-V02-01-page19.txt: [('Mad-', 'Mad')] PHJ18860601-V02-01-page20.txt: [('-ton', 'ton'), ('fla-', 'fla'), ('-', ''), ('-', ''), ('-', '')] PHJ18860601-V02-01-page22.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society')] PHJ18860601-V02-01-page23.txt: [('-', ''), ('-', '')] PHJ18860601-V02-01-page24.txt: [('V-', 'V'), ('F-', 'F'), ('-i-WWWW', 'i-WWWW'), ('VALLEY-', 'VALLEY'), ('-', ''), ('-', '')] PHJ18860601-V02-01-page3.txt: [('derange-', 'derange'), ('well-', 'well')] PHJ18860601-V02-01-page6.txt: [('move-', 'move')] PHJ18860601-V02-01-page7.txt: [('BEDS.-', 'BEDS.')] PHJ18860601-V02-01-page9.txt: [('ob-', 'ob'), ('in-', 'in')] PHJ18860801-V02-02-page1.txt: [('-', ''), ('-', '')] PHJ18860801-V02-02-page13.txt: [('con-', 'con'), ('-', '')] PHJ18860801-V02-02-page15.txt: [('prod-', 'prod')] PHJ18860801-V02-02-page20.txt: [('SUPPLY.---', 'SUPPLY.--')] PHJ18860801-V02-02-page21.txt: [('dareddown-', 'dareddown')] PHJ18860801-V02-02-page24.txt: [('-', '')] PHJ18860801-V02-02-page25.txt: [('-t', 't')] PHJ18860801-V02-02-page26.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('-', ''), ('-', ''), ('-', ''), ('-SENTINEL', 'SENTINEL')] PHJ18860801-V02-02-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('SUP-', 'SUP'), ('-', ''), ('-', ''), ('-', '')] PHJ18860801-V02-02-page29.txt: [('"-', '"'), ('---', '--'), ('-', ''), ('-vvoax', 'vvoax')] PHJ18860801-V02-02-page31.txt: [('STOM-', 'STOM'), ('-', ''), ('-', ''), ('de-', 'de')] PHJ18860801-V02-02-page32.txt: [('------', '-----'), ('L-', 'L'), ('-', ''), ('-..', '..'), ('CAL.a.-', 'CAL.a.'), ('WW-', 'WW'), ('-f', 'f'), ('VW-', 'VW'), ('C-', 'C')] PHJ18860801-V02-02-page7.txt: [('success-', 'success')] PHJ18860801-V02-02-page9.txt: [('mus-', 'mus')] PHJ18861001-V02-03-page1.txt: [('-', ''), ('-k', 'k'), ('.-', '.'), ('....-', '....'), ('-..', '..'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('r--', 'r-'), ('-', ''), ('-', ''), ('N-', 'N'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18861001-V02-03-page11.txt: [('cov-', 'cov')] PHJ18861001-V02-03-page15.txt: [('-at', 'at')] PHJ18861001-V02-03-page18.txt: [('-', ''), ('ex-', 'ex')] PHJ18861001-V02-03-page19.txt: [('nder-cloth--', 'nder-cloth-')] PHJ18861001-V02-03-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18861001-V02-03-page26.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('-', ''), ('-', ''), ('-', ''), ('-page', 'page'), ('-page', 'page')] PHJ18861001-V02-03-page27.txt: [('t--', 't-'), ('-', ''), ('-tQ', 'tQ')] PHJ18861001-V02-03-page28.txt: [('.-', '.'), ('-', ''), ('---', '--')] PHJ18861001-V02-03-page29.txt: [('-MO.', 'MO.'), ('-PAGE', 'PAGE'), ('SAB-', 'SAB'), ('SEC-', 'SEC'), ('-', ''), ('-', '')] PHJ18861001-V02-03-page3.txt: [('cli-', 'cli')] PHJ18861001-V02-03-page30.txt: [('SII-', 'SII'), ('-jil', 'jil'), ('-', ''), ('PRESERVA-', 'PRESERVA'), ('SCRIP-', 'SCRIP'), ('na-', 'na'), ('-', ''), ('con-', 'con')] PHJ18861001-V02-03-page32.txt: [('-', ''), ('IA-', 'IA'), ('VALLEY-', 'VALLEY'), ('--', '-'), ('-', ''), ('lat-', 'lat'), ('lit-', 'lit')] PHJ18861201-V02-04-page1.txt: [('.-.-', '.-.'), ('........--', '........-'), ('N-.-', 'N-.'), ('-', ''), ('-', ''), ('--', '-'), ('.--', '.-'), ('--', '-'), ('--', '-'), ('--', '-'), ('-', ''), ('..-', '..'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-..', '..'), ("-'", "'"), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('.-', '.'), ('-N', 'N'), ('Iiii-', 'Iiii'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('I\'A\'."."-', 'I\'A\'."."'), ('----"zz', '---"zz'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('v-', 'v'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ("'-", "'"), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.--', '.-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18861201-V02-04-page15.txt: [('Jour-', 'Jour'), ('--one', '-one')] PHJ18861201-V02-04-page16.txt: [('--', '-')] PHJ18861201-V02-04-page2.txt: [('-', ''), ('---', '--')] PHJ18861201-V02-04-page20.txt: [('-', '')] PHJ18861201-V02-04-page26.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('-', ''), ('Society-', 'Society')] PHJ18861201-V02-04-page27.txt: [('Es--', 'Es-'), ('PRESERVA-', 'PRESERVA'), ('SCRIP-', 'SCRIP'), ('con-', 'con'), ('-', '')] PHJ18861201-V02-04-page29.txt: [('-ame', 'ame')] PHJ18861201-V02-04-page3.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18861201-V02-04-page30.txt: [('IN-', 'IN')] PHJ18861201-V02-04-page31.txt: [('-', '')] PHJ18861201-V02-04-page32.txt: [('-V', 'V'), ('-.', '.'), ('-', ''), ('-', ''), ('in-', 'in')] PHJ18870201-V02-05-page1.txt: [('-t-', 't-'), ('-..', '..'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Patient-', 'Patient'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870201-V02-05-page13.txt: [('con-', 'con')] PHJ18870201-V02-05-page17.txt: [('something."-', 'something."')] PHJ18870201-V02-05-page19.txt: [('-fr', 'fr'), ('-', ''), ('-', ''), ('beef--', 'beef-'), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870201-V02-05-page2.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18870201-V02-05-page20.txt: [('---', '--')] PHJ18870201-V02-05-page21.txt: [('-from', 'from')] PHJ18870201-V02-05-page22.txt: [('-Upon', 'Upon'), ('be-', 'be')] PHJ18870201-V02-05-page23.txt: [('-', ''), ('-', '')] PHJ18870201-V02-05-page24.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870201-V02-05-page26.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('-FJR-', 'FJR-')] PHJ18870201-V02-05-page29.txt: [('IN-', 'IN'), ('-', ''), ('-', ''), ('-C', 'C'), ('I-', 'I'), ('-htio', 'htio'), ('-', '')] PHJ18870201-V02-05-page30.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18870201-V02-05-page32.txt: [('-.VlP', '.VlP'), ('t-', 't')] PHJ18870201-V02-05-page6.txt: [('-clueing', 'clueing'), ('nar-', 'nar'), ('-cotics', 'cotics'), ('per-', 'per')] PHJ18870401-V02-06-page1.txt: [('-', ''), ('-rl', 'rl'), ('-', ''), ('-', ''), ('-DER', 'DER'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('N-', 'N'), ('a.-', 'a.')] PHJ18870401-V02-06-page10.txt: [('-would', 'would'), ('noth-', 'noth')] PHJ18870401-V02-06-page11.txt: [('--', '-')] PHJ18870401-V02-06-page14.txt: [('Insane-', 'Insane')] PHJ18870401-V02-06-page16.txt: [('an-', 'an')] PHJ18870401-V02-06-page19.txt: [('-', ''), ('Sat-', 'Sat')] PHJ18870401-V02-06-page23.txt: [('-every', 'every')] PHJ18870401-V02-06-page24.txt: [('Peerless-', 'Peerless'), ('.-', '.'), ('-', ''), ('FOUNTAIN-', 'FOUNTAIN'), ('-', '')] PHJ18870401-V02-06-page26.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('y-', 'y'), ('Society-', 'Society'), ('Society-', 'Society'), ('-N.', 'N.'), ('Society-', 'Society'), ('cor-', 'cor'), ('-', ''), ('Kg-', 'Kg'), ('cor-', 'cor')] PHJ18870401-V02-06-page27.txt: [('-and', 'and')] PHJ18870401-V02-06-page28.txt: [('Satan-', 'Satan'), ('-', ''), ('-', '')] PHJ18870401-V02-06-page29.txt: [('-', '')] PHJ18870401-V02-06-page30.txt: [('-.', '.'), ('.--', '.-'), ('-', ''), ('....i.-', '....i.'), ('-', '')] PHJ18870401-V02-06-page31.txt: [('-', ''), ('Prescrip-', 'Prescrip'), ('be-', 'be'), ('sell-', 'sell'), ('IN-', 'IN'), ('-', ''), ('-', '')] PHJ18870401-V02-06-page32.txt: [('-', ''), ('-l', 'l'), ('iitrgiWW.Nfit\'VIVWVit"-', 'iitrgiWW.Nfit\'VIVWVit"'), ('-----', '----'), ('---', '--')] PHJ18870401-V02-06-page7.txt: [('-', '')] PHJ18870601-V02-07-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Circula-', 'Circula'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870601-V02-07-page11.txt: [('-', '')] PHJ18870601-V02-07-page12.txt: [('re-', 're')] PHJ18870601-V02-07-page15.txt: [('Declar-', 'Declar')] PHJ18870601-V02-07-page19.txt: [('-', '')] PHJ18870601-V02-07-page2.txt: [('-', ''), ('-', '')] PHJ18870601-V02-07-page20.txt: [('Mc-', 'Mc')] PHJ18870601-V02-07-page21.txt: [('-sand', 'sand')] PHJ18870601-V02-07-page22.txt: [('differ-', 'differ')] PHJ18870601-V02-07-page25.txt: [('-stomachs', 'stomachs'), ('-cracker', 'cracker'), ('-combines', 'combines'), ('But-', 'But')] PHJ18870601-V02-07-page26.txt: [('PRES-', 'PRES')] PHJ18870601-V02-07-page27.txt: [('-Georgetown', 'Georgetown'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society--', 'Society-'), ('Society-', 'Society'), ('-Ofifce', 'Ofifce'), ('y-', 'y'), ('Society-', 'Society'), ('Society-', 'Society'), ('-Upper', 'Upper'), ('Society-', 'Society'), ('Society-', 'Society'), ('GEN-', 'GEN'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870601-V02-07-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('P-', 'P'), ('-', ''), ('-', ''), ('-', '')] PHJ18870601-V02-07-page29.txt: [('Sup-', 'Sup'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870601-V02-07-page31.txt: [('-Was', 'Was')] PHJ18870601-V02-07-page32.txt: [('Ail-', 'Ail'), ('regularly-gradu-', 'regularly-gradu'), ('-', ''), ('-', '')] PHJ18870601-V02-07-page6.txt: [('-', '')] PHJ18870801-V02-08-page1.txt: [('-Tr"', 'Tr"'), ('-..alii', '..alii'), ('"-I.LI-', '"-I.LI'), ('--', '-'), ('---', '--'), ('-.-', '.-'), ('--', '-'), ('"--', '"-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('A-', 'A'), ('-', ''), ('-.', '.'), ('---.', '--.'), ('-.', '.'), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('G---', 'G--'), ('rr-', 'rr'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('t--', 't-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('o-ys-----', 'o-ys----'), ('-', ''), ("--'", "-'"), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('c-', 'c'), ('-', ''), ('---.', '--.'), ('"-', '"'), ('--', '-'), ('-.', '.'), ('...-', '...'), ('.-', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('--.L', '-.L'), ('-', ''), ('-', ''), ('-', ''), ('-a', 'a'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('--', '-'), ('..--', '..-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', ''), ('--', '-'), ('----', '---'), ('-', '')] PHJ18870801-V02-08-page10.txt: [('-the', 'the')] PHJ18870801-V02-08-page12.txt: [('-wife', 'wife')] PHJ18870801-V02-08-page17.txt: [('-maintains', 'maintains')] PHJ18870801-V02-08-page18.txt: [('re-', 're')] PHJ18870801-V02-08-page2.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870801-V02-08-page25.txt: [('-', '')] PHJ18870801-V02-08-page26.txt: [('-"', '"'), ('S.-', 'S.'), ('-"', '"')] PHJ18870801-V02-08-page27.txt: [('Society-', 'Society'), ('Her-', 'Her'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('-all', 'all'), ('-a', 'a'), ('cor-', 'cor'), ('-', ''), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society')] PHJ18870801-V02-08-page28.txt: [('-', ''), ('-', ''), ('-Address', 'Address'), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18870801-V02-08-page29.txt: [('Prescrlp-', 'Prescrlp'), ('sell-', 'sell'), ('IN-', 'IN'), ('-', ''), ('-', ''), ('-', '')] PHJ18870801-V02-08-page30.txt: [('...y-', '...y')] PHJ18870801-V02-08-page31.txt: [('---', '--'), ('-', ''), ('-', ''), ('-FTIIE', 'FTIIE'), ('--a', '-a'), ('-G', 'G'), ('-HRILLIra', 'HRILLIra')] PHJ18870801-V02-08-page32.txt: [('-', ''), ('-', ''), ('IA-', 'IA'), ('IAlit-', 'IAlit'), ('DIELALTE-', 'DIELALTE')] PHJ18870801-V02-08-page6.txt: [('-', '')] PHJ18871001-V02-09-page1.txt: [('......................................................--', '......................................................-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('po-', 'po'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18871001-V02-09-page17.txt: [('honor-', 'honor')] PHJ18871001-V02-09-page2.txt: [('-', '')] PHJ18871001-V02-09-page21.txt: [('-', ''), ('-', '')] PHJ18871001-V02-09-page22.txt: [('ijousel-', 'ijousel')] PHJ18871001-V02-09-page24.txt: [('--', '-'), ('-', ''), ('-', ''), ('e-', 'e'), ('-', ''), ('-.', '.'), ('Electro-', 'Electro'), ('UT-', 'UT')] PHJ18871001-V02-09-page25.txt: [('-', ''), ('-', '')] PHJ18871001-V02-09-page26.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Society-', 'Society'), ('Record-', 'Record')] PHJ18871001-V02-09-page27.txt: [('-Was', 'Was')] PHJ18871001-V02-09-page28.txt: [('-.', '.')] PHJ18871001-V02-09-page29.txt: [('-WITH', 'WITH'), ('-', '')] PHJ18871001-V02-09-page3.txt: [('-', '')] PHJ18871001-V02-09-page31.txt: [('-TbB', 'TbB'), ('-', ''), ('-', ''), ('-', ''), ('P-', 'P')] PHJ18871001-V02-09-page32.txt: [('-t', 't')] PHJ18871001-V02-09-page33.txt: [('-olum', 'olum')] PHJ18871001-V02-09-page36.txt: [('PAGE-', 'PAGE'), ('-', '')] PHJ18871001-V02-09-page5.txt: [('apolo-', 'apolo')] PHJ18871001-V02-09-page6.txt: [('-', '')] PHJ18871001-V02-09-page7.txt: [('-', '')] PHJ18871001-V02-09-page8.txt: [('window--', 'window-')] PHJ18880101-V03-01-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18880101-V03-01-page10.txt: [('away-', 'away')] PHJ18880101-V03-01-page13.txt: [('be-', 'be')] PHJ18880101-V03-01-page16.txt: [('seasick-', 'seasick')] PHJ18880101-V03-01-page17.txt: [('go-', 'go')] PHJ18880101-V03-01-page19.txt: [('con-', 'con')] PHJ18880101-V03-01-page2.txt: [('saccha-', 'saccha')] PHJ18880101-V03-01-page20.txt: [('-', '')] PHJ18880101-V03-01-page26.txt: [('Tobacco-', 'Tobacco'), ('-', '')] PHJ18880101-V03-01-page28.txt: [('Electro-', 'Electro'), ('----', '---')] PHJ18880101-V03-01-page29.txt: [('-', '')] PHJ18880101-V03-01-page30.txt: [("Bird's-", "Bird's")] PHJ18880101-V03-01-page31.txt: [('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------'), ('-Style', 'Style')] PHJ18880101-V03-01-page32.txt: [('-', '')] PHJ18880101-V03-01-page5.txt: [('mur-', 'mur'), ('-xcellence', 'xcellence')] PHJ18880101-V03-01-page6.txt: [('break-', 'break')] PHJ18880101-V03-01-page7.txt: [('thou-', 'thou')] PHJ18880201-V03-02-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18880201-V03-02-page11.txt: [('iron-', 'iron')] PHJ18880201-V03-02-page13.txt: [('two-', 'two')] PHJ18880201-V03-02-page14.txt: [('-', ''), ('-', '')] PHJ18880201-V03-02-page18.txt: [('-', '')] PHJ18880201-V03-02-page23.txt: [('-', ''), ('-', '')] PHJ18880201-V03-02-page24.txt: [('AD-', 'AD'), ('-page', 'page')] PHJ18880201-V03-02-page26.txt: [('Tobacco-', 'Tobacco'), ('-', '')] PHJ18880201-V03-02-page27.txt: [('-ERFUL', 'ERFUL'), ('-sdho', 'sdho'), ('-Volume', 'Volume')] PHJ18880201-V03-02-page28.txt: [('Electro-', 'Electro')] PHJ18880201-V03-02-page3.txt: [('abomi-', 'abomi')] PHJ18880201-V03-02-page30.txt: [('-b-ums.', 'b-ums.'), ('-This', 'This'), ('-SEND', 'SEND')] PHJ18880201-V03-02-page31.txt: [('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------')] PHJ18880201-V03-02-page4.txt: [('-', '')] PHJ18880201-V03-02-page5.txt: [('emul-', 'emul')] PHJ18880201-V03-02-page8.txt: [('-correct', 'correct')] PHJ18880201-V03-02-page9.txt: [('sensibil-', 'sensibil'), ('con-', 'con'), ('be-', 'be')] PHJ18880301-V03-03-page1.txt: [('-', ''), ('-', '')] PHJ18880301-V03-03-page10.txt: [('blood-', 'blood')] PHJ18880301-V03-03-page11.txt: [('consid-', 'consid'), ('fount-', 'fount')] PHJ18880301-V03-03-page18.txt: [('--', '-')] PHJ18880301-V03-03-page2.txt: [('en-', 'en'), ('sys-', 'sys')] PHJ18880301-V03-03-page20.txt: [('Ex-', 'Ex')] PHJ18880301-V03-03-page24.txt: [('TEM-', 'TEM'), ('Record-', 'Record')] PHJ18880301-V03-03-page25.txt: [('-it', 'it'), ('Electro-', 'Electro'), ('.-', '.'), ('-', ''), ('-', ''), ('---', '--'), ('r---', 'r--'), ('-', ''), ('-', ''), ('-', '')] PHJ18880301-V03-03-page26.txt: [('per-', 'per'), ('--', '-'), ('DE-', 'DE'), ('-page', 'page')] PHJ18880301-V03-03-page27.txt: [('LAUN-', 'LAUN'), ('-', ''), ('-', ''), ('--wl-.food', '-wl-.food'), ('miss.n-fw-', 'miss.n-fw'), ('-page', 'page'), ('-az', 'az')] PHJ18880301-V03-03-page28.txt: [('-', ''), ('-', '')] PHJ18880301-V03-03-page3.txt: [('monu-', 'monu')] PHJ18880301-V03-03-page30.txt: [('--', '-'), ("i'li-", "i'li"), ('---G', '--G'), ("Bird's-", "Bird's")] PHJ18880301-V03-03-page31.txt: [('I-', 'I'), ('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------'), ('--', '-'), ('-', '')] PHJ18880301-V03-03-page4.txt: [('-', '')] PHJ18880301-V03-03-page5.txt: [('ex-', 'ex')] PHJ18880301-V03-03-page7.txt: [('most.-', 'most.')] PHJ18880301-V03-03-page9.txt: [('-', '')] PHJ18880401-V03-04-page1.txt: [('-P', 'P'), ('Mes-', 'Mes')] PHJ18880401-V03-04-page11.txt: [('af-', 'af'), ('de-', 'de')] PHJ18880401-V03-04-page16.txt: [('-is', 'is')] PHJ18880401-V03-04-page17.txt: [('--', '-')] PHJ18880401-V03-04-page2.txt: [('nat-', 'nat')] PHJ18880401-V03-04-page20.txt: [('im-', 'im')] PHJ18880401-V03-04-page23.txt: [('-field', 'field')] PHJ18880401-V03-04-page25.txt: [('ar-', 'ar'), ('-', ''), ('I-', 'I'), ('JOUR-', 'JOUR')] PHJ18880401-V03-04-page26.txt: [('-', ''), ('-', ''), ('Healthr--', 'Healthr-'), ('DE-', 'DE'), ('-PAGE', 'PAGE'), ('-Arithmetic', 'Arithmetic')] PHJ18880401-V03-04-page27.txt: [('LAUN-', 'LAUN'), ("miss.'-", "miss.'"), ("-'-", "'-"), ('-page', 'page'), ('Electro-', 'Electro'), ('-page', 'page'), ('-', ''), ('-', ''), ('-r....', 'r....'), ('-', ''), ('.-.-', '.-.'), ('.-', '.'), ('-', ''), ('--', '-'), ('-..', '..'), ('-', ''), ('C-', 'C'), ("--'", "-'"), ('-', ''), ('r--', 'r-'), ('-', ''), ('-', ''), ('-r', 'r'), ('-', ''), ("-'", "'"), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18880401-V03-04-page28.txt: [('-', '')] PHJ18880401-V03-04-page31.txt: [('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------'), ('-ELTTS', 'ELTTS')] PHJ18880401-V03-04-page32.txt: [('-', ''), ('-r', 'r')] PHJ18880401-V03-04-page8.txt: [('---', '--')] PHJ18880501-V03-05-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18880501-V03-05-page12.txt: [('ju-', 'ju')] PHJ18880501-V03-05-page13.txt: [('-a', 'a')] PHJ18880501-V03-05-page15.txt: [('con-', 'con')] PHJ18880501-V03-05-page18.txt: [('re-', 're')] PHJ18880501-V03-05-page22.txt: [('Some-', 'Some')] PHJ18880501-V03-05-page23.txt: [('in-', 'in')] PHJ18880501-V03-05-page24.txt: [('-page', 'page')] PHJ18880501-V03-05-page25.txt: [('LAUN-', 'LAUN'), ('--wttlood', '-wttlood')] PHJ18880501-V03-05-page26.txt: [('Indianapoli-', 'Indianapoli'), ('-', ''), ('per-', 'per'), ('-twining', 'twining'), ('HY-', 'HY'), ('DE-', 'DE'), ('NX-', 'NX')] PHJ18880501-V03-05-page27.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('FIRST-', 'FIRST'), ('-THE', 'THE'), ('to-', 'to'), ('APPLI-', 'APPLI'), ('-page', 'page'), ('-', ''), ('-', ''), ('-', ''), ('zz--', 'zz-'), ('-', ''), ('-', ''), ("-'z", "'z"), ('-', ''), ('r--', 'r-'), ('.--', '.-'), ('-', ''), ('c-', 'c'), (".'-", ".'"), ('.".--', '.".-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('S-', 'S')] PHJ18880501-V03-05-page28.txt: [('-PAGE', 'PAGE'), ('-Arithmetic', 'Arithmetic'), ('Electro-', 'Electro'), ('PI-', 'PI'), ('-', '')] PHJ18880501-V03-05-page3.txt: [('con-', 'con')] PHJ18880501-V03-05-page31.txt: [('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------')] PHJ18880501-V03-05-page32.txt: [("'-", "'")] PHJ18880501-V03-05-page9.txt: [('chil-', 'chil')] PHJ18880601-V03-06-page1.txt: [('-', '')] PHJ18880601-V03-06-page11.txt: [('-.', '.'), ('-', '')] PHJ18880601-V03-06-page12.txt: [('PEO-', 'PEO')] PHJ18880601-V03-06-page17.txt: [('Thirst-Maker-', 'Thirst-Maker')] PHJ18880601-V03-06-page19.txt: [('WINE.-', 'WINE.'), ('-ro.', 'ro.'), ('INSTITU-', 'INSTITU')] PHJ18880601-V03-06-page20.txt: [('-', ''), ('Pota-', 'Pota')] PHJ18880601-V03-06-page23.txt: [('--that', '-that')] PHJ18880601-V03-06-page25.txt: [('LAUN-', 'LAUN'), ('--s', '-s'), ('-', ''), ('-page', 'page'), ('PA-', 'PA')] PHJ18880601-V03-06-page27.txt: [('-', ''), ('-', ''), ('----', '---'), ('---', '--'), ('FIRST-', 'FIRST'), ('-page', 'page'), ('BEST-', 'BEST'), ('APPLI-', 'APPLI'), ('-rte', 'rte')] PHJ18880601-V03-06-page28.txt: [('Electro-', 'Electro'), ('-PAGE', 'PAGE')] PHJ18880601-V03-06-page30.txt: [('-ums.', 'ums.')] PHJ18880601-V03-06-page31.txt: [('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------')] PHJ18880601-V03-06-page32.txt: [('-', ''), ('CALI-', 'CALI')] PHJ18880601-V03-06-page4.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18880601-V03-06-page6.txt: [('-administer', 'administer'), ('-of', 'of')] PHJ18880601-V03-06-page8.txt: [('-chest', 'chest')] PHJ18880701-V03-07-page1.txt: [('-', ''), ('-', ''), ('-P', 'P'), ('di"-', 'di"')] PHJ18880701-V03-07-page13.txt: [('con-', 'con')] PHJ18880701-V03-07-page15.txt: [('Di-', 'Di')] PHJ18880701-V03-07-page21.txt: [('A-', 'A')] PHJ18880701-V03-07-page26.txt: [('Society-', 'Society'), ('-F.', 'F.'), ('Tennessee-', 'Tennessee'), ("-'", "'"), ('per-', 'per'), ('-', ''), ('j-', 'j'), ("-'", "'"), ('HY-', 'HY')] PHJ18880701-V03-07-page27.txt: [('-', ''), ('-', ''), ('FIRST-', 'FIRST'), ('x-', 'x'), ('-page', 'page'), ('-', ''), ('r--', 'r-'), ('"-', '"'), ('.-', '.'), ('-', ''), ('---THE', '--THE'), ('to-', 'to'), ('APPLI-', 'APPLI'), ('-', ''), ('-', ''), ('ior-', 'ior'), ('i--', 'i-')] PHJ18880701-V03-07-page28.txt: [('com-', 'com'), ('Electro-', 'Electro'), ('-PAGE', 'PAGE')] PHJ18880701-V03-07-page29.txt: [('-', '')] PHJ18880701-V03-07-page30.txt: [('-PAGE', 'PAGE'), ('-', ''), ('-page', 'page')] PHJ18880801-V03-08-page1.txt: [('-PAGE', 'PAGE')] PHJ18880801-V03-08-page15.txt: [('-no', 'no')] PHJ18880801-V03-08-page21.txt: [('-', ''), ('-the', 'the'), ('de-', 'de')] PHJ18880801-V03-08-page24.txt: [("'-", "'")] PHJ18880801-V03-08-page26.txt: [('-Row', 'Row'), ('Society-', 'Society'), ('-F.', 'F.'), ('-Elder', 'Elder'), ('Tennessee-', 'Tennessee'), ('DE-', 'DE'), ('-f', 'f')] PHJ18880801-V03-08-page27.txt: [('es-', 'es'), ('Electro-', 'Electro'), ('-PAGE', 'PAGE'), ('-', '')] PHJ18880801-V03-08-page29.txt: [('-FOR-', 'FOR-')] PHJ18880801-V03-08-page30.txt: [('-page', 'page'), ('-', ''), ('-', ''), ('S--', 'S-'), ('FIRST-', 'FIRST'), ('-', ''), ('--THE', '-THE'), ('-Especially', 'Especially'), ('to-', 'to'), ('-', ''), ('APPLI-', 'APPLI')] PHJ18880801-V03-08-page31.txt: [('-PAGE', 'PAGE'), ('Tobacco-', 'Tobacco'), ('-', ''), ('-page', 'page'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-page', 'page'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18880801-V03-08-page32.txt: [('Vitalske-', 'Vitalske'), ('-', '')] PHJ18880801-V03-08-page6.txt: [('HKALTI-', 'HKALTI')] PHJ18880801-V03-08-page7.txt: [('-', '')] PHJ18880901-V03-09-page1.txt: [('-', ''), ('-PAGE', 'PAGE'), ('-', '')] PHJ18880901-V03-09-page14.txt: [('DRUNK-', 'DRUNK')] PHJ18880901-V03-09-page16.txt: [('to-', 'to'), ('to-', 'to')] PHJ18880901-V03-09-page17.txt: [('---', '--')] PHJ18880901-V03-09-page18.txt: [('---', '--')] PHJ18880901-V03-09-page24.txt: [('OMNIBUS-', 'OMNIBUS')] PHJ18880901-V03-09-page26.txt: [('an-', 'an'), ('ex-', 'ex'), ('-page', 'page'), ('--We', '-We')] PHJ18880901-V03-09-page27.txt: [('Society-', 'Society'), ('-F.', 'F.'), ('EqUAREMX-', 'EqUAREMX')] PHJ18880901-V03-09-page3.txt: [('Infirmi-', 'Infirmi')] PHJ18880901-V03-09-page30.txt: [('Electro-', 'Electro'), ('-PAGE', 'PAGE')] PHJ18880901-V03-09-page31.txt: [('-PAGE', 'PAGE'), ('jour-', 'jour'), ('-', ''), ('PRES-', 'PRES'), ('-page', 'page'), ('-rt', 'rt'), ('-page', 'page')] PHJ18880901-V03-09-page8.txt: [('SENTIMENT-', 'SENTIMENT')] PHJ18880901-V03-09-page9.txt: [('-', '')] PHJ18881001-V03-10-page1.txt: [('-', ''), ('-PAGE', 'PAGE'), ('ner-', 'ner')] PHJ18881001-V03-10-page12.txt: [('be-', 'be')] PHJ18881001-V03-10-page14.txt: [('TempeFar-', 'TempeFar')] PHJ18881001-V03-10-page18.txt: [('contin-', 'contin')] PHJ18881001-V03-10-page19.txt: [('can-', 'can')] PHJ18881001-V03-10-page22.txt: [('-', ''), ('affection-', 'affection')] PHJ18881001-V03-10-page24.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18881001-V03-10-page27.txt: [('Society-', 'Society'), ('-F.', 'F.'), ('-----"', '----"'), ('.j-', '.j'), ('-', '')] PHJ18881001-V03-10-page28.txt: [('Electro-', 'Electro'), ('-PAGE', 'PAGE'), ('-', ''), ('.egi-', '.egi')] PHJ18881001-V03-10-page3.txt: [('OVER-', 'OVER'), ('doubt-', 'doubt')] PHJ18881001-V03-10-page30.txt: [('-', ''), ('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-------', '------')] PHJ18881001-V03-10-page32.txt: [('-', ''), ('But-', 'But')] PHJ18881001-V03-10-page6.txt: [('pro-', 'pro')] PHJ18881001-V03-10-page9.txt: [('-', '')] PHJ18881101-V03-11-page1.txt: [('-', ''), ('-Dor', 'Dor'), ('expe-', 'expe'), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18881101-V03-11-page13.txt: [('-', ''), ('-', '')] PHJ18881101-V03-11-page18.txt: [('to-', 'to')] PHJ18881101-V03-11-page19.txt: [('pros-', 'pros')] PHJ18881101-V03-11-page20.txt: [('TEM-', 'TEM')] PHJ18881101-V03-11-page24.txt: [('tyran-', 'tyran')] PHJ18881101-V03-11-page26.txt: [('brush-', 'brush')] PHJ18881101-V03-11-page27.txt: [('-Ctr', 'Ctr'), ('COPlEII-', 'COPlEII'), ('-Y.', 'Y.')] PHJ18881101-V03-11-page28.txt: [('Society-', 'Society'), ('-F.', 'F.'), ('-f-', 'f-'), ('-', ''), ('DE-', 'DE')] PHJ18881101-V03-11-page29.txt: [('-PAGE', 'PAGE'), ('-', ''), ('PRES-', 'PRES'), ('-page', 'page'), ('in-', 'in'), ('-a', 'a'), ('-"', '"'), ('"Y-', '"Y'), ('-', ''), ('-page', 'page'), ('-', '')] PHJ18881101-V03-11-page3.txt: [('syste-', 'syste'), ('Re-', 'Re')] PHJ18881101-V03-11-page31.txt: [('-FOR-', 'FOR-'), ('-', ''), ('-EN.q', 'EN.q')] PHJ18881101-V03-11-page5.txt: [('-', '')] PHJ18881101-V03-11-page7.txt: [('excep-', 'excep')] PHJ18881201-V03-12-page1.txt: [('-', ''), ('-PAGE', 'PAGE'), ('ex-', 'ex')] PHJ18881201-V03-12-page17.txt: [('Ex-', 'Ex')] PHJ18881201-V03-12-page19.txt: [('ex-', 'ex')] PHJ18881201-V03-12-page21.txt: [('re-', 're'), ('-', ''), ('treat-', 'treat')] PHJ18881201-V03-12-page27.txt: [('-page', 'page'), ('Society-', 'Society'), ('-F.', 'F.'), ('Tennessee-', 'Tennessee')] PHJ18881201-V03-12-page28.txt: [('-', ''), ('-', '')] PHJ18881201-V03-12-page29.txt: [('L-', 'L'), ('-', ''), ('-FOR-', 'FOR-'), ('Cal-', 'Cal')] PHJ18881201-V03-12-page30.txt: [('HY-', 'HY'), ('DE-', 'DE'), ('-', ''), ('NX.-', 'NX.'), ('LI-', 'LI')] PHJ18881201-V03-12-page31.txt: [('PRES-', 'PRES'), ('-page', 'page'), ('Electro-', 'Electro'), ('-PAGE', 'PAGE'), ('-', '')] PHJ18881201-V03-12-page32.txt: [('OR-', 'OR')] PHJ18881201-V03-12-page33.txt: [('Hip-', 'Hip')] PHJ18881201-V03-12-page7.txt: [('re-', 're')] PHJ18881201-V03-12-page9.txt: [('them-', 'them'), ('be-', 'be')] PHJ18890101-V04-01-page1.txt: [('-PAGE', 'PAGE'), ('-', ''), ('-', '')] PHJ18890101-V04-01-page17.txt: [('-', '')] PHJ18890101-V04-01-page21.txt: [('HYPERTRO-', 'HYPERTRO')] PHJ18890101-V04-01-page23.txt: [('ordina-', 'ordina')] PHJ18890101-V04-01-page26.txt: [('-V', 'V')] PHJ18890101-V04-01-page27.txt: [('-page', 'page'), ('-page.', 'page.'), ('-page', 'page'), ('things--', 'things-'), ('Society-', 'Society'), ('-F.', 'F.'), ('Indian-', 'Indian')] PHJ18890101-V04-01-page28.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18890101-V04-01-page29.txt: [('But-', 'But')] PHJ18890101-V04-01-page3.txt: [('ani-', 'ani')] PHJ18890101-V04-01-page30.txt: [('-PAGE', 'PAGE')] PHJ18890101-V04-01-page31.txt: [('-TEMPERANCE', 'TEMPERANCE'), ('-s-', 's-'), ('-page', 'page'), ('-', ''), ('-', '')] PHJ18890101-V04-01-page32.txt: [('L-', 'L')] PHJ18890101-V04-01-page9.txt: [('edu-', 'edu')] PHJ18890201-V04-02-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-IMGE', 'IMGE'), ('-', '')] PHJ18890201-V04-02-page11.txt: [('differ-', 'differ'), ('atten-', 'atten')] PHJ18890201-V04-02-page14.txt: [('Consul-', 'Consul')] PHJ18890201-V04-02-page16.txt: [('INTEMPER-', 'INTEMPER')] PHJ18890201-V04-02-page17.txt: [('medi-', 'medi')] PHJ18890201-V04-02-page18.txt: [('pri-', 'pri')] PHJ18890201-V04-02-page2.txt: [('membr-', 'membr'), ('-e', 'e'), ('rec-', 'rec')] PHJ18890201-V04-02-page24.txt: [('-', ''), ('Muffins.-', 'Muffins.')] PHJ18890201-V04-02-page26.txt: [('Society-', 'Society'), ('-F.', 'F.'), ('Tennessee-', 'Tennessee')] PHJ18890201-V04-02-page27.txt: [('-page', 'page'), ('-page', 'page'), ('-page', 'page'), ('-page', 'page'), ('-page', 'page')] PHJ18890201-V04-02-page28.txt: [('-', '')] PHJ18890201-V04-02-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-s-', 's-'), ('-page', 'page')] PHJ18890201-V04-02-page3.txt: [('desti-', 'desti')] PHJ18890201-V04-02-page30.txt: [('.-', '.'), ('--.', '-.'), ('-PAGE', 'PAGE')] PHJ18890201-V04-02-page31.txt: [('War-', 'War'), ('-', ''), ('-', ''), ('-', '')] PHJ18890201-V04-02-page5.txt: [('-', '')] PHJ18890301-V04-03-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18890301-V04-03-page14.txt: [('dream.-', 'dream.')] PHJ18890301-V04-03-page15.txt: [('consump-', 'consump')] PHJ18890301-V04-03-page18.txt: [('sui-', 'sui')] PHJ18890301-V04-03-page19.txt: [('be-', 'be'), ('-', ''), ('A-', 'A')] PHJ18890301-V04-03-page22.txt: [('cream-', 'cream'), ('Vegetarian-', 'Vegetarian')] PHJ18890301-V04-03-page23.txt: [('-ttrwrItsrovv"-', 'ttrwrItsrovv"-')] PHJ18890301-V04-03-page25.txt: [('GAR-', 'GAR')] PHJ18890301-V04-03-page26.txt: [("t'-", "t'")] PHJ18890301-V04-03-page27.txt: [('-', ''), ('-', '')] PHJ18890301-V04-03-page28.txt: [('-', ''), ('But-', 'But')] PHJ18890301-V04-03-page29.txt: [('-', ''), ('-page', 'page')] PHJ18890301-V04-03-page3.txt: [('WES-', 'WES')] PHJ18890301-V04-03-page30.txt: [('-', '')] PHJ18890301-V04-03-page32.txt: [('-----I', '----I'), ('circu-', 'circu'), ('-PAGE', 'PAGE')] PHJ18890301-V04-03-page7.txt: [('-', ''), ('-', '')] PHJ18890301-V04-03-page9.txt: [('Chris-', 'Chris'), ('yield-', 'yield')] PHJ18890401-V04-04-page1.txt: [('-', ''), ('at-', 'at'), ('-P', 'P')] PHJ18890401-V04-04-page13.txt: [('-', '')] PHJ18890401-V04-04-page15.txt: [('naughti-', 'naughti')] PHJ18890401-V04-04-page17.txt: [('gusta-', 'gusta'), ('sen-', 'sen')] PHJ18890401-V04-04-page19.txt: [('impa-', 'impa'), ('re-', 're'), ('ex-', 'ex')] PHJ18890401-V04-04-page2.txt: [('-plainly', 'plainly')] PHJ18890401-V04-04-page24.txt: [('possi-', 'possi')] PHJ18890401-V04-04-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie'), ('-"-', '"-'), ('-', ''), ('..----', '..---'), ('War-', 'War')] PHJ18890401-V04-04-page28.txt: [('SANC-', 'SANC'), ("'-", "'"), ('-', ''), ('---', '--'), ('p-', 'p'), ('.-', '.'), ('I--', 'I-'), ('-', ''), ('.--', '.-'), ('-', ''), ('-', ''), ('-', ''), ('r-T-', 'r-T'), ('...-', '...'), ("-'-'c'zic.-", "'-'c'zic.-"), ('-', ''), ('I.--', 'I.-'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('...-', '...'), ('.-', '.'), ('I-', 'I'), ('-..', '..'), ('-', ''), ('--', '-'), ('--', '-'), ('--', '-'), ('-', ''), ("'-", "'"), ('..-', '..'), ('.--', '.-')] PHJ18890401-V04-04-page30.txt: [('-', ''), ('-page', 'page')] PHJ18890401-V04-04-page31.txt: [('But-', 'But')] PHJ18890401-V04-04-page32.txt: [('-PAGE', 'PAGE'), ('-', ''), ('NEW-', 'NEW')] PHJ18890401-V04-04-page6.txt: [('actinomy-', 'actinomy')] PHJ18890501-V04-05-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18890501-V04-05-page11.txt: [('men-', 'men')] PHJ18890501-V04-05-page14.txt: [('sun-', 'sun'), ('Har-', 'Har')] PHJ18890501-V04-05-page15.txt: [('im-', 'im'), ('correspond-', 'correspond')] PHJ18890501-V04-05-page16.txt: [('car-', 'car')] PHJ18890501-V04-05-page17.txt: [('compre-', 'compre')] PHJ18890501-V04-05-page23.txt: [('cocaine-', 'cocaine')] PHJ18890501-V04-05-page24.txt: [('pat-', 'pat'), ('-eierVit', 'eierVit')] PHJ18890501-V04-05-page25.txt: [('-Ertred', 'Ertred'), ('-', ''), ('-', '')] PHJ18890501-V04-05-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie'), ('Society-', 'Society'), ('Tennessee-', 'Tennessee'), ('use-', 'use')] PHJ18890501-V04-05-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Nat-', 'Nat'), ('-', ''), ('-', ''), ('--Massage', '-Massage'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18890501-V04-05-page30.txt: [('-', ''), ('-page', 'page')] PHJ18890501-V04-05-page31.txt: [('--', '-'), ('-FOR-', 'FOR-')] PHJ18890601-V04-06-page1.txt: [('-PAGE', 'PAGE')] PHJ18890601-V04-06-page11.txt: [('hav-', 'hav'), ('Trib-', 'Trib')] PHJ18890601-V04-06-page12.txt: [('--Stier', '-Stier')] PHJ18890601-V04-06-page15.txt: [('-', ''), ('-', '')] PHJ18890601-V04-06-page17.txt: [('an-', 'an')] PHJ18890601-V04-06-page19.txt: [('carboni-', 'carboni'), ('can-', 'can')] PHJ18890601-V04-06-page2.txt: [('appropri-', 'appropri')] PHJ18890601-V04-06-page25.txt: [('-', ''), ("Children's-", "Children's")] PHJ18890601-V04-06-page26.txt: [('III..-', 'III..')] PHJ18890601-V04-06-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie'), ('Tennessee-', 'Tennessee'), ("'-", "'")] PHJ18890601-V04-06-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('War-', 'War'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('----', '---'), ('-', ''), ('-', ''), ('-', ''), ('-a', 'a'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18890601-V04-06-page30.txt: [('-', ''), ('-', ''), ('-page', 'page')] PHJ18890601-V04-06-page5.txt: [('skel-', 'skel')] PHJ18890601-V04-06-page8.txt: [('indul-', 'indul')] PHJ18890701-V04-07-page1.txt: [('-PAGE', 'PAGE'), ('-', '')] PHJ18890701-V04-07-page10.txt: [('-', ''), ('Consti-', 'Consti')] PHJ18890701-V04-07-page12.txt: [('re-', 're')] PHJ18890701-V04-07-page17.txt: [('can-', 'can'), ('--', '-'), ('mani-', 'mani')] PHJ18890701-V04-07-page2.txt: [('one-', 'one')] PHJ18890701-V04-07-page22.txt: [('once.-', 'once.')] PHJ18890701-V04-07-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie'), ('Tennessee-', 'Tennessee')] PHJ18890701-V04-07-page28.txt: [('-', ''), ('-.."', '.."'), ('World-', 'World'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18890701-V04-07-page30.txt: [('exam-', 'exam'), ('-PAGE', 'PAGE'), ('-THE', 'THE')] PHJ18890701-V04-07-page8.txt: [('IDOL-', 'IDOL')] PHJ18890701-V04-07-page9.txt: [('com-', 'com'), ('tyran-', 'tyran')] PHJ18890801-V04-08-page1.txt: [('condi-', 'condi'), ('-', ''), ('-PAGE', 'PAGE')] PHJ18890801-V04-08-page10.txt: [('-ginie', 'ginie')] PHJ18890801-V04-08-page11.txt: [('granu-', 'granu')] PHJ18890801-V04-08-page15.txt: [('swal-', 'swal')] PHJ18890801-V04-08-page21.txt: [('WEATHER.-', 'WEATHER.')] PHJ18890801-V04-08-page24.txt: [('-', '')] PHJ18890801-V04-08-page26.txt: [('pi-', 'pi'), ('-', '')] PHJ18890801-V04-08-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie'), ('Tennessee-', 'Tennessee')] PHJ18890801-V04-08-page28.txt: [('World.-', 'World.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18890801-V04-08-page30.txt: [('--', '-'), ('-', ''), ('-', '')] PHJ18890801-V04-08-page31.txt: [('-page', 'page')] PHJ18890801-V04-08-page32.txt: [('TEN-', 'TEN')] PHJ18890801-V04-08-page5.txt: [('in-', 'in')] PHJ18890801-V04-08-page7.txt: [('Hy-', 'Hy')] PHJ18890901-V04-09-page1.txt: [('-', ''), ('-', ''), ('-PAGE', 'PAGE'), ('o-', 'o')] PHJ18890901-V04-09-page14.txt: [('corn-', 'corn'), ('-', '')] PHJ18890901-V04-09-page15.txt: [('per-', 'per'), ('---why', '--why')] PHJ18890901-V04-09-page18.txt: [('conver-', 'conver')] PHJ18890901-V04-09-page19.txt: [('es-', 'es')] PHJ18890901-V04-09-page2.txt: [('Ca-', 'Ca'), ('Albu-', 'Albu'), ('ap-', 'ap')] PHJ18890901-V04-09-page21.txt: [('-should', 'should')] PHJ18890901-V04-09-page23.txt: [('ordi-', 'ordi')] PHJ18890901-V04-09-page24.txt: [('al-', 'al')] PHJ18890901-V04-09-page25.txt: [('Ashby-', 'Ashby'), ("Children's-", "Children's")] PHJ18890901-V04-09-page26.txt: [('HEL-', 'HEL')] PHJ18890901-V04-09-page27.txt: [('-PAGE', 'PAGE'), ('-', ''), ('-', ''), ('-Lillie', 'Lillie'), ('Tennessee-', 'Tennessee')] PHJ18890901-V04-09-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Nat-', 'Nat'), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18890901-V04-09-page30.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18890901-V04-09-page31.txt: [('-', '')] PHJ18890901-V04-09-page4.txt: [('-', ''), ('air."--', 'air."-')] PHJ18890901-V04-09-page5.txt: [('ex-', 'ex')] PHJ18890901-V04-09-page6.txt: [('wife.--', 'wife.-')] PHJ18890901-V04-09-page8.txt: [('dy-', 'dy')] PHJ18891001-V04-10-page1.txt: [('-', ''), ('-PAGE', 'PAGE'), ('stom-', 'stom')] PHJ18891001-V04-10-page11.txt: [('-', '')] PHJ18891001-V04-10-page12.txt: [('stupify-', 'stupify')] PHJ18891001-V04-10-page15.txt: [('com-', 'com')] PHJ18891001-V04-10-page16.txt: [('island-', 'island')] PHJ18891001-V04-10-page17.txt: [('peo-', 'peo'), ('sick-', 'sick')] PHJ18891001-V04-10-page20.txt: [('fel-', 'fel')] PHJ18891001-V04-10-page21.txt: [('ques-', 'ques')] PHJ18891001-V04-10-page26.txt: [('-page', 'page')] PHJ18891001-V04-10-page27.txt: [('-PAGE', 'PAGE'), ('-everal', 'everal'), ('-THE', 'THE'), ('Society-', 'Society'), ('-Lillie', 'Lillie'), ('-', ''), ('-', '')] PHJ18891001-V04-10-page28.txt: [('.-', '.')] PHJ18891001-V04-10-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('quarts----', 'quarts---'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', '')] PHJ18891001-V04-10-page30.txt: [('COMBINING-', 'COMBINING')] PHJ18891001-V04-10-page31.txt: [('-', ''), ('-page', 'page')] PHJ18891001-V04-10-page32.txt: [('--', '-'), ('-', ''), ('--z.', '-z.'), ('IR-', 'IR'), ('-', ''), ('--', '-')] PHJ18891001-V04-10-page6.txt: [('--', '-')] PHJ18891101-V04-11-page1.txt: [('-.P', '.P'), ('antifi-', 'antifi')] PHJ18891101-V04-11-page13.txt: [('gen-', 'gen')] PHJ18891101-V04-11-page14.txt: [('physi-', 'physi'), ('stimula-', 'stimula')] PHJ18891101-V04-11-page15.txt: [('Inter-', 'Inter')] PHJ18891101-V04-11-page17.txt: [('ab-', 'ab')] PHJ18891101-V04-11-page21.txt: [('--', '-'), ('pro-', 'pro')] PHJ18891101-V04-11-page24.txt: [('-hygiene', 'hygiene')] PHJ18891101-V04-11-page25.txt: [("Children's-", "Children's")] PHJ18891101-V04-11-page26.txt: [('TEM-', 'TEM')] PHJ18891101-V04-11-page27.txt: [('-Lillie', 'Lillie'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18891101-V04-11-page29.txt: [('World.-', 'World.'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('publi-', 'publi'), ('-', '')] PHJ18891101-V04-11-page30.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18891101-V04-11-page31.txt: [('-', ''), ('-page', 'page')] PHJ18891101-V04-11-page32.txt: [('es-', 'es'), ('GA-', 'GA')] PHJ18891101-V04-11-page6.txt: [('ear-', 'ear')] PHJ18891101-V04-11-page7.txt: [('Ro-', 'Ro')] PHJ18891101-V04-11-page8.txt: [('acknowl-', 'acknowl'), ('tempta-', 'tempta')] PHJ18891101-V04-11-page9.txt: [('im-', 'im'), ('over-', 'over')] PHJ18891201-V04-12-page1.txt: [('-', ''), ('-P', 'P')] PHJ18891201-V04-12-page12.txt: [('mas-', 'mas')] PHJ18891201-V04-12-page13.txt: [('-', '')] PHJ18891201-V04-12-page14.txt: [('-', '')] PHJ18891201-V04-12-page17.txt: [('-EDITOR', 'EDITOR')] PHJ18891201-V04-12-page19.txt: [('Stiff-', 'Stiff')] PHJ18891201-V04-12-page21.txt: [('meet-', 'meet'), ('immedi-', 'immedi'), ('familiar-', 'familiar'), ('cer-', 'cer'), ('be-', 'be')] PHJ18891201-V04-12-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18891201-V04-12-page30.txt: [('--', '-')] PHJ18891201-V04-12-page31.txt: [('-', ''), ('-page', 'page')] PHJ18891201-V04-12-page32.txt: [('Li-', 'Li'), ('-', '')] PHJ18891201-V04-12-page33.txt: [('died-', 'died'), ('-', ''), ('-', ''), ('-', ''), ('absti-', 'absti'), ('-', ''), ('-', '')] PHJ18891201-V04-12-page34.txt: [('-orming', 'orming'), ('-', ''), ('chil-', 'chil'), ('-', '')] PHJ18891201-V04-12-page7.txt: [('-', ''), ('-', '')] PHJ18900101-V05-01-page1.txt: [('-', ''), ('ad-', 'ad'), ('-dressed', 'dressed'), ('-', '')] PHJ18900101-V05-01-page11.txt: [('ex-', 'ex'), ('par-', 'par')] PHJ18900101-V05-01-page12.txt: [('de-', 'de')] PHJ18900101-V05-01-page13.txt: [('com-', 'com')] PHJ18900101-V05-01-page15.txt: [('police-', 'police'), ('occa-', 'occa')] PHJ18900101-V05-01-page17.txt: [('awak-', 'awak')] PHJ18900101-V05-01-page19.txt: [('-which', 'which')] PHJ18900101-V05-01-page24.txt: [('compres-', 'compres')] PHJ18900101-V05-01-page25.txt: [('-', '')] PHJ18900101-V05-01-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie')] PHJ18900101-V05-01-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900101-V05-01-page29.txt: [('-FOR', 'FOR'), ('-page', 'page')] PHJ18900101-V05-01-page30.txt: [('-', '')] PHJ18900101-V05-01-page31.txt: [('es-', 'es')] PHJ18900101-V05-01-page32.txt: [('-', ''), ('-Orders', 'Orders')] PHJ18900101-V05-01-page6.txt: [('feet-', 'feet'), ('-"and', '"and')] PHJ18900101-V05-01-page9.txt: [('temp-', 'temp'), ('pat-', 'pat')] PHJ18900201-V05-02-page1.txt: [('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18900201-V05-02-page10.txt: [('to-', 'to')] PHJ18900201-V05-02-page14.txt: [('neighbor-', 'neighbor')] PHJ18900201-V05-02-page15.txt: [('-', ''), ('under-', 'under')] PHJ18900201-V05-02-page16.txt: [('at-', 'at'), ('-', ''), ('di-', 'di')] PHJ18900201-V05-02-page17.txt: [('ex-', 'ex'), ('un-', 'un')] PHJ18900201-V05-02-page2.txt: [('ab-', 'ab')] PHJ18900201-V05-02-page27.txt: [('Society-', 'Society'), ('-Lillie', 'Lillie'), ('Tennessee-', 'Tennessee')] PHJ18900201-V05-02-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Ire', 'Ire'), ('Md.-', 'Md.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900201-V05-02-page29.txt: [('secur-', 'secur'), ('-page', 'page')] PHJ18900201-V05-02-page3.txt: [('...-', '...'), ('-', ''), ('..-', '..'), ('-.VT', '.VT')] PHJ18900201-V05-02-page30.txt: [('-', ''), ('-PAGE', 'PAGE'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900201-V05-02-page31.txt: [('-', '')] PHJ18900201-V05-02-page4.txt: [('-', '')] PHJ18900201-V05-02-page5.txt: [('in-', 'in'), ('investi-', 'investi'), ('aw-', 'aw'), ('vil-', 'vil')] PHJ18900201-V05-02-page7.txt: [('-', ''), ('-', ''), ('-', ''), ('re-', 're')] PHJ18900301-V05-03-page1.txt: [('-PAGE', 'PAGE'), ('-', ''), ('-', '')] PHJ18900301-V05-03-page13.txt: [('-caliber', 'caliber')] PHJ18900301-V05-03-page15.txt: [('-', '')] PHJ18900301-V05-03-page16.txt: [('ache-', 'ache'), ('--', '-')] PHJ18900301-V05-03-page18.txt: [('abomi-', 'abomi')] PHJ18900301-V05-03-page21.txt: [('-', '')] PHJ18900301-V05-03-page22.txt: [('When-', 'When')] PHJ18900301-V05-03-page23.txt: [('con-', 'con'), ('be-', 'be')] PHJ18900301-V05-03-page24.txt: [('measure-', 'measure')] PHJ18900301-V05-03-page26.txt: [('"Non-', '"Non')] PHJ18900301-V05-03-page27.txt: [('---', '--')] PHJ18900301-V05-03-page28.txt: [('stock-', 'stock')] PHJ18900301-V05-03-page29.txt: [('-', '')] PHJ18900301-V05-03-page30.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-I', 'I'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('quarts-', 'quarts'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('quarts----', 'quarts---'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900301-V05-03-page32.txt: [('-', ''), ('RESTTeI-', 'RESTTeI'), ('Garri-', 'Garri')] PHJ18900401-V05-04-page1.txt: [('-', ''), ('-', '')] PHJ18900401-V05-04-page11.txt: [('-', '')] PHJ18900401-V05-04-page13.txt: [('de-', 'de')] PHJ18900401-V05-04-page14.txt: [('Con-', 'Con')] PHJ18900401-V05-04-page17.txt: [('nour-', 'nour'), ('mat-', 'mat')] PHJ18900401-V05-04-page21.txt: [('Look-', 'Look'), ('to-', 'to'), ('--', '-')] PHJ18900401-V05-04-page22.txt: [('re-', 're')] PHJ18900401-V05-04-page24.txt: [('"abstain-', '"abstain')] PHJ18900401-V05-04-page26.txt: [('Re-', 'Re')] PHJ18900401-V05-04-page27.txt: [('-Deseret', 'Deseret')] PHJ18900401-V05-04-page28.txt: [('-', '')] PHJ18900401-V05-04-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900401-V05-04-page30.txt: [('-', '')] PHJ18900401-V05-04-page31.txt: [('-Ts.', 'Ts.'), ('es-', 'es')] PHJ18900401-V05-04-page32.txt: [('I.-', 'I.'), ('r-', 'r'), ('ilst.-', 'ilst.')] PHJ18900401-V05-04-page6.txt: [('car-', 'car')] PHJ18900401-V05-04-page7.txt: [('recogni-', 'recogni'), ('in-', 'in'), ('nu-', 'nu'), ('rela-', 'rela')] PHJ18900401-V05-04-page8.txt: [('com-', 'com')] PHJ18900401-V05-04-page9.txt: [('appe-', 'appe')] PHJ18900501-V05-05-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-PAGE', 'PAGE'), ('con-', 'con')] PHJ18900501-V05-05-page11.txt: [('un-', 'un'), ('in-', 'in'), ('re-', 're')] PHJ18900501-V05-05-page12.txt: [('-', ''), ('--', '-'), ('con-', 'con')] PHJ18900501-V05-05-page14.txt: [('-', ''), ('-', '')] PHJ18900501-V05-05-page15.txt: [('-', '')] PHJ18900501-V05-05-page18.txt: [('ac-', 'ac')] PHJ18900501-V05-05-page19.txt: [('con-', 'con')] PHJ18900501-V05-05-page20.txt: [('H.-', 'H.')] PHJ18900501-V05-05-page21.txt: [('impor-', 'impor')] PHJ18900501-V05-05-page22.txt: [('let-', 'let')] PHJ18900501-V05-05-page23.txt: [('decom-', 'decom'), ('cot-', 'cot')] PHJ18900501-V05-05-page26.txt: [('Mc-', 'Mc'), ('JOUR-', 'JOUR')] PHJ18900501-V05-05-page27.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('-J.', 'J.')] PHJ18900501-V05-05-page28.txt: [('-', ''), ('-', '')] PHJ18900501-V05-05-page29.txt: [('-', '')] PHJ18900501-V05-05-page30.txt: [('tele-', 'tele')] PHJ18900501-V05-05-page31.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900501-V05-05-page32.txt: [('-', '')] PHJ18900501-V05-05-page8.txt: [('be-', 'be')] PHJ18900601-V05-06-page1.txt: [('-PAGE', 'PAGE')] PHJ18900601-V05-06-page12.txt: [('-thought', 'thought')] PHJ18900601-V05-06-page13.txt: [('run-', 'run')] PHJ18900601-V05-06-page15.txt: [('in-', 'in')] PHJ18900601-V05-06-page18.txt: [('MURRAIN.-', 'MURRAIN.')] PHJ18900601-V05-06-page2.txt: [('-', '')] PHJ18900601-V05-06-page23.txt: [('am-', 'am')] PHJ18900601-V05-06-page24.txt: [('produc-', 'produc')] PHJ18900601-V05-06-page25.txt: [('--."', '-."')] PHJ18900601-V05-06-page26.txt: [('-', '')] PHJ18900601-V05-06-page27.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('-J.', 'J.'), ('Lincoln-', 'Lincoln'), ('Lancet-', 'Lancet')] PHJ18900601-V05-06-page29.txt: [('-', '')] PHJ18900601-V05-06-page3.txt: [('grad-', 'grad')] PHJ18900601-V05-06-page30.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900601-V05-06-page31.txt: [('-e', 'e'), ('-', '')] PHJ18900601-V05-06-page32.txt: [('-', ''), ('-idesiring', 'idesiring'), ('tele-', 'tele')] PHJ18900601-V05-06-page5.txt: [('re-', 're')] PHJ18900601-V05-06-page6.txt: [('sys-', 'sys')] PHJ18900701-V05-07-page1.txt: [('-', ''), ('-', '')] PHJ18900701-V05-07-page13.txt: [('Tem-', 'Tem')] PHJ18900701-V05-07-page19.txt: [('ousel-', 'ousel')] PHJ18900701-V05-07-page22.txt: [('com-', 'com'), ('de-', 'de'), ('hun-', 'hun'), ('ex-', 'ex')] PHJ18900701-V05-07-page25.txt: [('CLOTHING.-', 'CLOTHING.')] PHJ18900701-V05-07-page26.txt: [('SUR-', 'SUR'), ('-', '')] PHJ18900701-V05-07-page27.txt: [('Society-', 'Society'), ('Society-', 'Society'), ('-J.', 'J.'), ('Lincoln-', 'Lincoln')] PHJ18900701-V05-07-page29.txt: [('-', '')] PHJ18900701-V05-07-page3.txt: [('major-', 'major')] PHJ18900701-V05-07-page30.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900701-V05-07-page31.txt: [('.twani-', '.twani'), ('-', ''), ('-', ''), ('.-', '.'), ('-', '')] PHJ18900701-V05-07-page32.txt: [('apply-', 'apply')] PHJ18900701-V05-07-page4.txt: [('deodor-', 'deodor')] PHJ18900701-V05-07-page5.txt: [('SELF-', 'SELF')] PHJ18900701-V05-07-page6.txt: [('INDEPEND-', 'INDEPEND'), ('con-', 'con'), ('reduc-', 'reduc')] PHJ18900801-V05-08-page1.txt: [('-', '')] PHJ18900801-V05-08-page12.txt: [('-', '')] PHJ18900801-V05-08-page15.txt: [('side-', 'side')] PHJ18900801-V05-08-page18.txt: [('pre-', 'pre'), ('immedi-', 'immedi')] PHJ18900801-V05-08-page2.txt: [('-', ''), ('quanti-', 'quanti'), ('-', '')] PHJ18900801-V05-08-page20.txt: [('-', '')] PHJ18900801-V05-08-page23.txt: [('ex-', 'ex'), ('hap-', 'hap'), ('treat-', 'treat'), ('at-', 'at')] PHJ18900801-V05-08-page26.txt: [('PROHIBI-', 'PROHIBI'), ('SUR-', 'SUR')] PHJ18900801-V05-08-page27.txt: [('civiliza-', 'civiliza'), ('-page', 'page')] PHJ18900801-V05-08-page28.txt: [('corn-', 'corn'), ('im-', 'im')] PHJ18900801-V05-08-page29.txt: [('-', ''), ('-', ''), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('Cata-', 'Cata'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900801-V05-08-page30.txt: [('-i', 'i'), ('-Unparalleled', 'Unparalleled'), ('-', '')] PHJ18900801-V05-08-page31.txt: [('Corres-', 'Corres')] PHJ18900801-V05-08-page32.txt: [('--', '-'), ('-', ''), ('-', '')] PHJ18900801-V05-08-page7.txt: [('self-', 'self')] PHJ18900901-V05-09-page1.txt: [('-F', 'F')] PHJ18900901-V05-09-page13.txt: [('--', '-'), ('-', '')] PHJ18900901-V05-09-page14.txt: [('Fif-', 'Fif')] PHJ18900901-V05-09-page15.txt: [('impair-', 'impair')] PHJ18900901-V05-09-page16.txt: [('condition--', 'condition-')] PHJ18900901-V05-09-page17.txt: [('con-', 'con')] PHJ18900901-V05-09-page21.txt: [('min-', 'min')] PHJ18900901-V05-09-page22.txt: [('one-', 'one')] PHJ18900901-V05-09-page23.txt: [('in-', 'in')] PHJ18900901-V05-09-page26.txt: [('Record-', 'Record')] PHJ18900901-V05-09-page27.txt: [('-food', 'food'), ('-page', 'page'), ('dice-', 'dice'), ('Society-', 'Society'), ('Society-', 'Society'), ('-J.', 'J.'), ('Lincoln-', 'Lincoln'), ('-T.', 'T.'), ('Switzerland-', 'Switzerland'), ('Tennessee-', 'Tennessee')] PHJ18900901-V05-09-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('------', '-----'), ('Warrant-', 'Warrant'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18900901-V05-09-page29.txt: [('com-', 'com')] PHJ18900901-V05-09-page31.txt: [('WORLD.-', 'WORLD.'), ('-make', 'make')] PHJ18900901-V05-09-page32.txt: [('E-', 'E'), ('E-', 'E'), ('-DOORS', 'DOORS'), ('pock-', 'pock'), ('---', '--')] PHJ18900901-V05-09-page9.txt: [('quota-', 'quota'), ('com-', 'com'), ('uncom-', 'uncom'), ('mov-', 'mov')] PHJ18901001-V05-10-page1.txt: [('-', ''), ('pa-', 'pa'), ('conta-', 'conta')] PHJ18901001-V05-10-page15.txt: [('seer-', 'seer')] PHJ18901001-V05-10-page16.txt: [('LIQ-', 'LIQ'), ('temper-', 'temper')] PHJ18901001-V05-10-page17.txt: [('fanatic.--', 'fanatic.-')] PHJ18901001-V05-10-page18.txt: [('-', '')] PHJ18901001-V05-10-page2.txt: [('-', '')] PHJ18901001-V05-10-page22.txt: [('"bil-', '"bil')] PHJ18901001-V05-10-page26.txt: [('CON-', 'CON')] PHJ18901001-V05-10-page27.txt: [('-page', 'page'), ('-page', 'page'), ('-page', 'page'), ('Society-', 'Society'), ('Society-', 'Society'), ('-J.', 'J.')] PHJ18901001-V05-10-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('------', '-----'), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18901001-V05-10-page31.txt: [('tele-', 'tele'), ("'--", "'-")] PHJ18901001-V05-10-page32.txt: [('SATURDAY.-', 'SATURDAY.')] PHJ18901101-V05-11-page1.txt: [('-', ''), ('ex-', 'ex')] PHJ18901101-V05-11-page12.txt: [('par-', 'par'), ('gen-', 'gen')] PHJ18901101-V05-11-page13.txt: [('lounge-', 'lounge')] PHJ18901101-V05-11-page14.txt: [('respect-', 'respect')] PHJ18901101-V05-11-page19.txt: [('IN-', 'IN')] PHJ18901101-V05-11-page2.txt: [("-'", "'")] PHJ18901101-V05-11-page20.txt: [('vege-', 'vege')] PHJ18901101-V05-11-page26.txt: [('ASSOCIA-', 'ASSOCIA'), ('-', ''), ('-WHEREAS', 'WHEREAS')] PHJ18901101-V05-11-page27.txt: [('Society-', 'Society'), ('Sub-', 'Sub')] PHJ18901101-V05-11-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('lltho-', 'lltho'), ('i.ata-', 'i.ata'), ('-', ''), ('quarts-', 'quarts'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18901101-V05-11-page3.txt: [('-at', 'at')] PHJ18901101-V05-11-page31.txt: [('Tel-', 'Tel'), ('In-', 'In'), ('tele-', 'tele'), ('-', '')] PHJ18901101-V05-11-page32.txt: [('E-', 'E'), ('-', ''), ('-', ''), ('E-', 'E'), ('-', '')] PHJ18901101-V05-11-page6.txt: [('-', ''), ('-', '')] PHJ18901101-V05-11-page9.txt: [('Gilles-de-la-', 'Gilles-de-la')] PHJ18901201-V05-12-page1.txt: [('-.Pzi', '.Pzi'), ('Jour-', 'Jour'), ('al-', 'al')] PHJ18901201-V05-12-page11.txt: [('inter-', 'inter')] PHJ18901201-V05-12-page12.txt: [('abste-', 'abste')] PHJ18901201-V05-12-page14.txt: [('DRUNK-', 'DRUNK'), ('-', '')] PHJ18901201-V05-12-page17.txt: [('-', ''), ('dainty-', 'dainty')] PHJ18901201-V05-12-page2.txt: [('dan-', 'dan')] PHJ18901201-V05-12-page22.txt: [('Dis-', 'Dis')] PHJ18901201-V05-12-page26.txt: [('-', ''), ('PA-', 'PA'), ('-page', 'page'), ('sage-', 'sage')] PHJ18901201-V05-12-page27.txt: [('-page', 'page')] PHJ18901201-V05-12-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('"-', '"'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('For-', 'For'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18901201-V05-12-page3.txt: [('-', ''), ('--those', '-those')] PHJ18901201-V05-12-page30.txt: [('ara---', 'ara--'), ('Tr."-', 'Tr."'), ("-'", "'"), ('Typo-', 'Typo'), ('corres--', 'corres-'), ('--', '-'), ("Itl'-", "Itl'")] PHJ18901201-V05-12-page31.txt: [('tele-', 'tele')] PHJ18901201-V05-12-page33.txt: [('-a', 'a'), ('-', '')] PHJ18901201-V05-12-page34.txt: [('-', '')] PHJ18901201-V05-12-page4.txt: [('con-', 'con'), ('through-', 'through')] PHJ18910101-V06-01-page1.txt: [('-', ''), ('-P', 'P'), ('EAT-', 'EAT')] PHJ18910101-V06-01-page15.txt: [('go-', 'go'), ('re-', 're')] PHJ18910101-V06-01-page19.txt: [('re-', 're')] PHJ18910101-V06-01-page2.txt: [('nutrition-', 'nutrition'), ('in-', 'in'), ('sow-', 'sow')] PHJ18910101-V06-01-page23.txt: [('de-', 'de'), ('ab-', 'ab'), ('some-', 'some'), ('con-', 'con'), ('spar-', 'spar')] PHJ18910101-V06-01-page24.txt: [('-with', 'with')] PHJ18910101-V06-01-page25.txt: [('INDIA.-', 'INDIA.'), ('-', '')] PHJ18910101-V06-01-page26.txt: [('-', ''), ('JOUR-', 'JOUR')] PHJ18910101-V06-01-page27.txt: [('-page', 'page')] PHJ18910101-V06-01-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('treat-', 'treat')] PHJ18910101-V06-01-page3.txt: [('-when', 'when')] PHJ18910101-V06-01-page32.txt: [('-', ''), ("'-", "'"), ('--....', '-....'), ('.--', '.-'), (".'-", ".'"), ('--"', '-"'), ('-ir', 'ir'), ('-', ''), ('-', ''), ('Type-', 'Type'), ('Inter-', 'Inter'), ('corres-', 'corres'), ('--', '-'), ('-gia.', 'gia.'), ('-tftl', 'tftl'), ('-e', 'e'), ('-', ''), ('--', '-')] PHJ18910101-V06-01-page5.txt: [('--', '-'), ('-', ''), ('peo-', 'peo'), ('health-', 'health')] PHJ18910101-V06-01-page8.txt: [('OPPOR-', 'OPPOR')] PHJ18910101-V06-01-page9.txt: [('hpsys-', 'hpsys')] PHJ18910201-V06-02-page1.txt: [('-PAGE', 'PAGE')] PHJ18910201-V06-02-page12.txt: [('-on', 'on')] PHJ18910201-V06-02-page18.txt: [('re-', 're'), ('Courier-', 'Courier'), ('SHIR-', 'SHIR')] PHJ18910201-V06-02-page27.txt: [('Health-', 'Health'), ('-t', 't'), ('ad-', 'ad')] PHJ18910201-V06-02-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18910201-V06-02-page29.txt: [('-', ''), ('stock-', 'stock')] PHJ18910201-V06-02-page31.txt: [('-', '')] PHJ18910201-V06-02-page32.txt: [('-Tn', 'Tn'), ('-r', 'r'), ('Type-', 'Type'), ('Inter-', 'Inter'), ('corres-', 'corres'), ('--', '-'), ('..--', '..-')] PHJ18910201-V06-02-page8.txt: [('COR-', 'COR'), ('con-', 'con')] PHJ18910201-V06-02-page9.txt: [('pre-', 'pre')] PHJ18910301-V06-03-page1.txt: [('-', ''), ('EAT-', 'EAT')] PHJ18910301-V06-03-page13.txt: [('al-', 'al'), ('-of', 'of')] PHJ18910301-V06-03-page14.txt: [('fel-', 'fel')] PHJ18910301-V06-03-page17.txt: [('sweet-', 'sweet')] PHJ18910301-V06-03-page19.txt: [('fami-', 'fami')] PHJ18910301-V06-03-page2.txt: [('-lot', 'lot'), ('use-', 'use')] PHJ18910301-V06-03-page23.txt: [('learn-', 'learn'), ('use-', 'use')] PHJ18910301-V06-03-page24.txt: [('--', '-'), ('pro-', 'pro')] PHJ18910301-V06-03-page28.txt: [('-looking."', 'looking."'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18910301-V06-03-page30.txt: [('PEI-IPSO-', 'PEI-IPSO')] PHJ18910301-V06-03-page31.txt: [('Birth-', 'Birth')] PHJ18910301-V06-03-page32.txt: [('-Arica', 'Arica'), ('Type-', 'Type'), ('corres-', 'corres'), ('--', '-')] PHJ18910401-V06-04-page1.txt: [('-PAGE', 'PAGE'), ('re-', 're')] PHJ18910401-V06-04-page10.txt: [('some-', 'some')] PHJ18910401-V06-04-page11.txt: [('IN-', 'IN')] PHJ18910401-V06-04-page12.txt: [('-', ''), ('-', '')] PHJ18910401-V06-04-page13.txt: [('-', '')] PHJ18910401-V06-04-page14.txt: [('-', '')] PHJ18910401-V06-04-page17.txt: [('un-', 'un')] PHJ18910401-V06-04-page24.txt: [('prosper-', 'prosper')] PHJ18910401-V06-04-page25.txt: [('atmos-', 'atmos')] PHJ18910401-V06-04-page29.txt: [('-', '')] PHJ18910401-V06-04-page3.txt: [('lux-', 'lux')] PHJ18910401-V06-04-page30.txt: [('--', '-'), ('pock-', 'pock')] PHJ18910401-V06-04-page31.txt: [('-ow', 'ow'), ('-Ts.', 'Ts.'), ('MO-', 'MO')] PHJ18910401-V06-04-page32.txt: [('corres-', 'corres'), ('--', '-')] PHJ18910401-V06-04-page4.txt: [('alco-', 'alco')] PHJ18910501-V06-05-page1.txt: [('-PAGE', 'PAGE')] PHJ18910501-V06-05-page10.txt: [('-IXALTH', 'IXALTH')] PHJ18910501-V06-05-page11.txt: [('recom-', 'recom')] PHJ18910501-V06-05-page12.txt: [('-', ''), ('IN-', 'IN')] PHJ18910501-V06-05-page14.txt: [('farm-', 'farm')] PHJ18910501-V06-05-page15.txt: [('constitu-', 'constitu'), ('man-', 'man'), ('im-', 'im')] PHJ18910501-V06-05-page16.txt: [('-', '')] PHJ18910501-V06-05-page17.txt: [('-', ''), ('-', '')] PHJ18910501-V06-05-page2.txt: [('dimin-', 'dimin')] PHJ18910501-V06-05-page20.txt: [('-', '')] PHJ18910501-V06-05-page25.txt: [('Al-', 'Al')] PHJ18910501-V06-05-page27.txt: [('-From', 'From'), ('-ook', 'ook'), ('-development', 'development'), ('-', '')] PHJ18910501-V06-05-page28.txt: [('absorb-', 'absorb')] PHJ18910501-V06-05-page29.txt: [('-FOR-', 'FOR-')] PHJ18910501-V06-05-page31.txt: [('-', ''), ('.--', '.-')] PHJ18910501-V06-05-page32.txt: [('-', ''), ('-', ''), ('corres-', 'corres'), ('--', '-')] PHJ18910501-V06-05-page5.txt: [('de-', 'de')] PHJ18910501-V06-05-page9.txt: [('DYS-', 'DYS')] PHJ18910601-V06-06-page1.txt: [('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18910601-V06-06-page10.txt: [('sup-', 'sup'), ('mal-', 'mal')] PHJ18910601-V06-06-page11.txt: [('use-', 'use')] PHJ18910601-V06-06-page13.txt: [('CI-', 'CI'), ('I-', 'I'), ('nine-', 'nine')] PHJ18910601-V06-06-page15.txt: [('TO-', 'TO'), ('re-', 're')] PHJ18910601-V06-06-page17.txt: [('-', '')] PHJ18910601-V06-06-page19.txt: [('recom-', 'recom'), ('mis-', 'mis')] PHJ18910601-V06-06-page23.txt: [('-sensible', 'sensible')] PHJ18910601-V06-06-page27.txt: [('mu-', 'mu')] PHJ18910601-V06-06-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18910601-V06-06-page31.txt: [('------', '-----'), ('-.', '.'), ('adver-', 'adver'), ('---', '--'), ('--', '-'), ('Oce-', 'Oce'), ('..---', '..--'), ('-..', '..'), ('..--', '..-'), ('-', ''), ('----', '---'), ('-', ''), ('-', ''), ('-', '')] PHJ18910601-V06-06-page32.txt: [('j-', 'j'), ('-', ''), ('-', ''), ('.--', '.-'), ('Type-', 'Type'), ('corres-', 'corres'), ('--', '-')] PHJ18910601-V06-06-page4.txt: [('-', '')] PHJ18910701-V06-07-page1.txt: [('-', ''), ('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18910701-V06-07-page12.txt: [('-those', 'those')] PHJ18910701-V06-07-page13.txt: [('INTOX-', 'INTOX')] PHJ18910701-V06-07-page16.txt: [('--man', '-man')] PHJ18910701-V06-07-page19.txt: [('--picture', '-picture')] PHJ18910701-V06-07-page24.txt: [('r.-', 'r.')] PHJ18910701-V06-07-page25.txt: [('-the', 'the')] PHJ18910701-V06-07-page26.txt: [('un-', 'un'), ('-', ''), ('JOUR-', 'JOUR'), ('-', '')] PHJ18910701-V06-07-page28.txt: [('announce-', 'announce'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ("Children's-", "Children's")] PHJ18910701-V06-07-page29.txt: [("'Mimic-", "'Mimic")] PHJ18910701-V06-07-page31.txt: [('-', '')] PHJ18910701-V06-07-page32.txt: [('--', '-'), ('corres-', 'corres'), ('--', '-')] PHJ18910701-V06-07-page4.txt: [('advanta-', 'advanta')] PHJ18910801-V06-08-page1.txt: [('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18910801-V06-08-page10.txt: [('-IVIrsr', 'IVIrsr')] PHJ18910801-V06-08-page11.txt: [('per-', 'per'), ('ar-', 'ar'), ('med-', 'med')] PHJ18910801-V06-08-page16.txt: [('free.--', 'free.-')] PHJ18910801-V06-08-page19.txt: [('hys-', 'hys')] PHJ18910801-V06-08-page20.txt: [('HKALTI-', 'HKALTI')] PHJ18910801-V06-08-page21.txt: [('-', ''), ('hu-', 'hu')] PHJ18910801-V06-08-page22.txt: [('Answers.-', 'Answers.')] PHJ18910801-V06-08-page23.txt: [('COMMAND-', 'COMMAND'), ('cheap-', 'cheap')] PHJ18910801-V06-08-page24.txt: [('fa-', 'fa')] PHJ18910801-V06-08-page27.txt: [('A-', 'A'), ('ND-', 'ND'), ('-', '')] PHJ18910801-V06-08-page28.txt: [('-', ''), ('-', ''), ('announce-', 'announce'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('price-', 'price'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18910801-V06-08-page29.txt: [('corn-', 'corn')] PHJ18910801-V06-08-page31.txt: [('Par-', 'Par')] PHJ18910801-V06-08-page32.txt: [('ci--', 'ci-'), ('-', ''), ('-.', '.'), ('-', ''), ('..---', '..--'), ('-"', '"'), ('--', '-'), ('Type-', 'Type'), ('corres-', 'corres'), ('--', '-')] PHJ18910801-V06-08-page5.txt: [('be-', 'be')] PHJ18910901-V06-09-page1.txt: [('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18910901-V06-09-page13.txt: [('be-', 'be')] PHJ18910901-V06-09-page15.txt: [('dis-', 'dis')] PHJ18910901-V06-09-page19.txt: [('-chance', 'chance')] PHJ18910901-V06-09-page22.txt: [('incom-', 'incom'), ('de-', 'de')] PHJ18910901-V06-09-page25.txt: [('after-', 'after'), ('susceptibil-', 'susceptibil')] PHJ18910901-V06-09-page27.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18910901-V06-09-page28.txt: [('announce-', 'announce'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ("Children's-", "Children's")] PHJ18910901-V06-09-page30.txt: [('circa-', 'circa'), ('World.-', 'World.'), ('-', '')] PHJ18910901-V06-09-page31.txt: [('w-', 'w'), ('Paz-', 'Paz'), ('MO-', 'MO')] PHJ18910901-V06-09-page32.txt: [('chil-', 'chil'), ('-', ''), ('-', '')] PHJ18910901-V06-09-page5.txt: [('--Josephine', '-Josephine')] PHJ18910901-V06-09-page8.txt: [('pre-', 'pre'), ('anx-', 'anx')] PHJ18911001-V06-10-page1.txt: [('-', ''), ('-P', 'P'), ('ipe-', 'ipe'), ('impro-', 'impro')] PHJ18911001-V06-10-page10.txt: [('-', '')] PHJ18911001-V06-10-page12.txt: [('AL-', 'AL'), ('--Selected.', '-Selected.')] PHJ18911001-V06-10-page16.txt: [('-lady', 'lady')] PHJ18911001-V06-10-page17.txt: [('in-', 'in')] PHJ18911001-V06-10-page20.txt: [('con-', 'con')] PHJ18911001-V06-10-page21.txt: [('in-', 'in')] PHJ18911001-V06-10-page22.txt: [('CHOCO-', 'CHOCO'), ('molds-', 'molds')] PHJ18911001-V06-10-page3.txt: [('-thought', 'thought')] PHJ18911001-V06-10-page30.txt: [('Chromo-', 'Chromo'), ('-', ''), ('-', ''), ('-', ''), ('-vsrA', 'vsrA')] PHJ18911001-V06-10-page31.txt: [('-', ''), ('-iotograph', 'iotograph'), ("Bird's-", "Bird's")] PHJ18911001-V06-10-page32.txt: [('cata-', 'cata'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18911001-V06-10-page5.txt: [('-', '')] PHJ18911001-V06-10-page7.txt: [('-', '')] PHJ18911101-V06-11-page1.txt: [('-', ''), ('-', ''), ('-PAGE', 'PAGE')] PHJ18911101-V06-11-page10.txt: [('God-', 'God'), ('-funnel', 'funnel')] PHJ18911101-V06-11-page12.txt: [('-', '')] PHJ18911101-V06-11-page17.txt: [('but-', 'but'), ('tuber-', 'tuber')] PHJ18911101-V06-11-page2.txt: [('se-', 'se')] PHJ18911101-V06-11-page20.txt: [('-', '')] PHJ18911101-V06-11-page21.txt: [('Brit-', 'Brit')] PHJ18911101-V06-11-page23.txt: [('ad-', 'ad')] PHJ18911101-V06-11-page25.txt: [('DRESS-', 'DRESS')] PHJ18911101-V06-11-page26.txt: [('-pound', 'pound')] PHJ18911101-V06-11-page28.txt: [('cata-', 'cata'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18911101-V06-11-page29.txt: [('-', '')] PHJ18911101-V06-11-page30.txt: [('-', ''), ('-', '')] PHJ18911101-V06-11-page31.txt: [('--A', '-A'), ('-', ''), ('com-', 'com'), ('-the', 'the'), ('-', ''), ('graphi-', 'graphi'), ('-', ''), ('-', ''), ('in-', 'in'), ('done.-', 'done.'), ('his-', 'his'), ('---....-............z', '--....-............z'), ('book.-', 'book.'), ('-The', 'The')] PHJ18911101-V06-11-page32.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18911101-V06-11-page4.txt: [('man--', 'man-')] PHJ18911101-V06-11-page5.txt: [('inter-', 'inter')] PHJ18911101-V06-11-page9.txt: [('appe-', 'appe')] PHJ18911201-V06-12-page1.txt: [('-PAGE', 'PAGE')] PHJ18911201-V06-12-page10.txt: [('in-', 'in')] PHJ18911201-V06-12-page11.txt: [('TO-', 'TO')] PHJ18911201-V06-12-page22.txt: [('Chem-', 'Chem')] PHJ18911201-V06-12-page28.txt: [('catalogue-', 'catalogue'), ('cata-', 'cata'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18911201-V06-12-page30.txt: [('G-', 'G')] PHJ18911201-V06-12-page31.txt: [('disease--', 'disease-'), ('G-G-', 'G-G'), ('-', ''), ('IN-', 'IN')] PHJ18911201-V06-12-page32.txt: [('-', ''), ('-', '')] PHJ18911201-V06-12-page33.txt: [('-', ''), ('-', '')] PHJ18911201-V06-12-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18911201-V06-12-page6.txt: [('whole-', 'whole')] PHJ18920101-V07-01-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18920101-V07-01-page10.txt: [('ex-', 'ex')] PHJ18920101-V07-01-page12.txt: [('--Selected.', '-Selected.')] PHJ18920101-V07-01-page20.txt: [('EF-', 'EF')] PHJ18920101-V07-01-page25.txt: [('self-adorn-', 'self-adorn')] PHJ18920101-V07-01-page27.txt: [('Ir-', 'Ir')] PHJ18920101-V07-01-page28.txt: [('-page', 'page'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('ADVERTIS-', 'ADVERTIS'), ('-', ''), ('-', ''), ('-', '')] PHJ18920101-V07-01-page29.txt: [('r-', 'r'), ('-fho', 'fho'), ('-', '')] PHJ18920101-V07-01-page30.txt: [('-', '')] PHJ18920101-V07-01-page31.txt: [('Ingrow-', 'Ingrow'), ('G-', 'G'), ('-', ''), ('-z', 'z'), ('sell-', 'sell'), ('IN-', 'IN'), ('--', '-')] PHJ18920101-V07-01-page6.txt: [('DIS-', 'DIS')] PHJ18920101-V07-01-page7.txt: [('-', '')] PHJ18920101-V07-01-page9.txt: [('gentle-', 'gentle')] PHJ18920201-V07-02-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18920201-V07-02-page12.txt: [('-', '')] PHJ18920201-V07-02-page15.txt: [('-', ''), ('-', '')] PHJ18920201-V07-02-page2.txt: [('con-', 'con')] PHJ18920201-V07-02-page20.txt: [('in-', 'in')] PHJ18920201-V07-02-page22.txt: [('-', ''), ('-', ''), ('--', '-'), ('-', '')] PHJ18920201-V07-02-page25.txt: [('investi-', 'investi')] PHJ18920201-V07-02-page26.txt: [('diagram.--', 'diagram.-')] PHJ18920201-V07-02-page27.txt: [('prog-', 'prog'), ('Secretary-', 'Secretary')] PHJ18920201-V07-02-page28.txt: [('ADVERTIS-', 'ADVERTIS'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920201-V07-02-page30.txt: [('-ant', 'ant'), ('work."-', 'work."'), ('-', ''), ('-Bible', 'Bible'), ('-Ax', 'Ax')] PHJ18920201-V07-02-page31.txt: [('-', ''), ('-', ''), ('-OF', 'OF')] PHJ18920201-V07-02-page6.txt: [('suf-', 'suf'), ('stom-', 'stom')] PHJ18920201-V07-02-page9.txt: [('re-', 're')] PHJ18920301-V07-03-page1.txt: [('-', ''), ('-PAGE', 'PAGE')] PHJ18920301-V07-03-page10.txt: [('-r', 'r')] PHJ18920301-V07-03-page13.txt: [('-mss', 'mss')] PHJ18920301-V07-03-page17.txt: [('-', '')] PHJ18920301-V07-03-page19.txt: [('--upon', '-upon')] PHJ18920301-V07-03-page20.txt: [('tray-', 'tray')] PHJ18920301-V07-03-page25.txt: [('--', '-')] PHJ18920301-V07-03-page27.txt: [('NEWS-', 'NEWS'), ('illus-', 'illus')] PHJ18920301-V07-03-page28.txt: [('-page', 'page'), ('ADVERTIS-', 'ADVERTIS'), ('-PUBLISHERS', 'PUBLISHERS'), ('the-', 'the'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920301-V07-03-page30.txt: [('--rice', '-rice')] PHJ18920301-V07-03-page31.txt: [('-FOR-', 'FOR-')] PHJ18920301-V07-03-page32.txt: [('brevi-', 'brevi'), ('Washing-', 'Washing')] PHJ18920301-V07-03-page4.txt: [('-', '')] PHJ18920301-V07-03-page9.txt: [('-the', 'the')] PHJ18920401-V07-04-page1.txt: [('-PAGE', 'PAGE'), ('Dr-', 'Dr')] PHJ18920401-V07-04-page10.txt: [('-U', 'U')] PHJ18920401-V07-04-page11.txt: [('con-', 'con'), ('-', '')] PHJ18920401-V07-04-page12.txt: [('hope-', 'hope')] PHJ18920401-V07-04-page13.txt: [('---', '--'), ('-', ''), ('harm-', 'harm')] PHJ18920401-V07-04-page2.txt: [('Ian-', 'Ian')] PHJ18920401-V07-04-page20.txt: [("-'-'", "'-'")] PHJ18920401-V07-04-page21.txt: [('-', '')] PHJ18920401-V07-04-page26.txt: [('-', '')] PHJ18920401-V07-04-page28.txt: [('COR-', 'COR'), ('digestion-', 'digestion')] PHJ18920401-V07-04-page29.txt: [('-etreatb.', 'etreatb.')] PHJ18920401-V07-04-page3.txt: [('re-', 're')] PHJ18920401-V07-04-page30.txt: [('de-', 'de')] PHJ18920401-V07-04-page32.txt: [('ADVERTIS-', 'ADVERTIS'), ('-page', 'page'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920401-V07-04-page5.txt: [('employment--', 'employment-'), ('-', '')] PHJ18920401-V07-04-page6.txt: [('BY-', 'BY')] PHJ18920401-V07-04-page7.txt: [('-vrr', 'vrr')] PHJ18920401-V07-04-page9.txt: [('-', '')] PHJ18920501-V07-05-page1.txt: [('-PAGE', 'PAGE')] PHJ18920501-V07-05-page11.txt: [('-', '')] PHJ18920501-V07-05-page15.txt: [('-', '')] PHJ18920501-V07-05-page16.txt: [('-', '')] PHJ18920501-V07-05-page19.txt: [('-', '')] PHJ18920501-V07-05-page20.txt: [('infant-', 'infant')] PHJ18920501-V07-05-page21.txt: [('-', '')] PHJ18920501-V07-05-page22.txt: [('--', '-'), ('-', ''), ('I-', 'I')] PHJ18920501-V07-05-page24.txt: [('COR-', 'COR'), ('con-', 'con')] PHJ18920501-V07-05-page25.txt: [('cos-', 'cos')] PHJ18920501-V07-05-page27.txt: [('-page', 'page'), ('-', '')] PHJ18920501-V07-05-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('La-', 'La'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920501-V07-05-page31.txt: [('-', ''), ('-', '')] PHJ18920501-V07-05-page32.txt: [('-page', 'page'), ('-', ''), ('-', ''), ('ADVERTIS-', 'ADVERTIS'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920501-V07-05-page8.txt: [('abstinence.--', 'abstinence.-')] PHJ18920601-V07-06-page1.txt: [('-PAGE', 'PAGE')] PHJ18920601-V07-06-page10.txt: [('medi-', 'medi')] PHJ18920601-V07-06-page11.txt: [('-have', 'have'), ('Anglo-', 'Anglo'), ('-', '')] PHJ18920601-V07-06-page12.txt: [('-', '')] PHJ18920601-V07-06-page14.txt: [('wide-', 'wide')] PHJ18920601-V07-06-page19.txt: [('ham-', 'ham'), ('atten-', 'atten')] PHJ18920601-V07-06-page2.txt: [('-and', 'and')] PHJ18920601-V07-06-page21.txt: [('-.aF', '.aF'), ('-', '')] PHJ18920601-V07-06-page27.txt: [('ST-', 'ST')] PHJ18920601-V07-06-page28.txt: [('-rice', 'rice')] PHJ18920601-V07-06-page3.txt: [('mus-', 'mus')] PHJ18920601-V07-06-page30.txt: [('World-', 'World')] PHJ18920601-V07-06-page32.txt: [('ADVERTIS-', 'ADVERTIS'), ('-page', 'page'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920601-V07-06-page6.txt: [('EXTER-', 'EXTER')] PHJ18920601-V07-06-page7.txt: [('WAGER-', 'WAGER')] PHJ18920701-V07-07-page1.txt: [('-PAGE', 'PAGE')] PHJ18920701-V07-07-page10.txt: [('reme-', 'reme')] PHJ18920701-V07-07-page16.txt: [('.-', '.')] PHJ18920701-V07-07-page19.txt: [('CHIL-', 'CHIL')] PHJ18920701-V07-07-page25.txt: [('-', '')] PHJ18920701-V07-07-page26.txt: [('COR-', 'COR')] PHJ18920701-V07-07-page27.txt: [('--Philadelphia', '-Philadelphia'), ('Moorish-', 'Moorish'), ('-', ''), ('.-', '.'), ('-', '')] PHJ18920701-V07-07-page28.txt: [('Hygeio-', 'Hygeio'), ('-', ''), ('-', '')] PHJ18920701-V07-07-page29.txt: [('---', '--'), ('-', ''), ('--..', '-..')] PHJ18920701-V07-07-page30.txt: [('-page', 'page')] PHJ18920701-V07-07-page32.txt: [('-', ''), ('-', ''), ('THRILL-', 'THRILL')] PHJ18920701-V07-07-page4.txt: [('-frequent', 'frequent')] PHJ18920701-V07-07-page6.txt: [('--', '-')] PHJ18920801-V07-08-page1.txt: [('-PAGE', 'PAGE'), ('promis-', 'promis')] PHJ18920801-V07-08-page10.txt: [('--IC', '-IC'), ('VENTILA-', 'VENTILA')] PHJ18920801-V07-08-page12.txt: [('-Ak', 'Ak')] PHJ18920801-V07-08-page15.txt: [('ARITH-', 'ARITH'), ('--Sel.', '-Sel.')] PHJ18920801-V07-08-page18.txt: [('-"No', '"No')] PHJ18920801-V07-08-page19.txt: [("-'", "'"), ('-', ''), ('-', ''), ('-"', '"'), ('.-', '.')] PHJ18920801-V07-08-page2.txt: [('en-', 'en')] PHJ18920801-V07-08-page20.txt: [('-"', '"')] PHJ18920801-V07-08-page23.txt: [('-V', 'V')] PHJ18920801-V07-08-page24.txt: [('be-', 'be')] PHJ18920801-V07-08-page25.txt: [('foam-', 'foam')] PHJ18920801-V07-08-page27.txt: [('-', '')] PHJ18920801-V07-08-page28.txt: [('mis-', 'mis')] PHJ18920801-V07-08-page29.txt: [('--Mr.', '-Mr.'), ('--We', '-We'), ('House-', 'House')] PHJ18920801-V07-08-page30.txt: [('--The', '-The'), ('-', ''), ('pul--', 'pul-')] PHJ18920801-V07-08-page32.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920801-V07-08-page4.txt: [('fa-', 'fa')] PHJ18920901-V07-09-page1.txt: [('-PAGE', 'PAGE')] PHJ18920901-V07-09-page13.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18920901-V07-09-page14.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920901-V07-09-page15.txt: [('-', '')] PHJ18920901-V07-09-page21.txt: [('--', '-'), ('-', ''), ('-', '')] PHJ18920901-V07-09-page23.txt: [('-', ''), ('-', '')] PHJ18920901-V07-09-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('Cali-', 'Cali')] PHJ18920901-V07-09-page29.txt: [('r-', 'r'), ('-', ''), ('--Mr.', '-Mr.'), ('--A', '-A'), ('--Our', '-Our')] PHJ18920901-V07-09-page30.txt: [('Net-', 'Net'), ('---', '--'), ('lipPH"-', 'lipPH"'), ('-', ''), ('-', ''), ('Cal-', 'Cal'), ('-', '')] PHJ18920901-V07-09-page31.txt: [('-lam', 'lam')] PHJ18920901-V07-09-page32.txt: [('Hygeio-', 'Hygeio'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18920901-V07-09-page4.txt: [('at-', 'at')] PHJ18920901-V07-09-page8.txt: [('-', '')] PHJ18921001-V07-10-page1.txt: [('-PriGE', 'PriGE')] PHJ18921001-V07-10-page10.txt: [('-the', 'the')] PHJ18921001-V07-10-page11.txt: [('-', ''), ('-For', 'For'), ('de-', 'de')] PHJ18921001-V07-10-page13.txt: [('SURF-', 'SURF')] PHJ18921001-V07-10-page14.txt: [('--more', '-more')] PHJ18921001-V07-10-page15.txt: [('grow-', 'grow')] PHJ18921001-V07-10-page17.txt: [('-', ''), ('-', ''), ('in-', 'in')] PHJ18921001-V07-10-page19.txt: [('COM-', 'COM')] PHJ18921001-V07-10-page20.txt: [('pecul-', 'pecul')] PHJ18921001-V07-10-page21.txt: [('-', ''), ('-', '')] PHJ18921001-V07-10-page22.txt: [('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', '')] PHJ18921001-V07-10-page23.txt: [('-.', '.'), ('--', '-'), ('t-', 't'), ('pro-', 'pro')] PHJ18921001-V07-10-page24.txt: [('In-', 'In'), ('com-', 'com'), ('vegeta-', 'vegeta')] PHJ18921001-V07-10-page27.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18921001-V07-10-page28.txt: [('Os-', 'Os'), ('-rest', 'rest')] PHJ18921001-V07-10-page29.txt: [('-page', 'page'), ('-', ''), ('-Nonessentials', 'Nonessentials')] PHJ18921001-V07-10-page3.txt: [('-that', 'that')] PHJ18921001-V07-10-page30.txt: [('-', ''), ('-ir', 'ir'), ('-Liol', 'Liol'), ('-', ''), ('E-', 'E'), ('-', '')] PHJ18921001-V07-10-page32.txt: [("-ars'", "ars'"), ('Hygeio-', 'Hygeio'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18921001-V07-10-page4.txt: [("-'ACIFIC", "'ACIFIC")] PHJ18921001-V07-10-page7.txt: [('MED-', 'MED')] PHJ18921001-V07-10-page9.txt: [('-', ''), ('-in', 'in')] PHJ18921101-V07-11-page1.txt: [('-PAGE', 'PAGE'), ('or-', 'or')] PHJ18921101-V07-11-page11.txt: [('Abc--', 'Abc-'), ('-', '')] PHJ18921101-V07-11-page15.txt: [('bless-', 'bless')] PHJ18921101-V07-11-page20.txt: [('-OURNAL.', 'OURNAL.')] PHJ18921101-V07-11-page21.txt: [('-', '')] PHJ18921101-V07-11-page25.txt: [('descrip-', 'descrip')] PHJ18921101-V07-11-page28.txt: [('do-', 'do')] PHJ18921101-V07-11-page29.txt: [('-', ''), ('Non-', 'Non')] PHJ18921101-V07-11-page3.txt: [('-"The', '"The')] PHJ18921101-V07-11-page30.txt: [('-', ''), ('-', ''), ('w-', 'w'), ('-t-.arr.OLIVElSTOIC.', 't-.arr.OLIVElSTOIC.')] PHJ18921101-V07-11-page31.txt: [('I-', 'I'), ('-', '')] PHJ18921101-V07-11-page32.txt: [('-page', 'page'), ('-', ''), ('.-', '.'), ('-', ''), ('-', '')] PHJ18921101-V07-11-page6.txt: [('-e-', 'e-'), ('-', ''), ('-', '')] PHJ18921101-V07-11-page7.txt: [('even--', 'even-')] PHJ18921201-V07-12-page1.txt: [('-PAGE', 'PAGE'), ('--', '-')] PHJ18921201-V07-12-page10.txt: [('CEN-', 'CEN')] PHJ18921201-V07-12-page11.txt: [('ene-', 'ene')] PHJ18921201-V07-12-page18.txt: [('---', '--')] PHJ18921201-V07-12-page21.txt: [('-', ''), ('o-', 'o')] PHJ18921201-V07-12-page22.txt: [('-', '')] PHJ18921201-V07-12-page26.txt: [('perni-', 'perni')] PHJ18921201-V07-12-page29.txt: [('.".-', '.".'), ('-', '')] PHJ18921201-V07-12-page30.txt: [('-C', 'C'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', '')] PHJ18921201-V07-12-page32.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('there-', 'there')] PHJ18921201-V07-12-page33.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18921201-V07-12-page34.txt: [('com-', 'com'), ('doc-', 'doc'), ('-', '')] PHJ18921201-V07-12-page7.txt: [('-the', 'the')] PHJ18921201-V07-12-page8.txt: [('-', '')] PHJ18960101-V11-01-page12.txt: [('disa-', 'disa')] PHJ18960101-V11-01-page17.txt: [('NERV-', 'NERV')] PHJ18960101-V11-01-page19.txt: [('JOUR-', 'JOUR')] PHJ18960101-V11-01-page2.txt: [('-', ''), ('con-', 'con')] PHJ18960101-V11-01-page22.txt: [('table-', 'table')] PHJ18960101-V11-01-page26.txt: [('-', '')] PHJ18960101-V11-01-page27.txt: [('-to', 'to')] PHJ18960101-V11-01-page29.txt: [('-"-.--.-', '"-.--.-')] PHJ18960101-V11-01-page32.txt: [('Price--', 'Price-'), ('-', '')] PHJ18960101-V11-01-page7.txt: [('re-', 're')] PHJ18960101-V11-01-page8.txt: [('mus-', 'mus')] PHJ18960101-V11-01-page9.txt: [('-', '')] PHJ18960201-V11-02-page12.txt: [('promis-', 'promis')] PHJ18960201-V11-02-page18.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18960201-V11-02-page19.txt: [('health-', 'health')] PHJ18960201-V11-02-page22.txt: [('ASSO-', 'ASSO')] PHJ18960201-V11-02-page26.txt: [('fleet-', 'fleet'), ('sing-', 'sing')] PHJ18960201-V11-02-page28.txt: [('-', '')] PHJ18960201-V11-02-page29.txt: [('-', ''), ('-ARETREAT', 'ARETREAT'), ('---', '--'), ('-', '')] PHJ18960201-V11-02-page31.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('I-', 'I'), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960201-V11-02-page32.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', '')] PHJ18960201-V11-02-page4.txt: [('-', ''), ('atmos-', 'atmos')] PHJ18960201-V11-02-page5.txt: [('Hebri-', 'Hebri')] PHJ18960301-V11-03-page13.txt: [('in-', 'in'), ('un-', 'un')] PHJ18960301-V11-03-page16.txt: [('assistance-', 'assistance')] PHJ18960301-V11-03-page19.txt: [('con-', 'con'), ('nec-', 'nec')] PHJ18960301-V11-03-page21.txt: [('step-', 'step')] PHJ18960301-V11-03-page31.txt: [('CIR-', 'CIR'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', '')] PHJ18960301-V11-03-page32.txt: [('-', ''), ('--', '-')] PHJ18960301-V11-03-page7.txt: [('Re-', 'Re')] PHJ18960301-V11-03-page8.txt: [('-caliber', 'caliber')] PHJ18960401-V11-04-page2.txt: [('-', ''), ('de-', 'de')] PHJ18960401-V11-04-page21.txt: [('-with', 'with')] PHJ18960401-V11-04-page28.txt: [('demo-', 'demo')] PHJ18960401-V11-04-page29.txt: [('-', ''), ('-RETREAT', 'RETREAT')] PHJ18960401-V11-04-page3.txt: [('-attain', 'attain')] PHJ18960401-V11-04-page30.txt: [('--.....', '-.....')] PHJ18960401-V11-04-page31.txt: [('-', ''), ('-', ''), ('CIA-', 'CIA'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960401-V11-04-page32.txt: [('-', ''), ('-', ''), ('----', '---')] PHJ18960501-V11-05-page17.txt: [('tend-', 'tend')] PHJ18960501-V11-05-page19.txt: [('ex-', 'ex')] PHJ18960501-V11-05-page21.txt: [('deve.-', 'deve.'), ('heaven-', 'heaven'), ('-', '')] PHJ18960501-V11-05-page23.txt: [('mo-', 'mo')] PHJ18960501-V11-05-page28.txt: [('-', '')] PHJ18960501-V11-05-page31.txt: [('CIR-', 'CIR'), ('-', ''), ('-', ''), ('-', ''), ("'-", "'"), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960501-V11-05-page32.txt: [('-', '')] PHJ18960501-V11-05-page6.txt: [('-cold', 'cold'), ('multi-', 'multi')] PHJ18960601-V11-06-page19.txt: [('-devote', 'devote')] PHJ18960601-V11-06-page23.txt: [('some-', 'some')] PHJ18960601-V11-06-page25.txt: [('VENTILATION.-', 'VENTILATION.')] PHJ18960601-V11-06-page28.txt: [('Record-', 'Record')] PHJ18960601-V11-06-page29.txt: [('-', ''), ('-', '')] PHJ18960601-V11-06-page31.txt: [('CIR-', 'CIR'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960601-V11-06-page32.txt: [('use-', 'use'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-.-.-', '.-.-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-tioi', 'tioi'), ('i-', 'i'), ('-', '')] PHJ18960601-V11-06-page6.txt: [('DISSEM-', 'DISSEM')] PHJ18960601-V11-06-page8.txt: [('SICK-', 'SICK')] PHJ18960701-V11-07-page14.txt: [('judg-', 'judg')] PHJ18960701-V11-07-page16.txt: [('spe-', 'spe')] PHJ18960701-V11-07-page19.txt: [('po-', 'po'), ('Chris-', 'Chris'), ('pro-', 'pro')] PHJ18960701-V11-07-page2.txt: [('occu-', 'occu')] PHJ18960701-V11-07-page23.txt: [('STU-', 'STU')] PHJ18960701-V11-07-page25.txt: [('-', ''), ('Peameal--', 'Peameal-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960701-V11-07-page27.txt: [('-', '')] PHJ18960701-V11-07-page29.txt: [('-', '')] PHJ18960701-V11-07-page3.txt: [('prob-', 'prob'), ('develop-', 'develop')] PHJ18960701-V11-07-page31.txt: [('CIR-', 'CIR'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960701-V11-07-page32.txt: [('-', ''), ('-...-', '...-'), ('..-.........-', '..-.........'), ('-', ''), ('---Z-', '--Z-'), ('-', ''), ('----', '---'), ('--', '-'), ('-', ''), ('--', '-'), ('-', '')] PHJ18960701-V11-07-page7.txt: [('-', '')] PHJ18960801-V11-08-page13.txt: [('-longer', 'longer'), ('de-', 'de')] PHJ18960801-V11-08-page19.txt: [('med-', 'med')] PHJ18960801-V11-08-page23.txt: [('WOR-', 'WOR')] PHJ18960801-V11-08-page31.txt: [('CIR-', 'CIR'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('DRUGGISTSqk-', 'DRUGGISTSqk'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18960801-V11-08-page8.txt: [('ONES-', 'ONES')] PHJ18960901-V11-09-page14.txt: [('im-', 'im')] PHJ18960901-V11-09-page2.txt: [('-colored', 'colored')] PHJ18960901-V11-09-page20.txt: [('prac-', 'prac')] PHJ18960901-V11-09-page27.txt: [('-was', 'was')] PHJ18960901-V11-09-page29.txt: [('-Mrs.', 'Mrs.')] PHJ18960901-V11-09-page30.txt: [('R-', 'R'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ18961001-V11-10-page1.txt: [('fermenta-', 'fermenta'), ('con-', 'con')] PHJ18961001-V11-10-page10.txt: [('LOCOMO-', 'LOCOMO')] PHJ18961001-V11-10-page14.txt: [('lower--', 'lower-'), ('--touch', '-touch')] PHJ18961001-V11-10-page16.txt: [('ONES-', 'ONES')] PHJ18961001-V11-10-page18.txt: [('CHIL-', 'CHIL')] PHJ18961001-V11-10-page27.txt: [('con-', 'con'), ('Omni-', 'Omni')] PHJ18961001-V11-10-page30.txt: [('--', '-'), ('NOTES-', 'NOTES'), ('ex-', 'ex')] PHJ18961001-V11-10-page31.txt: [('YEAR.-', 'YEAR.'), ('Senii-', 'Senii')] PHJ18961001-V11-10-page32.txt: [('-', ''), ('--', '-'), ('-', ''), ("---'", "--'")] PHJ18961101-V11-11-page10.txt: [('CAPI-', 'CAPI')] PHJ18961101-V11-11-page11.txt: [('it-', 'it')] PHJ18961101-V11-11-page13.txt: [('im-', 'im')] PHJ18961101-V11-11-page16.txt: [('Congrega-', 'Congrega')] PHJ18961101-V11-11-page24.txt: [('-sufficient', 'sufficient')] PHJ18961101-V11-11-page25.txt: [('-', ''), ('-', ''), ('-', '')] PHJ18961101-V11-11-page30.txt: [('NOTES-', 'NOTES')] PHJ18961101-V11-11-page31.txt: [('--The', '-The'), ('Mis-', 'Mis')] PHJ18961101-V11-11-page32.txt: [('-', ''), ('-', ''), ('i-i-', 'i-i'), ('i-', 'i'), ('...--', '...-'), ('-', ''), ('-', ''), ('-', ''), ('-..-', '..-'), ('-', ''), ('-', ''), ('Ittgiiiiiic-', 'Ittgiiiiiic'), ('-', ''), ('-', ''), ('-', '')] PHJ18961101-V11-11-page7.txt: [('con-', 'con')] PHJ18961201-V11-12-page16.txt: [('sympa-', 'sympa')] PHJ18961201-V11-12-page19.txt: [('corn-', 'corn')] PHJ18961201-V11-12-page22.txt: [('--', '-')] PHJ18961201-V11-12-page25.txt: [('---', '--')] PHJ18961201-V11-12-page33.txt: [('rheuma-', 'rheuma')] PHJ18961201-V11-12-page5.txt: [('degrad-', 'degrad')] PHJ18961201-V11-12-page8.txt: [('RAIL-', 'RAIL')] PHJ18990101-V14-01-page1.txt: [('JouR-', 'JouR')] PHJ18990201-V14-02-page1.txt: [('ac-', 'ac')] PHJ18990201-V14-02-page10.txt: [('SPIRIT-', 'SPIRIT')] PHJ18990201-V14-02-page12.txt: [('prop-', 'prop')] PHJ18990201-V14-02-page4.txt: [('condi-', 'condi')] PHJ18990201-V14-02-page6.txt: [('-lot.', 'lot.')] PHJ18990301-V14-03-page1.txt: [('im-', 'im')] PHJ18990301-V14-03-page11.txt: [('-', '')] PHJ18990301-V14-03-page19.txt: [('ASSOCIA-', 'ASSOCIA'), ('PA-', 'PA'), ('JoUR-', 'JoUR')] PHJ18990301-V14-03-page4.txt: [('con-', 'con')] PHJ18990301-V14-03-page7.txt: [('con-', 'con')] PHJ18990301-V14-03-page8.txt: [('PHYSI-', 'PHYSI')] PHJ18990401-V14-04-page1.txt: [('-', '')] PHJ18990401-V14-04-page11.txt: [('-', ''), ('IN-', 'IN')] PHJ18990401-V14-04-page12.txt: [('-has', 'has')] PHJ18990401-V14-04-page13.txt: [('appre-', 'appre')] PHJ18990401-V14-04-page14.txt: [('-', '')] PHJ18990401-V14-04-page15.txt: [('consider-', 'consider'), ('fur-', 'fur')] PHJ18990401-V14-04-page7.txt: [('re-', 're')] PHJ18990501-V14-05-page12.txt: [('burst-', 'burst')] PHJ18990501-V14-05-page16.txt: [('gone."--', 'gone."-')] PHJ18990501-V14-05-page3.txt: [('unload-', 'unload')] PHJ18990601-V14-06-page11.txt: [('ac-', 'ac')] PHJ18990601-V14-06-page13.txt: [('-', '')] PHJ18990601-V14-06-page15.txt: [('noth-', 'noth'), ('recep-', 'recep'), ('opportu-', 'opportu')] PHJ18990601-V14-06-page19.txt: [('Indian-', 'Indian')] PHJ18990601-V14-06-page20.txt: [('-', '')] PHJ18990601-V14-06-page24.txt: [('God-', 'God')] PHJ18990701-V14-07-page10.txt: [('con-', 'con'), ('-could', 'could')] PHJ18990701-V14-07-page14.txt: [('an-', 'an')] PHJ18990701-V14-07-page15.txt: [('HOT-', 'HOT')] PHJ18990701-V14-07-page19.txt: [('DRESS-', 'DRESS')] PHJ18990701-V14-07-page3.txt: [('con-', 'con')] PHJ18990801-V14-08-page10.txt: [('-order', 'order')] PHJ18990801-V14-08-page12.txt: [('Atwater.-', 'Atwater.')] PHJ18990801-V14-08-page15.txt: [('impor-', 'impor')] PHJ18990801-V14-08-page2.txt: [('hence-', 'hence')] PHJ18990801-V14-08-page4.txt: [('func-', 'func')] PHJ18990801-V14-08-page9.txt: [('-', '')] PHJ18990901-V14-09-page10.txt: [('de-', 'de')] PHJ18990901-V14-09-page18.txt: [('-something', 'something')] PHJ18990901-V14-09-page2.txt: [('dis-', 'dis')] PHJ18990901-V14-09-page24.txt: [('-that', 'that')] PHJ18990901-V14-09-page30.txt: [('-"', '"')] PHJ18990901-V14-09-page5.txt: [('thou-', 'thou')] PHJ18991001-V14-10-page13.txt: [('-of', 'of')] PHJ18991001-V14-10-page15.txt: [('--Ella', '-Ella')] PHJ18991001-V14-10-page17.txt: [('--', '-')] PHJ18991001-V14-10-page3.txt: [('G-', 'G'), ('parent-', 'parent')] PHJ18991001-V14-10-page9.txt: [('-', '')] PHJ18991101-V14-11-page15.txt: [('GASTRO-INTESTI-', 'GASTRO-INTESTI')] PHJ18991101-V14-11-page3.txt: [('corn-', 'corn')] PHJ18991101-V14-11-page7.txt: [('pre-', 'pre')] PHJ18991101-V14-11-page9.txt: [('DISINFECT-', 'DISINFECT')] PHJ18991201-V14-12-page12.txt: [('Hundred-', 'Hundred')] PHJ18991201-V14-12-page15.txt: [('ad-', 'ad')] PHJ18991201-V14-12-page18.txt: [('-', '')] PHJ18991201-V14-12-page5.txt: [('--We', '-We')] PHJ18991201-V14-12-page7.txt: [('en-', 'en')] PHJ19010201-V16-02-page12.txt: [('HEALTI-', 'HEALTI'), ('CHRIS-', 'CHRIS')] PHJ19010201-V16-02-page17.txt: [('tissue-', 'tissue')] PHJ19010201-V16-02-page20.txt: [('I-IEALTI-', 'I-IEALTI')] PHJ19010201-V16-02-page21.txt: [('SANI-', 'SANI')] PHJ19010201-V16-02-page22.txt: [('-Health', 'Health')] PHJ19010201-V16-02-page23.txt: [('Hebrew-', 'Hebrew')] PHJ19010201-V16-02-page25.txt: [('under-', 'under')] PHJ19010201-V16-02-page27.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010201-V16-02-page28.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010201-V16-02-page29.txt: [('-ow', 'ow')] PHJ19010201-V16-02-page3.txt: [('k---', 'k--'), ('.-', '.'), ('-', ''), ('Ff.-', 'Ff.'), ('rt.C.P-', 'rt.C.P'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-i', 'i'), ('-', ''), ("-'", "'"), ('.-', '.'), ('--', '-'), ('-', ''), ("-'", "'"), ('-', ''), ('-"', '"'), ('-', ''), ('-lir', 'lir')] PHJ19010201-V16-02-page30.txt: [('..----', '..---')] PHJ19010201-V16-02-page32.txt: [('-str', 'str'), ('-Mir', 'Mir'), ('-stir', 'stir'), ('-stir', 'stir'), ('-stir', 'stir'), ('-stir', 'stir'), ('-stir', 'stir')] PHJ19010201-V16-02-page33.txt: [('t-', 't'), ('t-', 't'), ('t-', 't'), ('tt-', 'tt'), ('t-', 't'), ('-', ''), ('t-', 't'), ('t-', 't'), ('-', ''), ('-', ''), ('-r', 'r'), ('-', ''), ('-', '')] PHJ19010201-V16-02-page34.txt: [('-', ''), ('-', '')] PHJ19010201-V16-02-page4.txt: [('SNAP-', 'SNAP')] PHJ19010201-V16-02-page5.txt: [('night-', 'night')] PHJ19010201-V16-02-page6.txt: [('pre-', 'pre')] PHJ19010201-V16-02-page8.txt: [('-SUPERSTITIONS', 'SUPERSTITIONS')] PHJ19010201-V16-02-page9.txt: [('pre-', 'pre')] PHJ19010301-V16-03-page12.txt: [('exten-', 'exten'), ('-character."', 'character."')] PHJ19010301-V16-03-page19.txt: [('-', '')] PHJ19010301-V16-03-page2.txt: [('-', '')] PHJ19010301-V16-03-page20.txt: [('dan-', 'dan')] PHJ19010301-V16-03-page21.txt: [('-SEVEN', 'SEVEN')] PHJ19010301-V16-03-page25.txt: [('-toprove', 'toprove')] PHJ19010301-V16-03-page26.txt: [('ft-', 'ft'), ('-', ''), ('-', ''), ('-et', 'et'), ('-s-', 's-'), ("-V..'", "V..'"), ('-', ''), ('-i-.', 'i-.'), ('-e', 'e'), ('-', ''), ('-', ''), ('-s-ir', 's-ir'), ('-i.', 'i.'), ('-s--.e', 's--.e'), ('-V', 'V'), ("'--s-", "'--s")] PHJ19010301-V16-03-page27.txt: [('-', ''), ('A-', 'A')] PHJ19010301-V16-03-page28.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010301-V16-03-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010301-V16-03-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010301-V16-03-page30.txt: [('it-', 'it')] PHJ19010301-V16-03-page32.txt: [('-s', 's'), ('-Pi-', 'Pi-'), ('-', '')] PHJ19010301-V16-03-page33.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010301-V16-03-page34.txt: [('PARLOR-', 'PARLOR')] PHJ19010301-V16-03-page35.txt: [('-', '')] PHJ19010301-V16-03-page5.txt: [('-VOL.', 'VOL.'), ('medi-', 'medi')] PHJ19010301-V16-03-page7.txt: [('ter-', 'ter')] PHJ19010301-V16-03-page8.txt: [('-of', 'of')] PHJ19010401-V16-04-page1.txt: [('-', ''), ('-t', 't')] PHJ19010401-V16-04-page14.txt: [('accumula-', 'accumula')] PHJ19010401-V16-04-page15.txt: [('per-', 'per')] PHJ19010401-V16-04-page2.txt: [('-', ''), ('-', '')] PHJ19010401-V16-04-page24.txt: [('thick-', 'thick')] PHJ19010401-V16-04-page26.txt: [('-', ''), ('--d', '-d')] PHJ19010401-V16-04-page28.txt: [('days.-', 'days.'), ('-', ''), ('-', ''), ('--', '-'), ("-'", "'"), ('-', ''), ('----', '---'), ('-', ''), ('----', '---'), ('--', '-')] PHJ19010401-V16-04-page29.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010401-V16-04-page3.txt: [('-', '')] PHJ19010401-V16-04-page30.txt: [('-', ''), ('-', '')] PHJ19010401-V16-04-page31.txt: [('Correspond-', 'Correspond'), ('-MAIN', 'MAIN')] PHJ19010401-V16-04-page32.txt: [('-Mr', 'Mr'), ('Code-', 'Code')] PHJ19010401-V16-04-page33.txt: [('--', '-')] PHJ19010401-V16-04-page34.txt: [('Electric-', 'Electric')] PHJ19010401-V16-04-page9.txt: [('base-', 'base')] PHJ19010501-V16-05-page14.txt: [('how-', 'how')] PHJ19010501-V16-05-page17.txt: [('climato-', 'climato'), ('hydro-', 'hydro')] PHJ19010501-V16-05-page19.txt: [('transmi-', 'transmi')] PHJ19010501-V16-05-page2.txt: [('-', ''), ('-The', 'The')] PHJ19010501-V16-05-page21.txt: [('-', ''), ('EALTI-', 'EALTI')] PHJ19010501-V16-05-page24.txt: [('SCI-', 'SCI')] PHJ19010501-V16-05-page27.txt: [('--', '-')] PHJ19010501-V16-05-page29.txt: [('triu.K-', 'triu.K'), ('-trio.', 'trio.')] PHJ19010501-V16-05-page30.txt: [('-', ''), ('----.', '---.'), ('M-', 'M')] PHJ19010501-V16-05-page31.txt: [('-g', 'g'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010501-V16-05-page32.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-r-', 'r-'), ('-', ''), ('i-', 'i'), ('-', ''), ('r-', 'r'), ('-', '')] PHJ19010501-V16-05-page34.txt: [('-', '')] PHJ19010501-V16-05-page35.txt: [('---', '--'), ("'-", "'"), ('.-', '.'), ('-', ''), ('.--', '.-'), ('-', ''), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-Ye', 'Ye'), ('-', ''), ('-tt-', 'tt-'), ('i-', 'i'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-t-', 't-'), ('-', '')] PHJ19010501-V16-05-page36.txt: [('-', ''), ('-', ''), ('-s', 's')] PHJ19010501-V16-05-page37.txt: [('-', '')] PHJ19010501-V16-05-page5.txt: [('THO-FORMO-', 'THO-FORMO')] PHJ19010601-V16-06-page1.txt: [('-w', 'w')] PHJ19010601-V16-06-page13.txt: [('-', ''), ('battle-', 'battle')] PHJ19010601-V16-06-page17.txt: [('Well-', 'Well'), ('-cooked', 'cooked'), ('-eliminated', 'eliminated'), ('-increased', 'increased')] PHJ19010601-V16-06-page18.txt: [('con--', 'con-')] PHJ19010601-V16-06-page2.txt: [('-', ''), ('Pala-', 'Pala')] PHJ19010601-V16-06-page3.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010601-V16-06-page32.txt: [('-AK', 'AK')] PHJ19010601-V16-06-page33.txt: [('Pow-', 'Pow'), ('rose-', 'rose')] PHJ19010601-V16-06-page34.txt: [('thor-', 'thor')] PHJ19010601-V16-06-page36.txt: [('as-', 'as')] PHJ19010601-V16-06-page38.txt: [('dis-', 'dis')] PHJ19010601-V16-06-page40.txt: [('-', '')] PHJ19010601-V16-06-page44.txt: [('-ipes', 'ipes')] PHJ19010601-V16-06-page46.txt: [('..-', '..'), ('E--', 'E-'), ('--', '-'), ('L-', 'L'), ('-.r', '.r'), ('-', ''), ('-', ''), ('-', ''), ('-page', 'page'), ('TEN-', 'TEN')] PHJ19010601-V16-06-page47.txt: [('-', ''), ('-"fir', '"fir'), ('-', '')] PHJ19010601-V16-06-page48.txt: [('Correspond-', 'Correspond')] PHJ19010601-V16-06-page49.txt: [('-------', '------'), ('-', ''), ('-s"', 's"'), ('THO-FORMO-', 'THO-FORMO')] PHJ19010601-V16-06-page50.txt: [('Tr-Q.-', 'Tr-Q.'), ('--', '-'), ('-zi', 'zi'), ('-', ''), ('--', '-'), ('-', '')] PHJ19010601-V16-06-page51.txt: [('-', ''), ('-', '')] PHJ19010601-V16-06-page53.txt: [("-'", "'")] PHJ19010601-V16-06-page7.txt: [('--they', '-they')] PHJ19010701-V16-07-page1.txt: [('-', ''), ('-', '')] PHJ19010701-V16-07-page11.txt: [('I-', 'I')] PHJ19010701-V16-07-page12.txt: [('---', '--')] PHJ19010701-V16-07-page13.txt: [('-', '')] PHJ19010701-V16-07-page15.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010701-V16-07-page17.txt: [('-the', 'the')] PHJ19010701-V16-07-page2.txt: [('-', ''), ('-Two', 'Two')] PHJ19010701-V16-07-page24.txt: [('SCI-', 'SCI')] PHJ19010701-V16-07-page25.txt: [('Do-', 'Do'), ('Men-', 'Men')] PHJ19010701-V16-07-page26.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19010701-V16-07-page27.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('E-', 'E'), ("'-", "'"), ('-', ''), ("-Man's", "Man's"), ('-Preparation', 'Preparation'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-page', 'page'), ('TEM-', 'TEM')] PHJ19010701-V16-07-page28.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010701-V16-07-page29.txt: [('"--', '"-'), ('t.-', 't.'), ('-"-""-', '"-""-'), ('-', ''), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('THO-FORMO-', 'THO-FORMO')] PHJ19010701-V16-07-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.s-ie', '.s-ie'), ('.-', '.'), ('-frir', 'frir')] PHJ19010701-V16-07-page30.txt: [('------', '-----'), ('-', ''), ('-', ''), ('-', ''), ('--.', '-.'), ('-', '')] PHJ19010701-V16-07-page31.txt: [('-', ''), ('EL-', 'EL'), ('-', '')] PHJ19010701-V16-07-page32.txt: [('-', ''), ('-', '')] PHJ19010701-V16-07-page33.txt: [('-ryvvyyl-', 'ryvvyyl-'), ('-', ''), ('-', '')] PHJ19010701-V16-07-page34.txt: [('-', '')] PHJ19010701-V16-07-page35.txt: [('-STREET', 'STREET')] PHJ19010801-V16-08-page1.txt: [('-', '')] PHJ19010801-V16-08-page14.txt: [('-Tir', 'Tir'), ('-beautiful', 'beautiful')] PHJ19010801-V16-08-page2.txt: [('-', ''), ('-', '')] PHJ19010801-V16-08-page27.txt: [("-IERS'", "IERS'"), ('-with', 'with')] PHJ19010801-V16-08-page28.txt: [('BRUSI-', 'BRUSI')] PHJ19010801-V16-08-page29.txt: [('-', ''), ('irrr-', 'irrr'), ('--', '-'), ('--', '-'), ('-', ''), ("-'", "'"), ('-', ''), ('MENTHO-FORMO-', 'MENTHO-FORMO')] PHJ19010801-V16-08-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-s-', 's-')] PHJ19010801-V16-08-page30.txt: [('-', '')] PHJ19010801-V16-08-page31.txt: [('-', ''), ('-', '')] PHJ19010801-V16-08-page32.txt: [('-', ''), ('-', '')] PHJ19010801-V16-08-page33.txt: [('--iri', '-iri'), ('i-', 'i')] PHJ19010801-V16-08-page4.txt: [('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('--v--', '-v--'), ('-page', 'page'), ('TEM-', 'TEM')] PHJ19010801-V16-08-page7.txt: [('tend-', 'tend'), ('pro-', 'pro'), ('per-', 'per')] PHJ19010901-V16-09-page17.txt: [('EALTI-', 'EALTI')] PHJ19010901-V16-09-page2.txt: [('-', '')] PHJ19010901-V16-09-page20.txt: [('-', ''), ('EALTI-', 'EALTI')] PHJ19010901-V16-09-page29.txt: [('-stir', 'stir')] PHJ19010901-V16-09-page3.txt: [('t"..-', 't"..'), ('-', ''), ('t-', 't'), ('--A', '-A'), ('--', '-'), ('.-', '.'), ('-', ''), ('-', ''), ('..-', '..'), ('-', ''), ('-', ''), ('-i-', 'i-'), ('---', '--'), ('-s-', 's-'), ('.-', '.'), ('-s-.', 's-.'), ('-.-.', '.-.')] PHJ19010901-V16-09-page30.txt: [('-----', '----'), ('-...', '...'), ('-', ''), ('-', ''), ('-vAi', 'vAi'), ('...-', '...'), ('-', ''), ('ilEALTI-', 'ilEALTI'), ('-', ''), ('-eeeeee-eeeest', 'eeeeee-eeeest')] PHJ19010901-V16-09-page32.txt: [('-', ''), ('-', '')] PHJ19010901-V16-09-page33.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010901-V16-09-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19010901-V16-09-page6.txt: [('con-', 'con')] PHJ19010901-V16-09-page8.txt: [('persever-', 'persever')] PHJ19011001-V16-10-page1.txt: [('-', ''), ('-', '')] PHJ19011001-V16-10-page11.txt: [('Fielding-', 'Fielding')] PHJ19011001-V16-10-page2.txt: [('-', '')] PHJ19011001-V16-10-page21.txt: [('IlEALTI-', 'IlEALTI')] PHJ19011001-V16-10-page23.txt: [('I-IEALTI-', 'I-IEALTI')] PHJ19011001-V16-10-page25.txt: [('-', ''), ('EALTI-', 'EALTI')] PHJ19011001-V16-10-page26.txt: [('HEALTI-', 'HEALTI')] PHJ19011001-V16-10-page28.txt: [('-', '')] PHJ19011001-V16-10-page3.txt: [('-', ''), ('--', '-'), ('-', ''), ('tss-', 'tss'), ('-i-', 'i-'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-P.', 'P.'), ('-', ''), ('-', ''), ('-s-', 's-')] PHJ19011001-V16-10-page30.txt: [('--', '-'), ('-', ''), ('--------', '-------'), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19011001-V16-10-page31.txt: [('-', ''), ('-', '')] PHJ19011001-V16-10-page32.txt: [('HEALTI-', 'HEALTI'), ('-', ''), ('-', '')] PHJ19011001-V16-10-page34.txt: [('-', ''), ('-', '')] PHJ19011001-V16-10-page35.txt: [('-', ''), ('-frfr', 'frfr')] PHJ19011101-V16-11-page14.txt: [('HEALTI-', 'HEALTI')] PHJ19011101-V16-11-page29.txt: [('-', '')] PHJ19011101-V16-11-page3.txt: [('HEL-', 'HEL'), ('SAD-', 'SAD')] PHJ19011101-V16-11-page30.txt: [('-', ''), ('----', '---'), ('-', ''), ('---', '--'), ('--', '-'), ('---', '--'), ('-', ''), ('-', ''), ('-', '')] PHJ19011101-V16-11-page33.txt: [('-', ''), ('-frfr', 'frfr'), ('-', '')] PHJ19011101-V16-11-page34.txt: [('-', ''), ('-', '')] PHJ19011101-V16-11-page35.txt: [('-lir', 'lir'), ('-', ''), ('-', ''), ('-', ''), ('.i-', '.i')] PHJ19011101-V16-11-page36.txt: [('-', ''), ('-', '')] PHJ19011101-V16-11-page37.txt: [('-if', 'if')] PHJ19011101-V16-11-page7.txt: [('TUBERCU-', 'TUBERCU')] PHJ19011201-V16-12-page1.txt: [('MON-', 'MON')] PHJ19011201-V16-12-page17.txt: [('ef-', 'ef')] PHJ19011201-V16-12-page2.txt: [('-', ''), ('-.', '.'), ('-', ''), ('.-', '.'), ('----', '---'), ('-..-', '..-'), ('t-', 't'), ('-', ''), ('Li--', 'Li-'), ('-', ''), ('-', ''), ("-'", "'"), ('"--', '"-'), ('---.', '--.'), ('-...', '...')] PHJ19011201-V16-12-page25.txt: [('irregu-', 'irregu'), ('weak-', 'weak')] PHJ19011201-V16-12-page3.txt: [('-', '')] PHJ19011201-V16-12-page31.txt: [('-only', 'only')] PHJ19011201-V16-12-page33.txt: [('-large', 'large')] PHJ19011201-V16-12-page34.txt: [('ap-', 'ap')] PHJ19011201-V16-12-page39.txt: [('-', ''), ('---', '--'), ('---', '--'), ('P-', 'P'), ('..-', '..'), ('---', '--')] PHJ19011201-V16-12-page4.txt: [('-', ''), ('-W', 'W'), ('.-', '.'), ('-', ''), ('--.', '-.'), ("-r.'.'.", "r.'.'."), ('--', '-'), ('---', '--'), ('-', ''), ('--', '-'), ('---', '--'), ('-', ''), ('-', ''), ('-"N', '"N'), ('.....-', '.....')] PHJ19011201-V16-12-page40.txt: [('-', '')] PHJ19011201-V16-12-page44.txt: [('vtouOwttitiOK-', 'vtouOwttitiOK')] PHJ19011201-V16-12-page45.txt: [('-', ''), ('-', ''), ('-STREET', 'STREET')] PHJ19020101-V17-01-page13.txt: [('-oluer.', 'oluer.')] PHJ19020101-V17-01-page16.txt: [('--tartaric', '-tartaric')] PHJ19020101-V17-01-page17.txt: [('essen-', 'essen')] PHJ19020101-V17-01-page19.txt: [('concludes--', 'concludes-')] PHJ19020101-V17-01-page28.txt: [('ELECTRIC-', 'ELECTRIC'), ('applica-', 'applica')] PHJ19020101-V17-01-page29.txt: [('ELECTRIC-', 'ELECTRIC')] PHJ19020101-V17-01-page3.txt: [('-', ''), ('-', '')] PHJ19020101-V17-01-page36.txt: [('t-', 't'), ('t-', 't'), ('i-', 'i'), ('-', ''), ('t-', 't'), ('-', ''), ('t-', 't'), ('t-', 't'), ('i-', 'i'), ('t-', 't'), ('.-', '.'), ('-', ''), ('t-', 't'), ('t-', 't'), ('-', ''), ('t-', 't')] PHJ19020101-V17-01-page37.txt: [('-', ''), ('-tou.witriu', 'tou.witriu'), ('if-', 'if'), ('-tou', 'tou')] PHJ19020201-V17-02-page16.txt: [('destruc-', 'destruc')] PHJ19020201-V17-02-page2.txt: [('-', '')] PHJ19020201-V17-02-page20.txt: [('mar-', 'mar')] PHJ19020201-V17-02-page30.txt: [('-----lee.', '----lee.'), ('-', ''), ('-ffr', 'ffr'), ('Vice-', 'Vice')] PHJ19020201-V17-02-page31.txt: [('-i', 'i'), ('dif-', 'dif'), ('---', '--')] PHJ19020201-V17-02-page32.txt: [('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ('-t', 't')] PHJ19020201-V17-02-page34.txt: [('BABY-', 'BABY')] PHJ19020201-V17-02-page35.txt: [('-', '')] PHJ19020201-V17-02-page9.txt: [('aggra-', 'aggra')] PHJ19020301-V17-03-page2.txt: [('can-', 'can'), ('Z-', 'Z'), ('SIXTY-', 'SIXTY')] PHJ19020301-V17-03-page21.txt: [('indus-', 'indus')] PHJ19020301-V17-03-page22.txt: [('eter-', 'eter')] PHJ19020301-V17-03-page27.txt: [('preven-', 'preven')] PHJ19020301-V17-03-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020301-V17-03-page33.txt: [('-', '')] PHJ19020301-V17-03-page34.txt: [('-v', 'v')] PHJ19020301-V17-03-page37.txt: [('-', '')] PHJ19020301-V17-03-page5.txt: [('-', '')] PHJ19020401-V17-04-page13.txt: [('constipa-', 'constipa')] PHJ19020401-V17-04-page22.txt: [("advantages.'.-", "advantages.'."), ('pres-', 'pres')] PHJ19020401-V17-04-page29.txt: [('book-', 'book')] PHJ19020401-V17-04-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020401-V17-04-page31.txt: [('.--------', '.-------'), ('--.--', '-.--'), ('---.--', '--.--'), ('..--', '..-')] PHJ19020401-V17-04-page32.txt: [('-------', '------'), ('-', ''), ('-', ''), ("'-'-'-", "'-'-'"), ('A.-', 'A.'), ('-', '')] PHJ19020401-V17-04-page5.txt: [('-', '')] PHJ19020501-V17-05-page14.txt: [('re-', 're')] PHJ19020501-V17-05-page15.txt: [('how-', 'how')] PHJ19020501-V17-05-page18.txt: [('sub-', 'sub'), ('un-', 'un'), ('solu-', 'solu'), ('useful-', 'useful')] PHJ19020501-V17-05-page21.txt: [('-a', 'a')] PHJ19020501-V17-05-page23.txt: [('-', '')] PHJ19020501-V17-05-page27.txt: [('supply-', 'supply')] PHJ19020501-V17-05-page28.txt: [('PA-', 'PA')] PHJ19020501-V17-05-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020501-V17-05-page9.txt: [('-', '')] PHJ19020601-V17-06-page10.txt: [('--since', '-since')] PHJ19020601-V17-06-page12.txt: [('-', '')] PHJ19020601-V17-06-page13.txt: [('SAN-', 'SAN')] PHJ19020601-V17-06-page14.txt: [('"--', '"-')] PHJ19020601-V17-06-page16.txt: [('unevan-', 'unevan')] PHJ19020601-V17-06-page2.txt: [('SUBSCRIP-', 'SUBSCRIP')] PHJ19020601-V17-06-page24.txt: [('irri-', 'irri')] PHJ19020601-V17-06-page26.txt: [('posi-', 'posi')] PHJ19020601-V17-06-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020601-V17-06-page32.txt: [('Govern-', 'Govern')] PHJ19020601-V17-06-page7.txt: [('-eaten', 'eaten')] PHJ19020701-V17-07-page10.txt: [('con-', 'con')] PHJ19020701-V17-07-page11.txt: [('--', '-')] PHJ19020701-V17-07-page12.txt: [('pro-', 'pro'), ('fif-', 'fif')] PHJ19020701-V17-07-page14.txt: [('com-', 'com')] PHJ19020701-V17-07-page2.txt: [('SUBSCRIP-', 'SUBSCRIP')] PHJ19020701-V17-07-page23.txt: [('min-', 'min')] PHJ19020701-V17-07-page24.txt: [('re-', 're')] PHJ19020701-V17-07-page25.txt: [('-cream', 'cream')] PHJ19020701-V17-07-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020701-V17-07-page30.txt: [('-', ''), ('-', '')] PHJ19020701-V17-07-page35.txt: [('r-', 'r')] PHJ19020701-V17-07-page5.txt: [('temper-', 'temper')] PHJ19020801-V17-08-page13.txt: [('-', ''), ('-', '')] PHJ19020801-V17-08-page15.txt: [('al-', 'al')] PHJ19020801-V17-08-page2.txt: [('-', '')] PHJ19020801-V17-08-page25.txt: [('-', '')] PHJ19020801-V17-08-page28.txt: [('JOUR-', 'JOUR')] PHJ19020801-V17-08-page29.txt: [('Train-', 'Train')] PHJ19020801-V17-08-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020801-V17-08-page30.txt: [('---', '--'), ('-', ''), ('--......', '-......'), ('-.', '.'), ('-', ''), ('----', '---'), ('-', ''), ('---', '--')] PHJ19020801-V17-08-page31.txt: [('-', '')] PHJ19020801-V17-08-page5.txt: [('com-', 'com')] PHJ19020801-V17-08-page7.txt: [('re-', 're')] PHJ19020801-V17-08-page8.txt: [('re-', 're')] PHJ19020901-V17-09-page1.txt: [('-', ''), ("'-", "'"), ('-', '')] PHJ19020901-V17-09-page10.txt: [('dif-', 'dif')] PHJ19020901-V17-09-page11.txt: [('--In', '-In'), ('--Certainly', '-Certainly'), ('deterio-', 'deterio')] PHJ19020901-V17-09-page13.txt: [('cya-', 'cya')] PHJ19020901-V17-09-page15.txt: [('yes-', 'yes')] PHJ19020901-V17-09-page18.txt: [('consumption.--', 'consumption.-')] PHJ19020901-V17-09-page2.txt: [('-.onto', '.onto')] PHJ19020901-V17-09-page22.txt: [('--Dietetic', '-Dietetic')] PHJ19020901-V17-09-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19020901-V17-09-page34.txt: [('--', '-'), ('-', '')] PHJ19020901-V17-09-page4.txt: [('-', ''), ('-', '')] PHJ19020901-V17-09-page7.txt: [('-and', 'and'), ('QUANTI-', 'QUANTI')] PHJ19021001-V17-10-page11.txt: [('pro-', 'pro')] PHJ19021001-V17-10-page15.txt: [('---the', '--the'), ('heal-', 'heal')] PHJ19021001-V17-10-page18.txt: [('in--', 'in-')] PHJ19021001-V17-10-page2.txt: [('-', '')] PHJ19021001-V17-10-page21.txt: [('-herself', 'herself')] PHJ19021001-V17-10-page22.txt: [('to-', 'to')] PHJ19021001-V17-10-page28.txt: [('-will', 'will'), ('-page', 'page')] PHJ19021001-V17-10-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19021001-V17-10-page34.txt: [('-', '')] PHJ19021101-V17-11-page1.txt: [('-S', 'S')] PHJ19021101-V17-11-page10.txt: [('influ-', 'influ'), ('-', ''), ('phys-', 'phys')] PHJ19021101-V17-11-page11.txt: [('-', '')] PHJ19021101-V17-11-page12.txt: [('irritability--', 'irritability-'), ('-', '')] PHJ19021101-V17-11-page13.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19021101-V17-11-page19.txt: [('-physical', 'physical')] PHJ19021101-V17-11-page2.txt: [('-', '')] PHJ19021101-V17-11-page22.txt: [('-', ''), ("and'Bata-", "and'Bata")] PHJ19021101-V17-11-page25.txt: [('con-', 'con')] PHJ19021101-V17-11-page29.txt: [('Train-', 'Train')] PHJ19021101-V17-11-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19021101-V17-11-page34.txt: [('NI-', 'NI'), ('-', ''), ('-', '')] PHJ19021101-V17-11-page36.txt: [('like-', 'like')] PHJ19021101-V17-11-page6.txt: [('dis-', 'dis'), ('vig-', 'vig')] PHJ19021101-V17-11-page7.txt: [('hay-', 'hay'), ('unhy-', 'unhy')] PHJ19021201-V17-12-page20.txt: [('perish-', 'perish')] PHJ19021201-V17-12-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19021201-V17-12-page31.txt: [('-cent', 'cent')] PHJ19021201-V17-12-page32.txt: [('-ice-President', 'ice-President')] PHJ19021201-V17-12-page34.txt: [('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19021201-V17-12-page5.txt: [('tem-', 'tem'), ('be-', 'be')] PHJ19030101-V18-01-page11.txt: [("-day's", "day's")] PHJ19030101-V18-01-page13.txt: [('---', '--')] PHJ19030101-V18-01-page15.txt: [('-', '')] PHJ19030101-V18-01-page2.txt: [('Temper-', 'Temper')] PHJ19030101-V18-01-page22.txt: [('-', ''), ('-', '')] PHJ19030101-V18-01-page27.txt: [('DISINFECT-', 'DISINFECT'), ('OR-', 'OR')] PHJ19030101-V18-01-page29.txt: [('con-', 'con')] PHJ19030101-V18-01-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030101-V18-01-page30.txt: [('-', '')] PHJ19030101-V18-01-page31.txt: [('Train-', 'Train')] PHJ19030101-V18-01-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030101-V18-01-page35.txt: [('.-', '.'), ('f-', 'f'), ('-.', '.'), ('L-', 'L'), ('..-', '..'), ('.-', '.'), ('.-', '.')] PHJ19030101-V18-01-page5.txt: [('strengthen-', 'strengthen')] PHJ19030101-V18-01-page7.txt: [('in-', 'in')] PHJ19030201-V18-02-page16.txt: [('Ga-', 'Ga')] PHJ19030201-V18-02-page22.txt: [('be-', 'be')] PHJ19030201-V18-02-page25.txt: [('JouR-', 'JouR')] PHJ19030201-V18-02-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030201-V18-02-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030201-V18-02-page35.txt: [('-', ''), ('-', '')] PHJ19030201-V18-02-page36.txt: [('-', '')] PHJ19030201-V18-02-page9.txt: [('neural-', 'neural')] PHJ19030301-V18-03-page11.txt: [('walk-', 'walk'), ('impor-', 'impor')] PHJ19030301-V18-03-page12.txt: [('dis-', 'dis')] PHJ19030301-V18-03-page18.txt: [("-Woman's", "Woman's")] PHJ19030301-V18-03-page25.txt: [('per-', 'per')] PHJ19030301-V18-03-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030301-V18-03-page31.txt: [('-', '')] PHJ19030301-V18-03-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030301-V18-03-page5.txt: [('prevent-', 'prevent')] PHJ19030301-V18-03-page7.txt: [('-another', 'another')] PHJ19030301-V18-03-page9.txt: [('peo-', 'peo')] PHJ19030401-V18-04-page13.txt: [('Emer-', 'Emer')] PHJ19030401-V18-04-page16.txt: [('--allowing', '-allowing')] PHJ19030401-V18-04-page2.txt: [('-', '')] PHJ19030401-V18-04-page23.txt: [('theo-', 'theo'), ('ques-', 'ques')] PHJ19030401-V18-04-page26.txt: [('-He', 'He')] PHJ19030401-V18-04-page28.txt: [('STAM-', 'STAM')] PHJ19030401-V18-04-page29.txt: [('-', ''), ('-page', 'page'), ('-page', 'page')] PHJ19030401-V18-04-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030401-V18-04-page30.txt: [('---------', '--------'), ('-', ''), ('Ar--', 'Ar-'), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('-', '')] PHJ19030401-V18-04-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030401-V18-04-page35.txt: [('tingtatnitfr-', 'tingtatnitfr'), ('T-', 'T'), ('-', '')] PHJ19030401-V18-04-page36.txt: [('-', ''), ('--..', '-..')] PHJ19030401-V18-04-page5.txt: [('-', ''), ('-it', 'it')] PHJ19030401-V18-04-page8.txt: [('in-', 'in')] PHJ19030501-V18-05-page19.txt: [('EDU-', 'EDU')] PHJ19030501-V18-05-page2.txt: [('NENNP--now-', 'NENNP--now')] PHJ19030501-V18-05-page20.txt: [('quarrel-', 'quarrel')] PHJ19030501-V18-05-page24.txt: [('JOUR-', 'JOUR')] PHJ19030501-V18-05-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030501-V18-05-page30.txt: [('-.bur', '.bur'), ('w-', 'w'), ('-', ''), ('-', ''), ('....-', '....'), ('alb-', 'alb'), ('c---', 'c--'), ('-', ''), ('-.', '.'), ('-----', '----'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('------', '-----')] PHJ19030501-V18-05-page34.txt: [('D-', 'D'), ('-lb.', 'lb.'), ('-', ''), ('-', ''), ('-', '')] PHJ19030501-V18-05-page35.txt: [('SCIENTIFIC-', 'SCIENTIFIC'), ('-', ''), ('-', '')] PHJ19030601-V18-06-page11.txt: [('wake-', 'wake'), ('activ-', 'activ'), ('at-', 'at')] PHJ19030601-V18-06-page16.txt: [('disappoint-', 'disappoint')] PHJ19030601-V18-06-page18.txt: [('--Bice', '-Bice')] PHJ19030601-V18-06-page19.txt: [('tab-', 'tab'), ('lit-', 'lit')] PHJ19030601-V18-06-page2.txt: [('r--', 'r-'), ('-cent', 'cent')] PHJ19030601-V18-06-page22.txt: [('con-', 'con')] PHJ19030601-V18-06-page27.txt: [('Biscuit.--', 'Biscuit.-')] PHJ19030601-V18-06-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030601-V18-06-page30.txt: [('s-', 's'), ('-', ''), ('-', ''), ("-I'", "I'"), ('R.-', 'R.'), ('V"-', 'V"'), ('-', ''), ("'-------", "'------"), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-be', 'be'), ('-', ''), ('.-', '.'), ('-f', 'f'), ("-'", "'"), ('-', ''), ('--', '-'), ('-', ''), ('-', '')] PHJ19030601-V18-06-page34.txt: [('-', ''), ('-', ''), ('-', ''), ('-lb.', 'lb.')] PHJ19030601-V18-06-page35.txt: [('SCIENTIFIC-', 'SCIENTIFIC')] PHJ19030601-V18-06-page6.txt: [('les-', 'les')] PHJ19030701-V18-07-page15.txt: [('-', ''), ('prescrip-', 'prescrip'), ('-', '')] PHJ19030701-V18-07-page21.txt: [('-please', 'please')] PHJ19030701-V18-07-page22.txt: [('be-', 'be')] PHJ19030701-V18-07-page28.txt: [('JOUR-', 'JOUR'), ('-', '')] PHJ19030701-V18-07-page3.txt: [('NUTTY-', 'NUTTY')] PHJ19030701-V18-07-page31.txt: [('-SIGNS', 'SIGNS'), ('-PAGE', 'PAGE')] PHJ19030701-V18-07-page34.txt: [('-lb.', 'lb.'), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19030701-V18-07-page35.txt: [('care-', 'care'), ("'tci-", "'tci"), ('-', '')] PHJ19030701-V18-07-page36.txt: [('prepara-', 'prepara'), ('-p', 'p'), ('c-', 'c')] PHJ19030801-V18-08-page12.txt: [('-', ''), ('-', ''), ('-', ''), ('arrange-', 'arrange')] PHJ19030801-V18-08-page13.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030801-V18-08-page14.txt: [('pre-', 'pre')] PHJ19030801-V18-08-page18.txt: [('for-', 'for')] PHJ19030801-V18-08-page31.txt: [('-PAGE', 'PAGE')] PHJ19030801-V18-08-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030901-V18-09-page1.txt: [('Culture-', 'Culture'), ('Rahy-', 'Rahy')] PHJ19030901-V18-09-page12.txt: [('-', '')] PHJ19030901-V18-09-page16.txt: [('bear-', 'bear')] PHJ19030901-V18-09-page21.txt: [('-page', 'page')] PHJ19030901-V18-09-page25.txt: [('com-', 'com')] PHJ19030901-V18-09-page27.txt: [('--', '-'), ('-', '')] PHJ19030901-V18-09-page29.txt: [('ot-', 'ot'), ('---si-', '--si-'), ('-..', '..'), ('ti-', 'ti'), ('-', ''), ('-', ''), ('JIA-', 'JIA'), ('-', ''), ('-.', '.')] PHJ19030901-V18-09-page30.txt: [('Man-', 'Man')] PHJ19030901-V18-09-page31.txt: [('-', '')] PHJ19030901-V18-09-page32.txt: [('-....', '....'), ('--', '-'), ('-', ''), ('-', ''), ("--i'-", "-i'-"), ('-----', '----'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('--ri---m---', '-ri---m---'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('--', '-'), ('-', ''), ('-', ''), ('--...', '-...'), ("..aill'-", "..aill'"), ('-', ''), ("'-", "'"), ('-----', '----'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-...Lii', '...Lii'), ('A-', 'A'), ('-a', 'a'), ('-.', '.')] PHJ19030901-V18-09-page34.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19030901-V18-09-page35.txt: [('care-', 'care'), ('rA-', 'rA'), ('-', '')] PHJ19030901-V18-09-page4.txt: [('-', ''), ('X---', 'X--'), ('-', ''), ('-', ''), ('------------', '-----------'), ("'---", "'--"), ('I-', 'I'), ('I-', 'I')] PHJ19031001-V18-10-page10.txt: [('-', ''), ('im-', 'im')] PHJ19031001-V18-10-page11.txt: [('pto-', 'pto')] PHJ19031001-V18-10-page16.txt: [('phi-', 'phi')] PHJ19031001-V18-10-page2.txt: [('-', ''), ('-', ''), ('-', '')] PHJ19031001-V18-10-page20.txt: [('de-', 'de')] PHJ19031001-V18-10-page23.txt: [('offen-', 'offen'), ('poi-', 'poi'), ('jour-', 'jour'), ('poi-', 'poi')] PHJ19031001-V18-10-page24.txt: [('-numerous', 'numerous')] PHJ19031001-V18-10-page25.txt: [('-', '')] PHJ19031001-V18-10-page29.txt: [('-', '')] PHJ19031001-V18-10-page31.txt: [('martyr-fashion--', 'martyr-fashion-')] PHJ19031001-V18-10-page37.txt: [('I-', 'I')] PHJ19031001-V18-10-page5.txt: [('-', ''), ('add-', 'add')] PHJ19031001-V18-10-page6.txt: [('-', '')] PHJ19031001-V18-10-page7.txt: [('-', ''), ('anti-', 'anti'), ('-', ''), ('de-', 'de'), ('-', '')] PHJ19031001-V18-10-page8.txt: [('fornica-', 'fornica')] PHJ19031001-V18-10-page9.txt: [('grind-', 'grind'), ('-', ''), ('Short-', 'Short'), ('vegetarian-', 'vegetarian')] PHJ19031101-V18-11-page11.txt: [('em-', 'em')] PHJ19031101-V18-11-page12.txt: [('hyper-', 'hyper')] PHJ19031101-V18-11-page15.txt: [('for--', 'for-')] PHJ19031101-V18-11-page29.txt: [('mother--', 'mother-')] PHJ19031101-V18-11-page3.txt: [('-', ''), ('-', ''), ('-', ''), ('-..', '..'), ('-', ''), ('-', ''), ('-..', '..'), ('-', ''), ('-', ''), ('-', ''), ('.-', '.'), ('-', ''), ('V-', 'V'), ('-', '')] PHJ19031101-V18-11-page30.txt: [('be.-', 'be.')] PHJ19031101-V18-11-page36.txt: [('S-', 'S'), ('-', '')] PHJ19031101-V18-11-page7.txt: [('objec-', 'objec')] PHJ19031201-V18-12-page17.txt: [('-', '')] PHJ19031201-V18-12-page20.txt: [('beck-', 'beck')] PHJ19031201-V18-12-page24.txt: [('bun-', 'bun')] PHJ19031201-V18-12-page25.txt: [('Wood-', 'Wood')] PHJ19031201-V18-12-page3.txt: [('-..', '..'), ('..---', '..--'), ('-', ''), ("'..-", "'.."), ('.-', '.'), ('-', ''), ('Represent-', 'Represent'), ('ag-', 'ag'), ('-t', 't'), ('.-', '.'), ('-dfill', 'dfill'), ('-f', 'f'), ('--', '-'), ('-', ''), ('---', '--'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19031201-V18-12-page32.txt: [('-e', 'e'), ('.--', '.-'), ('-', ''), ('------', '-----'), ('-', ''), ('....--', '....-'), ('..---."--', '..---."-'), ('-', ''), ('-', ''), ('.....-', '.....'), ('.---', '.--'), ('---', '--'), ('r-', 'r'), ('-', ''), ('-', ''), ('---r-', '--r-'), ('-', ''), ('-', ''), ('----ii', '---ii'), ('-', ''), ('-.', '.'), ('-', ''), ('-', ''), ('--', '-'), ('-ilili', 'ilili'), ('-', ''), ('f-', 'f'), ('.-', '.'), ('--', '-')] PHJ19031201-V18-12-page34.txt: [('-', ''), ('-', '')] PHJ19031201-V18-12-page6.txt: [('dis-', 'dis')] PHJ19040101-V19-01-page28.txt: [('be-', 'be')] PHJ19040101-V19-01-page29.txt: [('----it', '---it')] PHJ19040101-V19-01-page32.txt: [('Cook-', 'Cook')] PHJ19040101-V19-01-page34.txt: [('-', ''), ('-', '')] PHJ19040101-V19-01-page4.txt: [('-ADDER', 'ADDER')] PHJ19040101-V19-01-page7.txt: [('water-ab-', 'water-ab')] PHJ19040101-V19-01-page8.txt: [('-ick', 'ick')] PHJ19040201-V19-02-page11.txt: [('-There', 'There')] PHJ19040201-V19-02-page2.txt: [('-tou', 'tou'), ('"-', '"')] PHJ19040201-V19-02-page23.txt: [('ex-', 'ex')] PHJ19040201-V19-02-page29.txt: [('.---', '.--')] PHJ19040201-V19-02-page30.txt: [('-', '')] PHJ19040201-V19-02-page31.txt: [('-I', 'I'), ('-', ''), ('--', '-'), ('--i', '-i'), ('-f-', 'f-'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('..-', '..'), ('oti-', 'oti'), ('-', ''), ('-', ''), ('-', '')] PHJ19040201-V19-02-page32.txt: [('Cook-', 'Cook')] PHJ19040201-V19-02-page34.txt: [('-ctriu', 'ctriu'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19040201-V19-02-page35.txt: [('-', ''), ('N-', 'N'), ('t--', 't-'), ('k----', 'k---')] PHJ19040301-V19-03-page2.txt: [('--', '-'), ('-atin', 'atin'), ('-', ''), ('-', '')] PHJ19040301-V19-03-page27.txt: [('-', ''), ('-', ''), ('N-', 'N')] PHJ19040301-V19-03-page29.txt: [('-', ''), ('-', ''), ('-..."', '..."'), ('..-', '..'), ('-', ''), ('--ma', '-ma'), ('-', ''), ('-', ''), ('-...', '...'), ('----', '---'), ('ii-', 'ii'), ('--', '-'), ('-', ''), ('-', ''), ('-', ''), ('-.', '.'), ("-'", "'"), ('it-', 'it'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ("'--", "'-")] PHJ19040301-V19-03-page31.txt: [('---', '--'), ('..-', '..'), ('-', '')] PHJ19040301-V19-03-page33.txt: [('r-', 'r')] PHJ19040301-V19-03-page34.txt: [('-clouwtyiu', 'clouwtyiu'), ('-', ''), ('-t-', 't-'), ('-', ''), ('-', ''), ('-', '')] PHJ19040401-V19-04-page15.txt: [('irregu-', 'irregu')] PHJ19040401-V19-04-page17.txt: [('ob-', 'ob')] PHJ19040401-V19-04-page19.txt: [('or-', 'or')] PHJ19040401-V19-04-page29.txt: [('Text-', 'Text')] PHJ19040401-V19-04-page30.txt: [('--', '-')] PHJ19040401-V19-04-page34.txt: [('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19040401-V19-04-page35.txt: [('-', '')] PHJ19040401-V19-04-page36.txt: [('-.', '.'), ('-', ''), ("-'", "'"), ('-', ''), ('....-', '....'), ('-', ''), ('-A-', 'A-')] PHJ19040401-V19-04-page6.txt: [('-', ''), ('.-', '.')] PHJ19040501-V19-05-page11.txt: [('JOUR-', 'JOUR')] PHJ19040501-V19-05-page12.txt: [('pub-', 'pub')] PHJ19040501-V19-05-page13.txt: [('def-', 'def')] PHJ19040501-V19-05-page16.txt: [('hope-', 'hope')] PHJ19040501-V19-05-page2.txt: [("-'L", "'L")] PHJ19040501-V19-05-page29.txt: [('Pub-', 'Pub')] PHJ19040501-V19-05-page33.txt: [('-', '')] PHJ19040501-V19-05-page34.txt: [('-clfou', 'clfou'), ('-iti', 'iti'), ('-', ''), ('-', ''), ('-', ''), ('-', ''), ('-', '')] PHJ19040501-V19-05-page5.txt: [('be-', 'be')] PHJ19040501-V19-05-page8.txt: [('Him-', 'Him')] PHJ19040501-V19-05-page9.txt: [('des-', 'des')] PHJ19040601-V19-06-page1.txt: [('--', '-')] PHJ19040601-V19-06-page10.txt: [('elimina-', 'elimina')] PHJ19040601-V19-06-page18.txt: [('doubt-', 'doubt')] PHJ19040601-V19-06-page19.txt: [('JouR-', 'JouR')] PHJ19040601-V19-06-page23.txt: [('--', '-')] PHJ19040601-V19-06-page3.txt: [('phy-', 'phy')] PHJ19040601-V19-06-page5.txt: [('-', '')] PHJ19040601-V19-06-page7.txt: [('in-', 'in')] PHJ19040601-V19-06-page9.txt: [('Pawlow-', 'Pawlow')]
In [20]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction3 Average verified rate: 0.9803429572874915 Average of error rates: 0.027733170406576316 Total token count: 2850734
In [21]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[21]:
[('m', 2734), ('d', 2327), ("'", 2191), ('e', 1391), ('r', 1266), ('t', 1202), ('w', 1175), ('n', 1117), ('co', 1105), ('f', 794), ('g', 766), ('x', 647), ('lb', 588), ('th', 260), ('sel', 251), ('oo', 236), ('mo', 233), ('pp', 228), ('k', 216), ('z', 214), ('u', 199), ("an'", 192), ('q', 133), ('ex', 111), ('al', 107), ('ournal', 105), ('pa', 88), ("'tis", 87), ('te', 86), ('ro', 85), ('oz', 81), ('ga', 81), ("'the", 80), ('va', 73), ('munn', 73), ('ti', 73), ('io', 72), ("''", 68), ('-', 65), ('tion', 63), ("infants'", 61), ('id', 59), ('re', 58), ('zo', 55), ('viperance', 54), ('yo', 48), ('si', 48), ('urnal', 48), ("'em", 48), ('fahr', 46)]
Correction 4 -- Remove extra quotation marks¶
In [22]:
# %load shared_elements/replace_extra_quotation_marks.py
prev = cycle
cycle = "correction4"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = GoH.utilities.tokenize_text(text)
corrections = []
for token in tokens:
token_list = list(token)
last_char = token_list[-1]
if last_char is "'":
if len(token) > 1:
if token_list[-2] is 's' or 'S':
pass
else:
corrections.append((token, re.sub(r"'", r"", token)))
else:
pass
elif token[0] is "'":
corrections.append((token, re.sub(r"'", r"", token)))
else:
pass
if len(corrections) > 0:
print('{}: {}'.format(filename, corrections))
for correction in corrections:
content = GoH.clean.replace_pair(correction, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18850601-V01-01-page12.txt: [("'Blessed", 'Blessed')] PHJ18850601-V01-01-page16.txt: [("'graham", 'graham')] PHJ18850601-V01-01-page17.txt: [("'should", 'should')] PHJ18850601-V01-01-page24.txt: [("'natural", 'natural'), ("'A", 'A'), ("'VP", 'VP')] PHJ18850601-V01-01-page7.txt: [("'Tis", 'Tis'), ("'Twill", 'Twill'), ("'Tis", 'Tis')] PHJ18850601-V01-01-page8.txt: [("'Now", 'Now')] PHJ18850801-V01-02-page16.txt: [("'But", 'But')] PHJ18850801-V01-02-page5.txt: [("'of", 'of')] PHJ18850801-V01-02-page7.txt: [("'not", 'not')] PHJ18851001-V01-03-page11.txt: [("'fortunately", 'fortunately')] PHJ18851001-V01-03-page12.txt: [("'Taper", 'Taper'), ("'taper", 'taper'), ("'a", 'a')] PHJ18851001-V01-03-page13.txt: [("'Sometimes", 'Sometimes'), ("'A", 'A')] PHJ18851001-V01-03-page18.txt: [("'What", 'What')] PHJ18851001-V01-03-page19.txt: [("'healthy", 'healthy')] PHJ18851001-V01-03-page20.txt: [("'heroine", 'heroine')] PHJ18851001-V01-03-page21.txt: [("'sure", 'sure')] PHJ18851001-V01-03-page22.txt: [("'Our", 'Our')] PHJ18851001-V01-03-page24.txt: [("'ii", 'ii')] PHJ18851201-V01-04-page10.txt: [("'It", 'It'), ("'The", 'The'), ("'He", 'He')] PHJ18851201-V01-04-page14.txt: [("'the", 'the'), ("'The", 'The'), ("'Spect", 'Spect')] PHJ18851201-V01-04-page15.txt: [("'to", 'to'), ("'Indeed", 'Indeed'), ("'but", 'but'), ("'came", 'came')] PHJ18851201-V01-04-page18.txt: [("'e", 'e')] PHJ18851201-V01-04-page22.txt: [("'s", 's')] PHJ18851201-V01-04-page4.txt: [("'doubt", 'doubt'), ("'and", 'and')] PHJ18860201-V01-05-page12.txt: [("'ruddy", 'ruddy')] PHJ18860201-V01-05-page19.txt: [("'Can't", 'Cant'), ("'Why", 'Why')] PHJ18860201-V01-05-page23.txt: [("'I", 'I')] PHJ18860201-V01-05-page3.txt: [("'of", 'of')] PHJ18860401-V01-06-page1.txt: [("'spring.", 'spring.')] PHJ18860401-V01-06-page12.txt: [("'Indifference", 'Indifference'), ("'is", 'is')] PHJ18860401-V01-06-page13.txt: [("'Shall", 'Shall'), ("'Yes", 'Yes'), ("'No", 'No')] PHJ18860401-V01-06-page24.txt: [("'For", 'For')] PHJ18860401-V01-06-page4.txt: [("'first", 'first')] PHJ18860401-V01-06-page7.txt: [("'morning", 'morning')] PHJ18860401-V01-06-page9.txt: [("'will", 'will')] PHJ18860601-V02-01-page10.txt: [("'pears", 'pears'), ("'tis", 'tis'), ("'fore", 'fore'), ("'em.", 'em.'), ("'em", 'em'), ("'em", 'em')] PHJ18860601-V02-01-page11.txt: [("'em", 'em'), ("'em.", 'em.')] PHJ18860601-V02-01-page12.txt: [("'and", 'and'), ("'users", 'users')] PHJ18860601-V02-01-page13.txt: [("'respiratory", 'respiratory'), ("'regulator", 'regulator')] PHJ18860601-V02-01-page15.txt: [("'er", 'er'), ("'stonishing", 'stonishing'), ("'er", 'er')] PHJ18860601-V02-01-page16.txt: [("'The", 'The')] PHJ18860601-V02-01-page23.txt: [("'How", 'How')] PHJ18860601-V02-01-page5.txt: [("'midst", 'midst')] PHJ18860601-V02-01-page9.txt: [("'The", 'The')] PHJ18860801-V02-02-page1.txt: [("''Ir", 'Ir')] PHJ18860801-V02-02-page17.txt: [("'spect", 'spect')] PHJ18860801-V02-02-page18.txt: [("'oo", 'oo')] PHJ18860801-V02-02-page22.txt: [("'Tish", 'Tish')] PHJ18860801-V02-02-page26.txt: [("'ffia", 'ffia')] PHJ18860801-V02-02-page28.txt: [("'Ibrown", 'Ibrown')] PHJ18860801-V02-02-page6.txt: [("'They", 'They'), ("'But", 'But')] PHJ18861001-V02-03-page1.txt: [("'.", '.'), ("'..", '..')] PHJ18861001-V02-03-page13.txt: [("'The", 'The')] PHJ18861001-V02-03-page14.txt: [("'There", 'There'), ("'em.", 'em.'), ("'em", 'em'), ("'em", 'em'), ("'em", 'em')] PHJ18861001-V02-03-page16.txt: [("'s", 's')] PHJ18861001-V02-03-page17.txt: [("'tobacco", 'tobacco')] PHJ18861001-V02-03-page26.txt: [("'Rip", 'Rip'), ("'created", 'created'), ("'very", 'very')] PHJ18861001-V02-03-page27.txt: [("'Tress", 'Tress')] PHJ18861001-V02-03-page28.txt: [('\'m"aenrnel', 'm"aenrnel'), ("'TEMPERANCE", 'TEMPERANCE'), ("'TEMPERANCE", 'TEMPERANCE')] PHJ18861001-V02-03-page29.txt: [("'What", 'What')] PHJ18861001-V02-03-page5.txt: [("'A", 'A')] PHJ18861201-V02-04-page1.txt: [("'NNN.", 'NNN.'), ("'...", '...'), ("'i", 'i')] PHJ18861201-V02-04-page13.txt: [("'corsets", 'corsets')] PHJ18861201-V02-04-page14.txt: [("'twas", 'twas'), ("'tis", 'tis')] PHJ18861201-V02-04-page22.txt: [("'very", 'very')] PHJ18861201-V02-04-page26.txt: [("'x.", 'x.')] PHJ18861201-V02-04-page29.txt: [("'In", 'In'), ("'In", 'In')] PHJ18861201-V02-04-page30.txt: [("'rum", 'rum')] PHJ18861201-V02-04-page7.txt: [("'or", 'or')] PHJ18870201-V02-05-page1.txt: [("'.s.", '.s.'), ("''XXN.V..", 'XXN.V..')] PHJ18870201-V02-05-page10.txt: [("'of", 'of')] PHJ18870201-V02-05-page18.txt: [("'great", 'great')] PHJ18870201-V02-05-page2.txt: [("'to", 'to')] PHJ18870201-V02-05-page25.txt: [("'clothing", 'clothing'), ("'the", 'the')] PHJ18870201-V02-05-page29.txt: [('\'e"', 'e"')] PHJ18870201-V02-05-page9.txt: [("'that", 'that'), ("'Taint", 'Taint')] PHJ18870401-V02-06-page16.txt: [("'em.", 'em.'), ("'em.", 'em.'), ("'em", 'em'), ("'em", 'em'), ("'em", 'em'), ("'em", 'em'), ("'did", 'did')] PHJ18870401-V02-06-page18.txt: [("'etc.", 'etc.')] PHJ18870401-V02-06-page22.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis'), ("'tis", 'tis'), ("'ots", 'ots')] PHJ18870401-V02-06-page26.txt: [("'Society", 'Society')] PHJ18870401-V02-06-page27.txt: [("'Territory", 'Territory')] PHJ18870401-V02-06-page28.txt: [("'The", 'The')] PHJ18870401-V02-06-page29.txt: [("'The", 'The')] PHJ18870401-V02-06-page31.txt: [("'arks", 'arks')] PHJ18870401-V02-06-page4.txt: [("'In", 'In')] PHJ18870401-V02-06-page5.txt: [("'T", 'T')] PHJ18870401-V02-06-page8.txt: [("'God", 'God'), ("'drinking", 'drinking')] PHJ18870401-V02-06-page9.txt: [("'If", 'If'), ("'unseasonable", 'unseasonable')] PHJ18870601-V02-07-page1.txt: [("'N", 'N')] PHJ18870601-V02-07-page16.txt: [("'taint", 'taint'), ("'cause", 'cause'), ("'Vice", 'Vice')] PHJ18870601-V02-07-page17.txt: [("'Twas", 'Twas')] PHJ18870601-V02-07-page20.txt: [("'hard-pan", 'hard-pan')] PHJ18870601-V02-07-page24.txt: [("'II", 'II'), ("'aint", 'aint')] PHJ18870601-V02-07-page25.txt: [("'These", 'These'), ("'intended", 'intended')] PHJ18870601-V02-07-page26.txt: [("'gainst", 'gainst')] PHJ18870601-V02-07-page27.txt: [("'Walla", 'Walla')] PHJ18870601-V02-07-page29.txt: [("'These", 'These'), ("'Breakfast", 'Breakfast')] PHJ18870601-V02-07-page3.txt: [("'healthy", 'healthy')] PHJ18870801-V02-08-page1.txt: [("'.", '.'), ("'.", '.'), ("'.", '.'), ("'.", '.')] PHJ18870801-V02-08-page12.txt: [("'things", 'things')] PHJ18870801-V02-08-page18.txt: [("'Twas", 'Twas'), ("'em", 'em')] PHJ18870801-V02-08-page2.txt: [("'New", 'New')] PHJ18870801-V02-08-page21.txt: [("'interesting", 'interesting')] PHJ18870801-V02-08-page23.txt: [('\'"MY', '"MY')] PHJ18870801-V02-08-page24.txt: [("'bout", 'bout')] PHJ18870801-V02-08-page26.txt: [("'poor", 'poor')] PHJ18870801-V02-08-page29.txt: [("''....", '....')] PHJ18870801-V02-08-page32.txt: [("'For", 'For'), ("'oat", 'oat')] PHJ18870801-V02-08-page6.txt: [("'day", 'day')] PHJ18870801-V02-08-page8.txt: [("'Crystal", 'Crystal')] PHJ18870801-V02-08-page9.txt: [("'work", 'work')] PHJ18871001-V02-09-page1.txt: [("'.", '.'), ("'s.", 's.')] PHJ18871001-V02-09-page12.txt: [("'round", 'round'), ("'nough", 'nough'), ("'pears", 'pears')] PHJ18871001-V02-09-page13.txt: [("'spose", 'spose')] PHJ18871001-V02-09-page15.txt: [("'or", 'or')] PHJ18871001-V02-09-page23.txt: [("'of", 'of'), ("'directly", 'directly')] PHJ18871001-V02-09-page25.txt: [("'No.", 'No.')] PHJ18871001-V02-09-page32.txt: [("'Muscular", 'Muscular')] PHJ18871001-V02-09-page34.txt: [("'Two", 'Two')] PHJ18871001-V02-09-page5.txt: [("'And", 'And')] PHJ18871001-V02-09-page8.txt: [("'to", 'to')] PHJ18880101-V03-01-page12.txt: [("'Mid", 'Mid')] PHJ18880101-V03-01-page13.txt: [("'ong.", 'ong.'), ("'following", 'following')] PHJ18880101-V03-01-page14.txt: [("'evil", 'evil')] PHJ18880101-V03-01-page16.txt: [("'that", 'that'), ("'Faith", 'Faith')] PHJ18880101-V03-01-page22.txt: [("'gin", 'gin'), ("'midst", 'midst')] PHJ18880101-V03-01-page26.txt: [("'N", 'N')] PHJ18880101-V03-01-page28.txt: [("'I", 'I')] PHJ18880101-V03-01-page4.txt: [("'Have", 'Have'), ('\'flesh.\'"', 'flesh."')] PHJ18880101-V03-01-page6.txt: [("'dines", 'dines')] PHJ18880201-V03-02-page11.txt: [("'tisn't", 'tisnt'), ("'tain't", 'taint')] PHJ18880201-V03-02-page14.txt: [("'It", 'It'), ("'iquors", 'iquors')] PHJ18880201-V03-02-page17.txt: [("'em", 'em'), ("'em.", 'em.')] PHJ18880201-V03-02-page2.txt: [("'give", 'give'), ("'As", 'As')] PHJ18880201-V03-02-page22.txt: [("'Twere", 'Twere')] PHJ18880201-V03-02-page5.txt: [("'Bile", 'Bile')] PHJ18880301-V03-03-page12.txt: [("'also", 'also')] PHJ18880301-V03-03-page18.txt: [("'I", 'I')] PHJ18880301-V03-03-page25.txt: [("'jnI", 'jnI')] PHJ18880301-V03-03-page27.txt: [("'L", 'L')] PHJ18880301-V03-03-page30.txt: [("''.", '.'), ("'i", 'i'), ("'.", '.'), ("'.", '.')] PHJ18880301-V03-03-page7.txt: [("'.", '.')] PHJ18880401-V03-04-page11.txt: [("'But", 'But'), ("'Because", 'Because'), ("'Fannie", 'Fannie'), ("'so", 'so'), ("'Twas", 'Twas')] PHJ18880401-V03-04-page12.txt: [("'Thy", 'Thy')] PHJ18880401-V03-04-page15.txt: [("'a", 'a')] PHJ18880401-V03-04-page16.txt: [("'em", 'em')] PHJ18880401-V03-04-page17.txt: [("'.", '.')] PHJ18880401-V03-04-page2.txt: [("'be", 'be')] PHJ18880401-V03-04-page22.txt: [("'Alice", 'Alice')] PHJ18880401-V03-04-page23.txt: [("'cept", 'cept')] PHJ18880401-V03-04-page27.txt: [("'.", '.'), ("'T", 'T')] PHJ18880401-V03-04-page28.txt: [("'Co", 'Co')] PHJ18880401-V03-04-page32.txt: [("'who", 'who'), ("'who", 'who')] PHJ18880401-V03-04-page5.txt: [("'behind", 'behind')] PHJ18880401-V03-04-page6.txt: [("'em.", 'em.'), ("'em.", 'em.')] PHJ18880401-V03-04-page8.txt: [("'the", 'the')] PHJ18880401-V03-04-page9.txt: [("'I", 'I'), ("'perfectly", 'perfectly')] PHJ18880501-V03-05-page13.txt: [("'Why", 'Why'), ("'the", 'the'), ("'Water", 'Water')] PHJ18880501-V03-05-page14.txt: [("'tis", 'tis')] PHJ18880501-V03-05-page18.txt: [("'Twas", 'Twas')] PHJ18880501-V03-05-page21.txt: [("'Those", 'Those')] PHJ18880501-V03-05-page25.txt: [("'ate", 'ate')] PHJ18880501-V03-05-page26.txt: [("'qllltlioutr", 'qllltlioutr')] PHJ18880501-V03-05-page27.txt: [("'z", 'z'), ('\'""', '""')] PHJ18880501-V03-05-page29.txt: [("'Co", 'Co')] PHJ18880501-V03-05-page7.txt: [("'an", 'an')] PHJ18880501-V03-05-page8.txt: [("'so", 'so'), ("'And", 'And')] PHJ18880501-V03-05-page9.txt: [("'That", 'That')] PHJ18880601-V03-06-page13.txt: [("'Ripeness", 'Ripeness')] PHJ18880601-V03-06-page16.txt: [("'to", 'to')] PHJ18880601-V03-06-page17.txt: [("'gainst", 'gainst'), ("'bout", 'bout')] PHJ18880601-V03-06-page19.txt: [("'and", 'and')] PHJ18880601-V03-06-page20.txt: [("'make", 'make')] PHJ18880601-V03-06-page25.txt: [("'co", 'co')] PHJ18880601-V03-06-page27.txt: [("'MONEY", 'MONEY'), ("'SM", 'SM')] PHJ18880601-V03-06-page28.txt: [("'the", 'the')] PHJ18880601-V03-06-page30.txt: [("'d", 'd')] PHJ18880601-V03-06-page8.txt: [("'to", 'to')] PHJ18880701-V03-07-page10.txt: [("'the", 'the')] PHJ18880701-V03-07-page13.txt: [("'T.", 'T.')] PHJ18880701-V03-07-page15.txt: [("'Additional", 'Additional')] PHJ18880701-V03-07-page21.txt: [("'bleeged", 'bleeged'), ("'bleeged", 'bleeged')] PHJ18880701-V03-07-page27.txt: [("'fc", 'fc')] PHJ18880701-V03-07-page30.txt: [("'Each", 'Each')] PHJ18880701-V03-07-page4.txt: [("'that", 'that')] PHJ18880801-V03-08-page12.txt: [("'J", 'J')] PHJ18880801-V03-08-page14.txt: [("'tis", 'tis')] PHJ18880801-V03-08-page18.txt: [("'notwithstanding", 'notwithstanding')] PHJ18880801-V03-08-page19.txt: [("'twas", 'twas'), ("'twas", 'twas'), ("'twasn't", 'twasnt')] PHJ18880801-V03-08-page26.txt: [("'f", 'f')] PHJ18880801-V03-08-page27.txt: [("'I", 'I'), ("'P", 'P')] PHJ18880801-V03-08-page29.txt: [("'Co", 'Co')] PHJ18880801-V03-08-page31.txt: [("'gaper", 'gaper')] PHJ18880801-V03-08-page32.txt: [("'uncap", 'uncap'), ("'cry", 'cry')] PHJ18880801-V03-08-page8.txt: [("'down", 'down')] PHJ18880901-V03-09-page12.txt: [("'a", 'a')] PHJ18880901-V03-09-page14.txt: [("'twas", 'twas')] PHJ18880901-V03-09-page15.txt: [("'demands", 'demands')] PHJ18880901-V03-09-page16.txt: [("'the", 'the')] PHJ18880901-V03-09-page17.txt: [("'public", 'public'), ("'decomposition", 'decomposition')] PHJ18880901-V03-09-page20.txt: [("'We", 'We')] PHJ18880901-V03-09-page22.txt: [("'neath", 'neath')] PHJ18880901-V03-09-page29.txt: [("'Liberal", 'Liberal')] PHJ18880901-V03-09-page31.txt: [("'Slill", 'Slill')] PHJ18880901-V03-09-page32.txt: [("'Workmanship", 'Workmanship'), ("'Y", 'Y')] PHJ18881001-V03-10-page12.txt: [("'on", 'on')] PHJ18881001-V03-10-page13.txt: [("'What", 'What'), ("'Ah", 'Ah'), ("'they", 'they'), ("'Not", 'Not'), ("'Mean", 'Mean'), ("'that", 'that'), ("'Mr.", 'Mr.'), ("'ere", 'ere'), ("'and", 'and'), ("'em", 'em'), ("'em", 'em')] PHJ18881001-V03-10-page14.txt: [("'my", 'my'), ("'Tis", 'Tis'), ("'Twill", 'Twill')] PHJ18881001-V03-10-page15.txt: [("'all", 'all')] PHJ18881001-V03-10-page18.txt: [("'Though", 'Though'), ("'neath", 'neath'), ("'tis", 'tis')] PHJ18881001-V03-10-page20.txt: [("'After", 'After'), ("'Suddenly", 'Suddenly')] PHJ18881001-V03-10-page21.txt: [("'rest", 'rest')] PHJ18881001-V03-10-page23.txt: [("'em", 'em')] PHJ18881001-V03-10-page24.txt: [("'and", 'and')] PHJ18881001-V03-10-page26.txt: [("'em", 'em'), ("'em", 'em')] PHJ18881001-V03-10-page29.txt: [("'Co", 'Co')] PHJ18881001-V03-10-page31.txt: [("'Sides", 'Sides')] PHJ18881101-V03-11-page1.txt: [("'Duty", 'Duty'), ("'bad", 'bad'), ("'misfortune", 'misfortune')] PHJ18881101-V03-11-page14.txt: [("'Tis", 'Tis')] PHJ18881101-V03-11-page18.txt: [("'Tis", 'Tis'), ("'must", 'must')] PHJ18881101-V03-11-page19.txt: [("'soul", 'soul')] PHJ18881101-V03-11-page20.txt: [("'Whether", 'Whether')] PHJ18881101-V03-11-page24.txt: [("'ow", 'ow'), ("'ooman", 'ooman')] PHJ18881101-V03-11-page25.txt: [("'bout", 'bout'), ("'Gusty", 'Gusty'), ("'vised", 'vised'), ("'spinted", 'spinted'), ("'splain", 'splain')] PHJ18881101-V03-11-page27.txt: [("'iii", 'iii')] PHJ18881101-V03-11-page29.txt: [("'S", 'S')] PHJ18881101-V03-11-page3.txt: [("'burn", 'burn')] PHJ18881101-V03-11-page31.txt: [("'ts", 'ts'), ("'Co", 'Co')] PHJ18881101-V03-11-page7.txt: [("'on", 'on')] PHJ18881201-V03-12-page11.txt: [("'I", 'I'), ("'The", 'The')] PHJ18881201-V03-12-page13.txt: [("'the", 'the')] PHJ18881201-V03-12-page17.txt: [("'cos", 'cos')] PHJ18881201-V03-12-page22.txt: [("'mid", 'mid')] PHJ18881201-V03-12-page27.txt: [("'ciety", 'ciety')] PHJ18881201-V03-12-page28.txt: [("'zr", 'zr')] PHJ18881201-V03-12-page3.txt: [("'Hannah's", 'Hannahs')] PHJ18881201-V03-12-page6.txt: [("'While", 'While'), ("'The", 'The'), ("'physiological", 'physiological'), ("'The", 'The')] PHJ18881201-V03-12-page8.txt: [("'when", 'when')] PHJ18890101-V04-01-page1.txt: [("'Worry", 'Worry')] PHJ18890101-V04-01-page12.txt: [("'their", 'their')] PHJ18890101-V04-01-page13.txt: [("'em", 'em')] PHJ18890101-V04-01-page14.txt: [("'bacco", 'bacco'), ("'bacco", 'bacco')] PHJ18890101-V04-01-page16.txt: [("'a", 'a')] PHJ18890101-V04-01-page17.txt: [("'We", 'We')] PHJ18890101-V04-01-page2.txt: [("'never", 'never')] PHJ18890101-V04-01-page22.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis'), ("'Tis", 'Tis'), ("'Tis", 'Tis')] PHJ18890101-V04-01-page24.txt: [("'.I", '.I')] PHJ18890101-V04-01-page26.txt: [("'looked", 'looked')] PHJ18890101-V04-01-page29.txt: [("'No.", 'No.')] PHJ18890101-V04-01-page31.txt: [("'I", 'I')] PHJ18890201-V04-02-page11.txt: [("'ontaminate", 'ontaminate'), ("'From", 'From')] PHJ18890201-V04-02-page14.txt: [("'spirituous", 'spirituous')] PHJ18890201-V04-02-page16.txt: [("'THE", 'THE')] PHJ18890201-V04-02-page17.txt: [("'the", 'the')] PHJ18890201-V04-02-page18.txt: [("'tis", 'tis')] PHJ18890201-V04-02-page21.txt: [("'that", 'that')] PHJ18890201-V04-02-page24.txt: [("'I", 'I')] PHJ18890201-V04-02-page28.txt: [("'mole..", 'mole..')] PHJ18890201-V04-02-page31.txt: [("'SW", 'SW')] PHJ18890201-V04-02-page32.txt: [("'a", 'a'), ("'Abiding", 'Abiding')] PHJ18890201-V04-02-page4.txt: [("'to", 'to'), ("'.", '.')] PHJ18890201-V04-02-page8.txt: [("'and", 'and'), ("'article.", 'article.')] PHJ18890201-V04-02-page9.txt: [("'artist", 'artist')] PHJ18890301-V04-03-page12.txt: [("'a", 'a')] PHJ18890301-V04-03-page13.txt: [("'Who", 'Who')] PHJ18890301-V04-03-page14.txt: [("'very", 'very'), ("'the", 'the')] PHJ18890301-V04-03-page19.txt: [("'Some", 'Some'), ("'My", 'My')] PHJ18890301-V04-03-page2.txt: [("'nooralgy", 'nooralgy')] PHJ18890301-V04-03-page20.txt: [("'twas", 'twas'), ("'twould", 'twould')] PHJ18890301-V04-03-page24.txt: [("'twas", 'twas'), ("'twas", 'twas')] PHJ18890301-V04-03-page28.txt: [("'C", 'C')] PHJ18890301-V04-03-page3.txt: [("'from", 'from')] PHJ18890301-V04-03-page32.txt: [("'I", 'I')] PHJ18890301-V04-03-page8.txt: [("'that", 'that')] PHJ18890401-V04-04-page17.txt: [("'organs", 'organs')] PHJ18890401-V04-04-page18.txt: [("'through", 'through')] PHJ18890401-V04-04-page24.txt: [("'Twas", 'Twas'), ("'then", 'then')] PHJ18890401-V04-04-page25.txt: [("'The", 'The')] PHJ18890401-V04-04-page27.txt: [("'ro", 'ro'), ("'he", 'he')] PHJ18890401-V04-04-page28.txt: [("'C", 'C'), ("'c'zic.", 'czic.'), ("'WVA.", 'WVA.'), ("'.", '.'), ("'a", 'a'), ("'c", 'c'), ("''Va.d", 'Va.d')] PHJ18890401-V04-04-page3.txt: [("'My", 'My'), ("'go", 'go')] PHJ18890401-V04-04-page31.txt: [("'shortened.", 'shortened.')] PHJ18890401-V04-04-page32.txt: [("'Thermo", 'Thermo')] PHJ18890401-V04-04-page5.txt: [("'Send", 'Send'), ("'enough", 'enough')] PHJ18890401-V04-04-page6.txt: [("'plaster", 'plaster'), ("'something", 'something'), ("'something", 'something'), ("'Something", 'Something'), ("'big-jaw", 'big-jaw')] PHJ18890401-V04-04-page7.txt: [("'big-jaw", 'big-jaw')] PHJ18890401-V04-04-page8.txt: [("'of", 'of')] PHJ18890501-V04-05-page1.txt: [("'Eat", 'Eat')] PHJ18890501-V04-05-page10.txt: [("'gargling", 'gargling')] PHJ18890501-V04-05-page15.txt: [("'Yes", 'Yes'), ("'the", 'the')] PHJ18890501-V04-05-page21.txt: [("'airy", 'airy')] PHJ18890501-V04-05-page24.txt: [("'Tis", 'Tis'), ("'ail", 'ail')] PHJ18890501-V04-05-page25.txt: [("'Misses", 'Misses'), ("'y", 'y')] PHJ18890501-V04-05-page3.txt: [("'as", 'as')] PHJ18890501-V04-05-page31.txt: [("'Co", 'Co'), ("'dress", 'dress')] PHJ18890501-V04-05-page8.txt: [("'taint", 'taint'), ("'twas", 'twas'), ("'tis", 'tis'), ("'till", 'till')] PHJ18890601-V04-06-page12.txt: [("'Tis", 'Tis')] PHJ18890601-V04-06-page16.txt: [("'Tis", 'Tis')] PHJ18890601-V04-06-page19.txt: [("'man", 'man')] PHJ18890601-V04-06-page24.txt: [("'r", 'r')] PHJ18890601-V04-06-page27.txt: [("'and", 'and')] PHJ18890601-V04-06-page29.txt: [('\'"', '"'), ("'.", '.')] PHJ18890601-V04-06-page32.txt: [("'TOY", 'TOY')] PHJ18890601-V04-06-page5.txt: [("'There", 'There')] PHJ18890701-V04-07-page10.txt: [("'or", 'or')] PHJ18890701-V04-07-page12.txt: [("'twas", 'twas'), ("'tis", 'tis')] PHJ18890701-V04-07-page17.txt: [("'mature", 'mature')] PHJ18890701-V04-07-page18.txt: [("'em", 'em')] PHJ18890701-V04-07-page22.txt: [("'every", 'every')] PHJ18890701-V04-07-page27.txt: [("'A", 'A')] PHJ18890701-V04-07-page28.txt: [("'II", 'II'), ("'MEDICAL", 'MEDICAL')] PHJ18890701-V04-07-page7.txt: [("'excursus", 'excursus')] PHJ18890801-V04-08-page13.txt: [("'ballots", 'ballots'), ("'e", 'e'), ("'em", 'em')] PHJ18890801-V04-08-page14.txt: [("'em", 'em'), ("'E", 'E')] PHJ18890801-V04-08-page16.txt: [("'Twill", 'Twill')] PHJ18890801-V04-08-page2.txt: [("'conditions", 'conditions')] PHJ18890801-V04-08-page21.txt: [("'most", 'most'), ("'most", 'most'), ("'most", 'most'), ("'most", 'most')] PHJ18890801-V04-08-page24.txt: [("'Tis", 'Tis')] PHJ18890801-V04-08-page28.txt: [("'tars", 'tars')] PHJ18890901-V04-09-page12.txt: [('\'"', '"')] PHJ18890901-V04-09-page16.txt: [("'tis", 'tis')] PHJ18890901-V04-09-page20.txt: [("'Tis", 'Tis')] PHJ18890901-V04-09-page24.txt: [("'Tts", 'Tts')] PHJ18890901-V04-09-page25.txt: [("'say", 'say')] PHJ18890901-V04-09-page27.txt: [("'those", 'those')] PHJ18890901-V04-09-page31.txt: [("'Co", 'Co')] PHJ18891001-V04-10-page16.txt: [("'most", 'most')] PHJ18891001-V04-10-page2.txt: [("'ere", 'ere')] PHJ18891001-V04-10-page21.txt: [("'Twill", 'Twill'), ("'the", 'the')] PHJ18891001-V04-10-page22.txt: [("'You", 'You')] PHJ18891001-V04-10-page24.txt: [("'twas", 'twas')] PHJ18891001-V04-10-page29.txt: [("'WS", 'WS'), ("'rs.", 'rs.')] PHJ18891001-V04-10-page32.txt: [("'I", 'I'), ("'dress", 'dress')] PHJ18891001-V04-10-page8.txt: [("'twas", 'twas')] PHJ18891101-V04-11-page11.txt: [("'Take", 'Take')] PHJ18891101-V04-11-page12.txt: [("'To", 'To')] PHJ18891101-V04-11-page23.txt: [("'simple", 'simple')] PHJ18891101-V04-11-page24.txt: [("'taking", 'taking')] PHJ18891101-V04-11-page26.txt: [("'Temperance", 'Temperance')] PHJ18891101-V04-11-page3.txt: [("'But", 'But')] PHJ18891201-V04-12-page1.txt: [("'live", 'live')] PHJ18891201-V04-12-page31.txt: [("'While", 'While')] PHJ18891201-V04-12-page32.txt: [("'I", 'I')] PHJ18891201-V04-12-page33.txt: [("'or", 'or')] PHJ18891201-V04-12-page34.txt: [("'ea.", 'ea.'), ("'s", 's')] PHJ18900101-V05-01-page1.txt: [("'visitation", 'visitation'), ("'judgment", 'judgment')] PHJ18900101-V05-01-page10.txt: [("'Pears", 'Pears'), ("'ill", 'ill'), ("'pear", 'pear'), ("'pears", 'pears'), ("'pears", 'pears')] PHJ18900101-V05-01-page11.txt: [("'bout", 'bout')] PHJ18900101-V05-01-page12.txt: [("'hale", 'hale')] PHJ18900101-V05-01-page16.txt: [("'neath", 'neath'), ("'neath", 'neath')] PHJ18900101-V05-01-page24.txt: [("'dure", 'dure')] PHJ18900101-V05-01-page26.txt: [("'which", 'which')] PHJ18900101-V05-01-page28.txt: [("'or", 'or')] PHJ18900101-V05-01-page3.txt: [("'the", 'the')] PHJ18900201-V05-02-page10.txt: [("'cause", 'cause')] PHJ18900201-V05-02-page11.txt: [("'Pears", 'Pears'), ("'cept", 'cept'), ("'pears", 'pears'), ("'fo", 'fo'), ("'that's", 'thats'), ("'tis", 'tis')] PHJ18900201-V05-02-page12.txt: [("'evil", 'evil')] PHJ18900201-V05-02-page15.txt: [("'THE", 'THE')] PHJ18900201-V05-02-page20.txt: [("'Tis", 'Tis')] PHJ18900201-V05-02-page21.txt: [("'toil", 'toil')] PHJ18900201-V05-02-page23.txt: [("'Cats", 'Cats')] PHJ18900201-V05-02-page25.txt: [("'was", 'was'), ("'that", 'that')] PHJ18900201-V05-02-page29.txt: [("'Co", 'Co')] PHJ18900201-V05-02-page31.txt: [("'I", 'I'), ("'spring", 'spring')] PHJ18900201-V05-02-page32.txt: [("'Orders", 'Orders')] PHJ18900301-V05-03-page12.txt: [("'twas", 'twas'), ("'neath", 'neath'), ("'Twas", 'Twas')] PHJ18900301-V05-03-page13.txt: [("'We", 'We'), ("'What", 'What'), ("'Master", 'Master')] PHJ18900301-V05-03-page14.txt: [("''pointing", 'pointing'), ("'Ye're", 'Yere'), ("'What", 'What'), ("'Were", 'Were'), ("'It", 'It'), ("'I", 'I'), ("'and", 'and'), ("'Well", 'Well'), ("'that", 'that')] PHJ18900301-V05-03-page15.txt: [("'Your", 'Your'), ("'many", 'many'), ("'you", 'you')] PHJ18900301-V05-03-page16.txt: [("'avert", 'avert'), ("'dispensation", 'dispensation'), ("'part", 'part')] PHJ18900301-V05-03-page17.txt: [("'respecter", 'respecter')] PHJ18900301-V05-03-page20.txt: [("'twould", 'twould')] PHJ18900301-V05-03-page21.txt: [("'tis", 'tis'), ("'There", 'There')] PHJ18900301-V05-03-page25.txt: [("'er", 'er'), ("'er", 'er'), ("'f", 'f')] PHJ18900301-V05-03-page26.txt: [("'physicians", 'physicians')] PHJ18900301-V05-03-page28.txt: [("'.", '.')] PHJ18900301-V05-03-page29.txt: [("'the", 'the')] PHJ18900301-V05-03-page8.txt: [("'TB", 'TB')] PHJ18900401-V05-04-page1.txt: [("'A", 'A')] PHJ18900401-V05-04-page12.txt: [("'Tis", 'Tis')] PHJ18900401-V05-04-page16.txt: [("'tis", 'tis'), ("'little", 'little')] PHJ18900401-V05-04-page17.txt: [("'anecdotes.", 'anecdotes.'), ("'You", 'You'), ("'You", 'You'), ("'manna", 'manna'), ("'There", 'There')] PHJ18900401-V05-04-page21.txt: [("'For", 'For'), ("'And", 'And'), ("'they're", 'theyre')] PHJ18900401-V05-04-page24.txt: [("'Tis", 'Tis')] PHJ18900401-V05-04-page26.txt: [("'more", 'more')] PHJ18900401-V05-04-page29.txt: [("'.", '.')] PHJ18900401-V05-04-page31.txt: [("'flexible", 'flexible')] PHJ18900401-V05-04-page4.txt: [("'room", 'room')] PHJ18900401-V05-04-page5.txt: [("'rhe", 'rhe')] PHJ18900501-V05-05-page12.txt: [("'tis", 'tis'), ("'Tis", 'Tis'), ("'increase", 'increase')] PHJ18900501-V05-05-page13.txt: [("'remarkable", 'remarkable'), ("'honest", 'honest')] PHJ18900501-V05-05-page18.txt: [("'rings", 'rings')] PHJ18900501-V05-05-page21.txt: [("'a", 'a'), ("'A", 'A')] PHJ18900501-V05-05-page24.txt: [("'fore", 'fore')] PHJ18900501-V05-05-page29.txt: [("'ice", 'ice')] PHJ18900501-V05-05-page30.txt: [("'Warranted", 'Warranted')] PHJ18900501-V05-05-page31.txt: [("'.", '.')] PHJ18900501-V05-05-page7.txt: [("'But", 'But')] PHJ18900601-V05-06-page1.txt: [("'fever.", 'fever.')] PHJ18900601-V05-06-page14.txt: [("'tired", 'tired')] PHJ18900601-V05-06-page15.txt: [("'education", 'education')] PHJ18900601-V05-06-page20.txt: [("'which", 'which')] PHJ18900601-V05-06-page21.txt: [("'railroads", 'railroads')] PHJ18900601-V05-06-page23.txt: [("'feel", 'feel')] PHJ18900601-V05-06-page31.txt: [("'c", 'c')] PHJ18900701-V05-07-page11.txt: [("'excessive", 'excessive'), ("'The", 'The')] PHJ18900701-V05-07-page12.txt: [("'Dead", 'Dead')] PHJ18900701-V05-07-page15.txt: [("'mid", 'mid'), ("'Tis", 'Tis'), ("'neath", 'neath'), ("'mid", 'mid')] PHJ18900701-V05-07-page18.txt: [("'What's", 'Whats'), ("'Man", 'Man')] PHJ18900701-V05-07-page19.txt: [("'What", 'What'), ("'Onions", 'Onions'), ("'that's", 'thats'), ("'By", 'By')] PHJ18900701-V05-07-page24.txt: [("'twere", 'twere')] PHJ18900701-V05-07-page29.txt: [("'You", 'You')] PHJ18900701-V05-07-page31.txt: [("'cl", 'cl')] PHJ18900701-V05-07-page6.txt: [("'that", 'that'), ("'That", 'That')] PHJ18900801-V05-08-page10.txt: [("'Sweet", 'Sweet')] PHJ18900801-V05-08-page12.txt: [("'extensively", 'extensively')] PHJ18900801-V05-08-page14.txt: [("'the", 'the')] PHJ18900801-V05-08-page15.txt: [("'scenes", 'scenes'), ("'tis", 'tis'), ("'tween", 'tween')] PHJ18900801-V05-08-page24.txt: [("'tis", 'tis'), ("'Gainst", 'Gainst')] PHJ18900801-V05-08-page25.txt: [("'Co", 'Co')] PHJ18900801-V05-08-page28.txt: [("'I", 'I')] PHJ18900801-V05-08-page4.txt: [("'not", 'not')] PHJ18900801-V05-08-page7.txt: [("'first", 'first')] PHJ18900801-V05-08-page8.txt: [("'John", 'John')] PHJ18900801-V05-08-page9.txt: [("'Elizabeth", 'Elizabeth')] PHJ18900901-V05-09-page11.txt: [("'tis", 'tis'), ("'fraid", 'fraid'), ("'em", 'em'), ("'em", 'em')] PHJ18900901-V05-09-page15.txt: [("'Neath", 'Neath')] PHJ18900901-V05-09-page18.txt: [("'evenings", 'evenings')] PHJ18900901-V05-09-page21.txt: [("'whatever", 'whatever'), ("'an", 'an')] PHJ18900901-V05-09-page27.txt: [("'the", 'the')] PHJ18900901-V05-09-page28.txt: [("'promoting", 'promoting'), ('\'"', '"'), ("'r", 'r'), ("'Gold", 'Gold')] PHJ18900901-V05-09-page9.txt: [("'Coughs", 'Coughs')] PHJ18901001-V05-10-page10.txt: [("'tis", 'tis')] PHJ18901001-V05-10-page11.txt: [("'Each", 'Each')] PHJ18901001-V05-10-page14.txt: [("'The", 'The'), ("'said", 'said')] PHJ18901001-V05-10-page18.txt: [("'both", 'both')] PHJ18901001-V05-10-page19.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis'), ("'tis", 'tis'), ("'tis", 'tis')] PHJ18901001-V05-10-page25.txt: [("'vision", 'vision')] PHJ18901001-V05-10-page28.txt: [("'t", 't')] PHJ18901001-V05-10-page3.txt: [("'The", 'The')] PHJ18901001-V05-10-page31.txt: [("'EWE", 'EWE'), ("'-", '-'), ("'i", 'i')] PHJ18901001-V05-10-page7.txt: [("'Mid", 'Mid')] PHJ18901101-V05-11-page11.txt: [("'tis", 'tis'), ("'tis", 'tis')] PHJ18901101-V05-11-page15.txt: [("'Tis", 'Tis')] PHJ18901101-V05-11-page26.txt: [("'willing", 'willing')] PHJ18901101-V05-11-page28.txt: [("'S", 'S')] PHJ18901101-V05-11-page29.txt: [("'inducements", 'inducements')] PHJ18901101-V05-11-page31.txt: [("'this", 'this'), ("'FALSE", 'FALSE')] PHJ18901101-V05-11-page6.txt: [("'But", 'But')] PHJ18901101-V05-11-page7.txt: [("'all", 'all')] PHJ18901201-V05-12-page1.txt: [("'There", 'There')] PHJ18901201-V05-12-page13.txt: [("'TOBACCO", 'TOBACCO')] PHJ18901201-V05-12-page14.txt: [("'Saul", 'Saul')] PHJ18901201-V05-12-page15.txt: [("'twould", 'twould'), ("'good", 'good'), ("'Twas", 'Twas')] PHJ18901201-V05-12-page16.txt: [("'tis", 'tis')] PHJ18901201-V05-12-page20.txt: [("'No", 'No')] PHJ18901201-V05-12-page24.txt: [("'round", 'round')] PHJ18901201-V05-12-page25.txt: [("'Co", 'Co')] PHJ18901201-V05-12-page26.txt: [("'of", 'of')] PHJ18901201-V05-12-page28.txt: [("'earn", 'earn')] PHJ18901201-V05-12-page30.txt: [("'ilinTirliTiFillairirtigillitiffITIFFESTITFIR", 'ilinTirliTiFillairirtigillitiffITIFFESTITFIR'), ("'-il", '-il')] PHJ18901201-V05-12-page31.txt: [('\'"', '"')] PHJ18901201-V05-12-page34.txt: [("'Temperance", 'Temperance'), ("'Tea", 'Tea'), ("'Talk", 'Talk'), ("'Three", 'Three'), ("'Pest", 'Pest'), ("'Tight", 'Tight'), ("'Tight", 'Tight')] PHJ18901201-V05-12-page4.txt: [("'was", 'was')] PHJ18901201-V05-12-page7.txt: [("'I'here", 'Ihere')] PHJ18910101-V06-01-page10.txt: [("'Twas", 'Twas')] PHJ18910101-V06-01-page13.txt: [("'Twill", 'Twill'), ("'poorly", 'poorly')] PHJ18910101-V06-01-page14.txt: [("'thout", 'thout')] PHJ18910101-V06-01-page16.txt: [("'.", '.'), ("'women", 'women'), ("'Come", 'Come')] PHJ18910101-V06-01-page17.txt: [("'mother", 'mother')] PHJ18910101-V06-01-page2.txt: [("'more", 'more')] PHJ18910101-V06-01-page22.txt: [("'whether", 'whether')] PHJ18910101-V06-01-page24.txt: [("'em", 'em'), ("'Tis", 'Tis'), ("'redeeming", 'redeeming')] PHJ18910101-V06-01-page26.txt: [("'used", 'used')] PHJ18910101-V06-01-page31.txt: [("'As", 'As'), ("'The", 'The')] PHJ18910101-V06-01-page32.txt: [("'.", '.'), ("'T", 'T'), ("'AirlfliTlirn", 'AirlfliTlirn'), ('\'"...', '"...')] PHJ18910101-V06-01-page7.txt: [("'Tis", 'Tis')] PHJ18910201-V06-02-page1.txt: [("'easily", 'easily')] PHJ18910201-V06-02-page14.txt: [("'boy", 'boy')] PHJ18910201-V06-02-page15.txt: [("'Into", 'Into')] PHJ18910201-V06-02-page16.txt: [("'The", 'The')] PHJ18910201-V06-02-page18.txt: [("'Lord", 'Lord')] PHJ18910201-V06-02-page21.txt: [("'The", 'The')] PHJ18910201-V06-02-page29.txt: [("'I", 'I')] PHJ18910201-V06-02-page31.txt: [("'SEA'arAtV", 'SEAarAtV')] PHJ18910201-V06-02-page32.txt: [("'ifTriirittialiiMI", 'ifTriirittialiiMI')] PHJ18910201-V06-02-page8.txt: [("'off", 'off')] PHJ18910201-V06-02-page9.txt: [("'not", 'not')] PHJ18910301-V06-03-page13.txt: [("'in", 'in')] PHJ18910301-V06-03-page15.txt: [("'If", 'If')] PHJ18910301-V06-03-page16.txt: [("'Now", 'Now'), ("'t", 't')] PHJ18910301-V06-03-page17.txt: [("'Tis", 'Tis'), ("'This", 'This')] PHJ18910301-V06-03-page26.txt: [("'sentiment", 'sentiment')] PHJ18910301-V06-03-page29.txt: [("'functual", 'functual'), ("'I", 'I')] PHJ18910301-V06-03-page31.txt: [("'hese", 'hese')] PHJ18910301-V06-03-page6.txt: [("'the", 'the')] PHJ18910301-V06-03-page7.txt: [("'Tts", 'Tts')] PHJ18910401-V06-04-page21.txt: [("'Twas", 'Twas')] PHJ18910401-V06-04-page8.txt: [("'Twould", 'Twould')] PHJ18910501-V06-05-page14.txt: [("'Produces", 'Produces')] PHJ18910501-V06-05-page15.txt: [("'of", 'of')] PHJ18910501-V06-05-page18.txt: [("'Forget", 'Forget'), ("'floating", 'floating'), ("'healing", 'healing')] PHJ18910501-V06-05-page25.txt: [('\'stays."', 'stays."')] PHJ18910501-V06-05-page27.txt: [("'pink", 'pink')] PHJ18910501-V06-05-page29.txt: [("'dress", 'dress')] PHJ18910501-V06-05-page30.txt: [("'Waists", 'Waists')] PHJ18910601-V06-06-page18.txt: [("'Hoosier", 'Hoosier'), ("'Ancient", 'Ancient'), ("'Ancient", 'Ancient')] PHJ18910601-V06-06-page22.txt: [("'tis", 'tis')] PHJ18910601-V06-06-page24.txt: [("'that", 'that')] PHJ18910601-V06-06-page25.txt: [("'Neath", 'Neath')] PHJ18910601-V06-06-page26.txt: [("'I", 'I')] PHJ18910701-V06-07-page13.txt: [("'Rs", 'Rs')] PHJ18910701-V06-07-page14.txt: [("'the", 'the')] PHJ18910701-V06-07-page24.txt: [("'Twas", 'Twas')] PHJ18910701-V06-07-page29.txt: [("'Mimic", 'Mimic')] PHJ18910701-V06-07-page30.txt: [("'.olumes.", '.olumes.')] PHJ18910701-V06-07-page31.txt: [("'Wen", 'Wen')] PHJ18910701-V06-07-page6.txt: [("'is", 'is'), ("'In", 'In')] PHJ18910701-V06-07-page8.txt: [("'Tis", 'Tis')] PHJ18910801-V06-08-page11.txt: [("'perfectly", 'perfectly')] PHJ18910801-V06-08-page12.txt: [("'devoted", 'devoted')] PHJ18910801-V06-08-page14.txt: [("'Tis", 'Tis'), ("'tis", 'tis'), ("'tis", 'tis')] PHJ18910801-V06-08-page15.txt: [("'dead-lift", 'dead-lift')] PHJ18910801-V06-08-page17.txt: [("'What", 'What'), ("'If", 'If')] PHJ18910801-V06-08-page18.txt: [("'Ye", 'Ye'), ("'And", 'And'), ("'the", 'the'), ("'proceed", 'proceed'), ("'suffered", 'suffered')] PHJ18910801-V06-08-page19.txt: [("'regenerated.", 'regenerated.')] PHJ18910801-V06-08-page27.txt: [("'Cases", 'Cases')] PHJ18910801-V06-08-page31.txt: [("'V", 'V'), ("'riven", 'riven')] PHJ18910801-V06-08-page9.txt: [("'twixt", 'twixt')] PHJ18910901-V06-09-page11.txt: [("'plays", 'plays')] PHJ18910901-V06-09-page12.txt: [("'Twas", 'Twas'), ("'kills", 'kills')] PHJ18910901-V06-09-page13.txt: [("'slop", 'slop')] PHJ18910901-V06-09-page15.txt: [("'Tis", 'Tis'), ("'mid", 'mid'), ("'I", 'I')] PHJ18910901-V06-09-page16.txt: [("'Ye", 'Ye'), ("'Where", 'Where')] PHJ18910901-V06-09-page17.txt: [("'a", 'a')] PHJ18910901-V06-09-page26.txt: [("'Write", 'Write')] PHJ18910901-V06-09-page30.txt: [("'MEDICAL", 'MEDICAL')] PHJ18911001-V06-10-page1.txt: [("'Health", 'Health')] PHJ18911001-V06-10-page11.txt: [("'finding", 'finding')] PHJ18911001-V06-10-page12.txt: [("'the", 'the'), ("'proceed", 'proceed')] PHJ18911001-V06-10-page14.txt: [("'pocket.", 'pocket.')] PHJ18911001-V06-10-page16.txt: [("'I", 'I'), ("'the", 'the')] PHJ18911001-V06-10-page17.txt: [("'Blessed", 'Blessed'), ("'all", 'all'), ("'Inasmuch", 'Inasmuch'), ("'Thou", 'Thou')] PHJ18911001-V06-10-page18.txt: [("'the", 'the')] PHJ18911001-V06-10-page2.txt: [("'description", 'description')] PHJ18911001-V06-10-page20.txt: [("'it", 'it')] PHJ18911001-V06-10-page25.txt: [("'tis", 'tis')] PHJ18911001-V06-10-page26.txt: [("'No", 'No'), ("'North", 'North')] PHJ18911001-V06-10-page3.txt: [("'induced", 'induced'), ("'observer.", 'observer.'), ("'creating", 'creating')] PHJ18911001-V06-10-page30.txt: [("'Style", 'Style')] PHJ18911001-V06-10-page31.txt: [("'Extension", 'Extension'), ("'Sides", 'Sides'), ("'of", 'of')] PHJ18911001-V06-10-page4.txt: [("'a", 'a')] PHJ18911001-V06-10-page5.txt: [("'Others", 'Others')] PHJ18911001-V06-10-page6.txt: [("'twill", 'twill')] PHJ18911001-V06-10-page7.txt: [("'education", 'education'), ("'saturated", 'saturated')] PHJ18911001-V06-10-page8.txt: [("'sturb", 'sturb'), ("'cause", 'cause'), ("'portant", 'portant'), ("'Flo", 'Flo'), ("'cause", 'cause')] PHJ18911101-V06-11-page12.txt: [("'Let", 'Let'), ("'Thou", 'Thou')] PHJ18911101-V06-11-page15.txt: [("'abdomen", 'abdomen')] PHJ18911101-V06-11-page16.txt: [("'Neath", 'Neath'), ("'Mid", 'Mid')] PHJ18911101-V06-11-page19.txt: [("'of", 'of')] PHJ18911101-V06-11-page2.txt: [("'come", 'come')] PHJ18911101-V06-11-page25.txt: [("'of", 'of')] PHJ18911101-V06-11-page27.txt: [("'.", '.')] PHJ18911101-V06-11-page3.txt: [("'The", 'The')] PHJ18911101-V06-11-page31.txt: [("'When", 'When')] PHJ18911201-V06-12-page13.txt: [("'Not", 'Not')] PHJ18911201-V06-12-page21.txt: [("'overstimulation", 'overstimulation')] PHJ18911201-V06-12-page22.txt: [("'.way", '.way')] PHJ18911201-V06-12-page24.txt: [("'Tis", 'Tis'), ("'tis", 'tis')] PHJ18911201-V06-12-page26.txt: [("'make", 'make')] PHJ18911201-V06-12-page28.txt: [("'or", 'or')] PHJ18911201-V06-12-page31.txt: [("'MALAY", 'MALAY')] PHJ18911201-V06-12-page32.txt: [("'and", 'and')] PHJ18911201-V06-12-page33.txt: [("'Tobacco", 'Tobacco')] PHJ18911201-V06-12-page34.txt: [("'rom", 'rom'), ("'reaching", 'reaching')] PHJ18911201-V06-12-page7.txt: [("'The", 'The')] PHJ18911201-V06-12-page9.txt: [("'midst", 'midst'), ("'of", 'of')] PHJ18920101-V07-01-page12.txt: [("'take", 'take')] PHJ18920101-V07-01-page13.txt: [("'come", 'come')] PHJ18920101-V07-01-page17.txt: [("'Twere", 'Twere')] PHJ18920101-V07-01-page18.txt: [("'Tis", 'Tis')] PHJ18920101-V07-01-page24.txt: [("'of", 'of')] PHJ18920101-V07-01-page25.txt: [("'Tis", 'Tis')] PHJ18920101-V07-01-page27.txt: [("'Thomas", 'Thomas')] PHJ18920101-V07-01-page29.txt: [("'Volume", 'Volume')] PHJ18920101-V07-01-page30.txt: [("'LAND", 'LAND')] PHJ18920201-V07-02-page12.txt: [("'mong", 'mong'), ("'mong", 'mong')] PHJ18920201-V07-02-page14.txt: [("'You", 'You')] PHJ18920201-V07-02-page15.txt: [("'Steenth", 'Steenth')] PHJ18920201-V07-02-page16.txt: [("'Ye", 'Ye'), ("'he", 'he'), ("'Now", 'Now'), ("'in", 'in')] PHJ18920201-V07-02-page17.txt: [("'The", 'The')] PHJ18920201-V07-02-page19.txt: [("'Love", 'Love')] PHJ18920201-V07-02-page22.txt: [('\'"', '"'), ("'twas", 'twas'), ("'twould", 'twould')] PHJ18920201-V07-02-page25.txt: [("'we", 'we')] PHJ18920201-V07-02-page27.txt: [("'Publishers", 'Publishers')] PHJ18920201-V07-02-page29.txt: [("'teat", 'teat'), ("'Old", 'Old')] PHJ18920201-V07-02-page30.txt: [("'a.", 'a.'), ("'h", 'h')] PHJ18920201-V07-02-page31.txt: [("'Al", 'Al')] PHJ18920301-V07-03-page12.txt: [("'All", 'All')] PHJ18920301-V07-03-page15.txt: [("'way", 'way'), ("'way", 'way')] PHJ18920301-V07-03-page16.txt: [("'taken", 'taken')] PHJ18920301-V07-03-page17.txt: [("'at", 'at')] PHJ18920301-V07-03-page18.txt: [("'Tis", 'Tis')] PHJ18920301-V07-03-page21.txt: [("'twixt", 'twixt'), ("'twixt", 'twixt')] PHJ18920301-V07-03-page26.txt: [("'My", 'My')] PHJ18920301-V07-03-page28.txt: [("'volumes", 'volumes')] PHJ18920301-V07-03-page32.txt: [("'o", 'o'), ("'As", 'As')] PHJ18920301-V07-03-page4.txt: [("'to", 'to')] PHJ18920401-V07-04-page11.txt: [("'sun", 'sun')] PHJ18920401-V07-04-page12.txt: [("'.They", '.They')] PHJ18920401-V07-04-page13.txt: [("'.", '.')] PHJ18920401-V07-04-page14.txt: [("'Nature", 'Nature')] PHJ18920401-V07-04-page15.txt: [("'this", 'this'), ("'Spring", 'Spring')] PHJ18920401-V07-04-page17.txt: [("'How", 'How')] PHJ18920401-V07-04-page20.txt: [("'Whether", 'Whether'), ("'THE", 'THE'), ("'the", 'the'), ("'a", 'a'), ("'little", 'little')] PHJ18920401-V07-04-page22.txt: [("'Now", 'Now'), ("'I", 'I'), ("'Yes", 'Yes')] PHJ18920401-V07-04-page23.txt: [("'come", 'come')] PHJ18920401-V07-04-page24.txt: [("'T", 'T')] PHJ18920401-V07-04-page32.txt: [("'Jewspaper", 'Jewspaper')] PHJ18920401-V07-04-page5.txt: [("'do", 'do'), ("'r", 'r')] PHJ18920401-V07-04-page9.txt: [("'A", 'A')] PHJ18920501-V07-05-page10.txt: [("'it", 'it')] PHJ18920501-V07-05-page21.txt: [("'enough", 'enough')] PHJ18920501-V07-05-page24.txt: [("'Co", 'Co')] PHJ18920601-V07-06-page11.txt: [("'This", 'This')] PHJ18920601-V07-06-page14.txt: [("'dark", 'dark')] PHJ18920601-V07-06-page25.txt: [("'never", 'never'), ("'turn", 'turn'), ("'Loose", 'Loose'), ("'mean", 'mean'), ("'The", 'The'), ("'drop", 'drop')] PHJ18920601-V07-06-page26.txt: [("'polished", 'polished')] PHJ18920601-V07-06-page28.txt: [("'powes", 'powes')] PHJ18920601-V07-06-page30.txt: [("'Ind", 'Ind')] PHJ18920701-V07-07-page1.txt: [("'Pacific", 'Pacific')] PHJ18920701-V07-07-page11.txt: [("'He", 'He'), ("'The", 'The')] PHJ18920701-V07-07-page15.txt: [("'pitied", 'pitied')] PHJ18920701-V07-07-page17.txt: [("'teetotal", 'teetotal')] PHJ18920701-V07-07-page18.txt: [("'There", 'There')] PHJ18920701-V07-07-page23.txt: [("'neath", 'neath'), ("'Tis", 'Tis'), ("'tis", 'tis')] PHJ18920701-V07-07-page27.txt: [("'anyone", 'anyone'), ("'Compound", 'Compound'), ("'Ohio", 'Ohio')] PHJ18920701-V07-07-page28.txt: [("'Will", 'Will')] PHJ18920701-V07-07-page29.txt: [("'they", 'they')] PHJ18920701-V07-07-page31.txt: [("'flour", 'flour')] PHJ18920701-V07-07-page32.txt: [("'Marvelous", 'Marvelous')] PHJ18920701-V07-07-page4.txt: [("'.ccurately", '.ccurately')] PHJ18920701-V07-07-page6.txt: [("'population", 'population')] PHJ18920801-V07-08-page14.txt: [("'Joe", 'Joe'), ("'Tennessee", 'Tennessee')] PHJ18920801-V07-08-page17.txt: [("'clay", 'clay'), ("'dumb", 'dumb'), ("'have", 'have'), ("'And", 'And'), ("'Let", 'Let')] PHJ18920801-V07-08-page18.txt: [('\'Excelsior.\'"', 'Excelsior."'), ("'What", 'What'), ("'em", 'em')] PHJ18920801-V07-08-page2.txt: [("'Vault", 'Vault'), ("'danger", 'danger')] PHJ18920801-V07-08-page24.txt: [("'properly-prepared", 'properly-prepared'), ("'Vhen", 'Vhen')] PHJ18920801-V07-08-page25.txt: [("'any", 'any')] PHJ18920801-V07-08-page26.txt: [("'mean", 'mean')] PHJ18920801-V07-08-page27.txt: [("'consumptive", 'consumptive')] PHJ18920801-V07-08-page3.txt: [("'Wren", 'Wren'), ("'and", 'and')] PHJ18920801-V07-08-page4.txt: [("'sugar", 'sugar'), ("'sugar", 'sugar')] PHJ18920801-V07-08-page7.txt: [("'That", 'That')] PHJ18920801-V07-08-page8.txt: [("'man", 'man'), ("'ten", 'ten'), ("'scrofula", 'scrofula')] PHJ18920901-V07-09-page15.txt: [("'It", 'It'), ("'The", 'The'), ("'thoughts", 'thoughts'), ("'Her", 'Her')] PHJ18920901-V07-09-page16.txt: [("'What", 'What'), ("'It", 'It')] PHJ18920901-V07-09-page17.txt: [("'Your", 'Your')] PHJ18920901-V07-09-page18.txt: [("'the", 'the')] PHJ18920901-V07-09-page6.txt: [("'rise", 'rise')] PHJ18920901-V07-09-page9.txt: [("'Hasty", 'Hasty')] PHJ18921001-V07-10-page1.txt: [("'Vacific", 'Vacific')] PHJ18921001-V07-10-page10.txt: [("'Nessel", 'Nessel')] PHJ18921001-V07-10-page13.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis')] PHJ18921001-V07-10-page15.txt: [("'I", 'I')] PHJ18921001-V07-10-page18.txt: [("'twere", 'twere'), ("'Mong", 'Mong')] PHJ18921001-V07-10-page2.txt: [("'the", 'the'), ("'of", 'of')] PHJ18921001-V07-10-page20.txt: [("'that", 'that')] PHJ18921001-V07-10-page21.txt: [("'It", 'It'), ("'more.", 'more.')] PHJ18921001-V07-10-page24.txt: [("'other", 'other')] PHJ18921001-V07-10-page26.txt: [("'DRESSING.", 'DRESSING.')] PHJ18921001-V07-10-page27.txt: [("'.common", '.common')] PHJ18921001-V07-10-page28.txt: [("'-Mrs.", '-Mrs.'), ("'hillside", 'hillside'), ("'I.'a.", 'I.a.')] PHJ18921001-V07-10-page30.txt: [("'Water", 'Water')] PHJ18921001-V07-10-page32.txt: [("'HASHERS", 'HASHERS')] PHJ18921001-V07-10-page4.txt: [("'ACIFIC", 'ACIFIC')] PHJ18921001-V07-10-page9.txt: [("'veins", 'veins')] PHJ18921101-V07-11-page13.txt: [("'an", 'an')] PHJ18921101-V07-11-page22.txt: [("'e", 'e')] PHJ18921101-V07-11-page29.txt: [("'The", 'The'), ("'The", 'The')] PHJ18921101-V07-11-page3.txt: [("'Elements", 'Elements')] PHJ18921101-V07-11-page30.txt: [("'Great", 'Great')] PHJ18921201-V07-12-page10.txt: [("'food", 'food'), ("'three", 'three'), ("'Some", 'Some')] PHJ18921201-V07-12-page11.txt: [("'In", 'In')] PHJ18921201-V07-12-page18.txt: [("'CONDUCTED", 'CONDUCTED')] PHJ18921201-V07-12-page21.txt: [('\'\'Obedience"', 'Obedience"')] PHJ18921201-V07-12-page26.txt: [("'TWERE", 'TWERE')] PHJ18921201-V07-12-page29.txt: [('\'"', '"')] PHJ18921201-V07-12-page30.txt: [("'Great", 'Great')] PHJ18921201-V07-12-page33.txt: [("'tirade", 'tirade')] PHJ18921201-V07-12-page5.txt: [("'were", 'were')] PHJ18921201-V07-12-page7.txt: [("'in", 'in')] PHJ18921201-V07-12-page8.txt: [("'herself", 'herself')] PHJ18960101-V11-01-page12.txt: [("'house-band", 'house-band'), ("'Behold", 'Behold')] PHJ18960101-V11-01-page16.txt: [("'order", 'order'), ("'The", 'The')] PHJ18960101-V11-01-page17.txt: [("'cooperation", 'cooperation'), ("'The", 'The')] PHJ18960101-V11-01-page26.txt: [("''Y", 'Y')] PHJ18960101-V11-01-page28.txt: [("'O", 'O')] PHJ18960101-V11-01-page5.txt: [("'The", 'The')] PHJ18960201-V11-02-page10.txt: [("'neath", 'neath')] PHJ18960201-V11-02-page11.txt: [("'comprehend", 'comprehend'), ("'The", 'The'), ("'fallen", 'fallen'), ("'fallen", 'fallen'), ("'fallen", 'fallen'), ("'the", 'the')] PHJ18960201-V11-02-page12.txt: [("'vital", 'vital'), ("'magnificent", 'magnificent'), ("'nervous", 'nervous'), ("'reasoning", 'reasoning')] PHJ18960201-V11-02-page18.txt: [("'neath", 'neath')] PHJ18960201-V11-02-page2.txt: [("'rubber", 'rubber')] PHJ18960201-V11-02-page21.txt: [("'Tis", 'Tis')] PHJ18960201-V11-02-page22.txt: [("'he", 'he')] PHJ18960201-V11-02-page26.txt: [("'twill", 'twill')] PHJ18960201-V11-02-page28.txt: [("'em", 'em')] PHJ18960201-V11-02-page29.txt: [("'favorably", 'favorably')] PHJ18960201-V11-02-page31.txt: [("'to", 'to'), ("'Many", 'Many'), ("'York", 'York')] PHJ18960201-V11-02-page5.txt: [("'kerchief", 'kerchief')] PHJ18960201-V11-02-page6.txt: [("'they", 'they')] PHJ18960301-V11-03-page11.txt: [("'twas", 'twas'), ("'Twas", 'Twas')] PHJ18960301-V11-03-page13.txt: [("'Why", 'Why'), ('\'"', '"')] PHJ18960301-V11-03-page14.txt: [("'a", 'a')] PHJ18960301-V11-03-page2.txt: [("'permit", 'permit')] PHJ18960301-V11-03-page24.txt: [("'em", 'em')] PHJ18960301-V11-03-page25.txt: [("'long", 'long'), ("'em", 'em')] PHJ18960301-V11-03-page29.txt: [("'has", 'has')] PHJ18960301-V11-03-page30.txt: [("'use", 'use')] PHJ18960301-V11-03-page31.txt: [("'Many", 'Many'), ("'not", 'not')] PHJ18960401-V11-04-page10.txt: [("'tis", 'tis'), ("'Twill", 'Twill'), ("'tis", 'tis')] PHJ18960401-V11-04-page12.txt: [("'LEAST", 'LEAST')] PHJ18960401-V11-04-page14.txt: [("'Godfrey's", 'Godfreys')] PHJ18960401-V11-04-page17.txt: [("'Bout", 'Bout'), ("'r", 'r'), ("'im", 'im'), ("'R", 'R'), ("'er", 'er'), ("'er", 'er'), ("'er", 'er'), ("'in", 'in'), ("'im", 'im')] PHJ18960401-V11-04-page19.txt: [("''SLEEP", 'SLEEP')] PHJ18960401-V11-04-page24.txt: [("'Look", 'Look')] PHJ18960401-V11-04-page25.txt: [("'In", 'In'), ("'In", 'In'), ("'In", 'In')] PHJ18960401-V11-04-page29.txt: [("'the", 'the')] PHJ18960401-V11-04-page31.txt: [("'but", 'but'), ("'York", 'York'), ("'INSTRUMENT", 'INSTRUMENT')] PHJ18960401-V11-04-page5.txt: [("'head", 'head')] PHJ18960401-V11-04-page6.txt: [("'I", 'I')] PHJ18960501-V11-05-page13.txt: [("'Rights", 'Rights'), ("'em", 'em'), ("'em.", 'em.'), ("'em", 'em')] PHJ18960501-V11-05-page15.txt: [("'lust", 'lust'), ("'chapter", 'chapter')] PHJ18960501-V11-05-page16.txt: [("'If", 'If'), ("'Not", 'Not'), ("'Love", 'Love'), ("'He", 'He'), ("'Whoso", 'Whoso'), ("'turned", 'turned'), ("'No", 'No'), ("'You", 'You'), ("'Mama", 'Mama'), ("'Mama", 'Mama'), ("'I've", 'Ive'), ("'love-force", 'love-force')] PHJ18960501-V11-05-page17.txt: [("'what", 'what')] PHJ18960501-V11-05-page20.txt: [("'to", 'to')] PHJ18960501-V11-05-page23.txt: [("'.", '.')] PHJ18960501-V11-05-page25.txt: [("'leaves", 'leaves')] PHJ18960501-V11-05-page27.txt: [("'use", 'use')] PHJ18960501-V11-05-page28.txt: [("'I", 'I'), ("'Well", 'Well'), ("'didn't", 'didnt'), ("'Yes", 'Yes'), ("'but", 'but'), ("'You", 'You'), ("'And", 'And')] PHJ18960501-V11-05-page31.txt: [("'Z", 'Z'), ("'Operating", 'Operating')] PHJ18960501-V11-05-page6.txt: [("'THE", 'THE')] PHJ18960601-V11-06-page10.txt: [("'The", 'The')] PHJ18960601-V11-06-page11.txt: [("'round", 'round')] PHJ18960601-V11-06-page22.txt: [("'neath", 'neath')] PHJ18960601-V11-06-page31.txt: [("'Many", 'Many')] PHJ18960601-V11-06-page32.txt: [("'...", '...'), ("'d", 'd'), ("'.", '.'), ('\'"', '"')] PHJ18960601-V11-06-page9.txt: [('\'sympathy"', 'sympathy"')] PHJ18960701-V11-07-page12.txt: [("'brain", 'brain')] PHJ18960701-V11-07-page16.txt: [("'Value", 'Value'), ("'a", 'a')] PHJ18960701-V11-07-page20.txt: [("'an", 'an'), ("'cross", 'cross')] PHJ18960701-V11-07-page22.txt: [("'stop", 'stop')] PHJ18960701-V11-07-page31.txt: [("'Many", 'Many')] PHJ18960701-V11-07-page32.txt: [("'ill", 'ill'), ('\'\'"..', '"..'), ('\'a."', 'a."')] PHJ18960701-V11-07-page7.txt: [("'.", '.')] PHJ18960801-V11-08-page13.txt: [("'The", 'The')] PHJ18960801-V11-08-page19.txt: [("'dreamt", 'dreamt')] PHJ18960801-V11-08-page22.txt: [("'.", '.')] PHJ18960801-V11-08-page23.txt: [("'em", 'em'), ("'twas", 'twas'), ("'em.", 'em.'), ("'twas", 'twas'), ("'If", 'If')] PHJ18960801-V11-08-page24.txt: [("'A.", 'A.')] PHJ18960801-V11-08-page25.txt: [("'of", 'of')] PHJ18960801-V11-08-page26.txt: [("'Sample", 'Sample')] PHJ18960801-V11-08-page28.txt: [("'Tis", 'Tis'), ("'Twill", 'Twill')] PHJ18960801-V11-08-page31.txt: [("'York", 'York')] PHJ18960901-V11-09-page10.txt: [("'Twas", 'Twas')] PHJ18960901-V11-09-page11.txt: [("'be", 'be')] PHJ18960901-V11-09-page13.txt: [("'twould", 'twould'), ("'twould", 'twould')] PHJ18960901-V11-09-page15.txt: [("'thought", 'thought')] PHJ18960901-V11-09-page17.txt: [("'hard", 'hard')] PHJ18960901-V11-09-page21.txt: [("'tis", 'tis')] PHJ18960901-V11-09-page23.txt: [("'Come", 'Come'), ("'Be", 'Be'), ("'Sarah", 'Sarah'), ("'d", 'd'), ("'Hello", 'Hello'), ("'Come", 'Come'), ("'but", 'but'), ("'you'll", 'youll'), ("'Come", 'Come'), ("'let's", 'lets'), ("'never", 'never'), ("'Here", 'Here'), ("'I", 'I'), ("'Mama", 'Mama'), ("'Stop", 'Stop'), ("'All", 'All')] PHJ18960901-V11-09-page24.txt: [("'.Johnny", '.Johnny')] PHJ18960901-V11-09-page28.txt: [("'Tis", 'Tis')] PHJ18960901-V11-09-page3.txt: [("'his", 'his')] PHJ18960901-V11-09-page30.txt: [("'Many", 'Many'), ("'but", 'but'), ("'not", 'not'), ("'York", 'York')] PHJ18960901-V11-09-page7.txt: [("'husband's", 'husbands')] PHJ18960901-V11-09-page9.txt: [("'to", 'to')] PHJ18961001-V11-10-page10.txt: [("'Tobacco", 'Tobacco')] PHJ18961001-V11-10-page13.txt: [("'The", 'The')] PHJ18961001-V11-10-page14.txt: [("'condition", 'condition')] PHJ18961001-V11-10-page24.txt: [("'kerchief", 'kerchief')] PHJ18961001-V11-10-page26.txt: [("'em", 'em')] PHJ18961001-V11-10-page5.txt: [("'done", 'done')] PHJ18961001-V11-10-page9.txt: [("'Tell", 'Tell'), ("'general", 'general'), ("'Who", 'Who'), ("'It", 'It'), ("'If", 'If'), ("'the", 'the')] PHJ18961101-V11-11-page1.txt: [("'Nothing", 'Nothing')] PHJ18961101-V11-11-page10.txt: [("'Non-Alcoholics", 'Non-Alcoholics')] PHJ18961101-V11-11-page18.txt: [("'not", 'not'), ("'pretty", 'pretty'), ("'Ground", 'Ground'), ("'then", 'then'), ("'hundreds", 'hundreds'), ("'And", 'And')] PHJ18961101-V11-11-page19.txt: [("'presently", 'presently')] PHJ18961101-V11-11-page2.txt: [("'make", 'make')] PHJ18961101-V11-11-page23.txt: [("'the", 'the')] PHJ18961101-V11-11-page24.txt: [("'nervous", 'nervous'), ("'tis", 'tis')] PHJ18961101-V11-11-page25.txt: [("'fectionate", 'fectionate')] PHJ18961101-V11-11-page26.txt: [("'might", 'might')] PHJ18961101-V11-11-page27.txt: [("'a", 'a')] PHJ18961101-V11-11-page28.txt: [("'the", 'the')] PHJ18961101-V11-11-page32.txt: [("'I", 'I'), ("'Ili", 'Ili'), ("'..", '..'), ("'ifiill", 'ifiill'), ('\'"ZingSliimiediullIVIgi', '"ZingSliimiediullIVIgi')] PHJ18961101-V11-11-page7.txt: [("'Functional", 'Functional')] PHJ18961101-V11-11-page8.txt: [("'the", 'the'), ("'There", 'There'), ("'The", 'The')] PHJ18961101-V11-11-page9.txt: [("'It", 'It')] PHJ18961201-V11-12-page10.txt: [("'Tis", 'Tis')] PHJ18961201-V11-12-page15.txt: [("'face", 'face'), ("'something", 'something'), ("'endeavor", 'endeavor')] PHJ18961201-V11-12-page17.txt: [("'can", 'can')] PHJ18961201-V11-12-page22.txt: [("'And", 'And')] PHJ18961201-V11-12-page23.txt: [("'The", 'The'), ("'Self-seeking", 'Self-seeking')] PHJ18961201-V11-12-page24.txt: [("'OURNAL.", 'OURNAL.')] PHJ18961201-V11-12-page25.txt: [("'Mr.", 'Mr.'), ("'these", 'these'), ("'but", 'but'), ("'but", 'but')] PHJ18961201-V11-12-page30.txt: [("'pass", 'pass')] PHJ18961201-V11-12-page33.txt: [("'levers", 'levers'), ("'s", 's')] PHJ18961201-V11-12-page5.txt: [("'The", 'The')] PHJ18990101-V14-01-page12.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis')] PHJ18990201-V14-02-page10.txt: [("'skulls", 'skulls')] PHJ18990201-V14-02-page13.txt: [("'em", 'em')] PHJ18990201-V14-02-page15.txt: [("'end", 'end'), ("'count", 'count')] PHJ18990201-V14-02-page7.txt: [("'the", 'the')] PHJ18990301-V14-03-page11.txt: [("'number", 'number')] PHJ18990301-V14-03-page8.txt: [("'seems", 'seems')] PHJ18990401-V14-04-page16.txt: [("'should", 'should'), ("'I", 'I'), ("'Give", 'Give'), ('\'"Getout', '"Getout'), ("'You", 'You')] PHJ18990401-V14-04-page19.txt: [("'the", 'the'), ("'count", 'count')] PHJ18990501-V14-05-page10.txt: [("'Yes", 'Yes'), ("'you", 'you')] PHJ18990501-V14-05-page13.txt: [("'if", 'if')] PHJ18990501-V14-05-page14.txt: [("'no", 'no')] PHJ18990501-V14-05-page9.txt: [("'something", 'something')] PHJ18990601-V14-06-page11.txt: [("'stomach", 'stomach')] PHJ18990601-V14-06-page14.txt: [("'cause", 'cause')] PHJ18990601-V14-06-page16.txt: [("'Oh", 'Oh'), ("'You", 'You')] PHJ18990701-V14-07-page7.txt: [("'Twas", 'Twas')] PHJ18990801-V14-08-page12.txt: [("'used", 'used')] PHJ18990901-V14-09-page19.txt: [("'plain", 'plain')] PHJ18990901-V14-09-page9.txt: [("'These", 'These')] PHJ18991001-V14-10-page11.txt: [("'between", 'between')] PHJ18991001-V14-10-page3.txt: [("'GILW", 'GILW')] PHJ18991001-V14-10-page4.txt: [("'but", 'but')] PHJ18991101-V14-11-page9.txt: [("'Tis", 'Tis'), ("'Tis", 'Tis')] PHJ19010201-V16-02-page25.txt: [("'can", 'can')] PHJ19010201-V16-02-page26.txt: [("'his", 'his')] PHJ19010201-V16-02-page27.txt: [("'at", 'at')] PHJ19010201-V16-02-page32.txt: [("'Mr", 'Mr'), ("'glittering", 'glittering')] PHJ19010201-V16-02-page4.txt: [("'i", 'i')] PHJ19010301-V16-03-page23.txt: [("'are", 'are')] PHJ19010301-V16-03-page28.txt: [("'his", 'his')] PHJ19010301-V16-03-page32.txt: [("'sir", 'sir'), ("'r", 'r'), ("'sir", 'sir'), ("'glittering", 'glittering')] PHJ19010301-V16-03-page33.txt: [("'III", 'III'), ("'VE", 'VE')] PHJ19010301-V16-03-page34.txt: [("'sty", 'sty')] PHJ19010401-V16-04-page20.txt: [("'made", 'made')] PHJ19010401-V16-04-page21.txt: [("'Mr", 'Mr')] PHJ19010401-V16-04-page31.txt: [("'in", 'in')] PHJ19010401-V16-04-page33.txt: [("'I", 'I')] PHJ19010401-V16-04-page35.txt: [("'Or", 'Or')] PHJ19010501-V16-05-page19.txt: [("'PK", 'PK'), ("'SFr", 'SFr'), ("'AK", 'AK')] PHJ19010501-V16-05-page33.txt: [("'atteivi", 'atteivi'), ("'San", 'San')] PHJ19010501-V16-05-page35.txt: [("'t", 't')] PHJ19010501-V16-05-page36.txt: [("'ElLER", 'ElLER')] PHJ19010501-V16-05-page37.txt: [("'I", 'I'), ("'I", 'I')] PHJ19010601-V16-06-page33.txt: [("'should", 'should')] PHJ19010601-V16-06-page46.txt: [("'.S", '.S'), ("'N", 'N')] PHJ19010601-V16-06-page49.txt: [("'.", '.'), ("'I", 'I'), ("'r", 'r')] PHJ19010601-V16-06-page52.txt: [("'str", 'str')] PHJ19010701-V16-07-page1.txt: [("'DEPARTMENT", 'DEPARTMENT'), ("'l", 'l')] PHJ19010701-V16-07-page14.txt: [("'Tis", 'Tis')] PHJ19010701-V16-07-page27.txt: [("'stte", 'stte')] PHJ19010701-V16-07-page29.txt: [("'rep", 'rep')] PHJ19010701-V16-07-page32.txt: [("'.", '.')] PHJ19010701-V16-07-page35.txt: [("'lir", 'lir')] PHJ19010701-V16-07-page9.txt: [("'dead", 'dead')] PHJ19010801-V16-08-page20.txt: [("'cause", 'cause')] PHJ19010801-V16-08-page29.txt: [("'be", 'be')] PHJ19010801-V16-08-page32.txt: [("'.", '.')] PHJ19010801-V16-08-page36.txt: [("'St.", 'St.')] PHJ19010801-V16-08-page4.txt: [("'P.", 'P.')] PHJ19010901-V16-09-page19.txt: [("'Tis", 'Tis')] PHJ19010901-V16-09-page3.txt: [("'prevents", 'prevents')] PHJ19011001-V16-10-page13.txt: [("'to", 'to')] PHJ19011001-V16-10-page18.txt: [("'He", 'He')] PHJ19011001-V16-10-page29.txt: [("'sir", 'sir')] PHJ19011001-V16-10-page30.txt: [("'V", 'V')] PHJ19011001-V16-10-page34.txt: [("'.", '.')] PHJ19011001-V16-10-page5.txt: [("''the", 'the')] PHJ19011001-V16-10-page8.txt: [("'daily", 'daily')] PHJ19011101-V16-11-page30.txt: [('\'LIT"', 'LIT"'), ("'answered", 'answered')] PHJ19011101-V16-11-page33.txt: [("'Street", 'Street')] PHJ19011101-V16-11-page34.txt: [("'sir", 'sir')] PHJ19011101-V16-11-page9.txt: [("'a", 'a')] PHJ19011201-V16-12-page16.txt: [("'The", 'The')] PHJ19011201-V16-12-page19.txt: [("'can", 'can')] PHJ19011201-V16-12-page2.txt: [("'C.", 'C.')] PHJ19011201-V16-12-page4.txt: [('\'..\'"\'.."', '..".."'), ("'.", '.')] PHJ19020101-V17-01-page18.txt: [("'O", 'O')] PHJ19020101-V17-01-page34.txt: [("'PORT", 'PORT')] PHJ19020101-V17-01-page35.txt: [("'irraurif", 'irraurif')] PHJ19020101-V17-01-page36.txt: [("'I", 'I'), ("'TiTTVT", 'TiTTVT')] PHJ19020101-V17-01-page38.txt: [("'stir", 'stir')] PHJ19020201-V17-02-page19.txt: [("'railroad", 'railroad')] PHJ19020201-V17-02-page20.txt: [("'no", 'no')] PHJ19020201-V17-02-page30.txt: [("'.", '.'), ("'TT", 'TT')] PHJ19020201-V17-02-page31.txt: [('\'\'"', '"')] PHJ19020201-V17-02-page33.txt: [("'.", '.')] PHJ19020301-V17-03-page37.txt: [("'.", '.')] PHJ19020401-V17-04-page22.txt: [("'mong", 'mong'), ("'tis", 'tis')] PHJ19020401-V17-04-page23.txt: [("'mind", 'mind'), ("'result", 'result')] PHJ19020401-V17-04-page24.txt: [("'Go", 'Go'), ("'Aurora", 'Aurora'), ("'Consuelo", 'Consuelo')] PHJ19020401-V17-04-page29.txt: [("'''ket", 'ket')] PHJ19020501-V17-05-page11.txt: [("'with", 'with')] PHJ19020501-V17-05-page20.txt: [("'in", 'in')] PHJ19020501-V17-05-page25.txt: [("'a", 'a')] PHJ19020501-V17-05-page26.txt: [("'infections", 'infections')] PHJ19020501-V17-05-page7.txt: [("'O", 'O')] PHJ19020601-V17-06-page22.txt: [("'Oh", 'Oh')] PHJ19020601-V17-06-page28.txt: [("'or", 'or')] PHJ19020701-V17-07-page11.txt: [("'the", 'the')] PHJ19020701-V17-07-page8.txt: [("'the", 'the')] PHJ19020801-V17-08-page30.txt: [("'fri", 'fri')] PHJ19020901-V17-09-page18.txt: [("'Les", 'Les')] PHJ19021001-V17-10-page16.txt: [('\'em."', 'em."')] PHJ19021001-V17-10-page24.txt: [("'risky", 'risky'), ("'matter", 'matter')] PHJ19021001-V17-10-page26.txt: [("'should", 'should')] PHJ19021001-V17-10-page29.txt: [("'Regularly", 'Regularly')] PHJ19021001-V17-10-page32.txt: [("'Iarlagers", 'Iarlagers')] PHJ19021001-V17-10-page33.txt: [("'laths", 'laths')] PHJ19021001-V17-10-page36.txt: [("'By", 'By')] PHJ19021101-V17-11-page18.txt: [("'Tis", 'Tis')] PHJ19021101-V17-11-page20.txt: [('\'em."', 'em."')] PHJ19021101-V17-11-page22.txt: [("'The", 'The'), ("'Some", 'Some')] PHJ19021101-V17-11-page29.txt: [("'rig", 'rig')] PHJ19021101-V17-11-page6.txt: [("'ease", 'ease')] PHJ19021201-V17-12-page13.txt: [("'time", 'time')] PHJ19021201-V17-12-page22.txt: [("'health", 'health')] PHJ19021201-V17-12-page30.txt: [("'Regularly", 'Regularly')] PHJ19021201-V17-12-page4.txt: [("'D", 'D')] PHJ19021201-V17-12-page6.txt: [("'We", 'We')] PHJ19030101-V18-01-page21.txt: [("'bout", 'bout')] PHJ19030101-V18-01-page31.txt: [("'rig", 'rig')] PHJ19030101-V18-01-page32.txt: [('\'"', '"'), ("'PORTER", 'PORTER')] PHJ19030101-V18-01-page36.txt: [("'true", 'true')] PHJ19030201-V18-02-page2.txt: [("'true", 'true')] PHJ19030201-V18-02-page21.txt: [("'Tts", 'Tts')] PHJ19030201-V18-02-page22.txt: [("'of", 'of')] PHJ19030201-V18-02-page23.txt: [("'curve", 'curve')] PHJ19030201-V18-02-page35.txt: [("'cf", 'cf')] PHJ19030201-V18-02-page6.txt: [("'sufficient", 'sufficient')] PHJ19030201-V18-02-page8.txt: [("'and", 'and')] PHJ19030301-V18-03-page23.txt: [("'sfr", 'sfr')] PHJ19030301-V18-03-page24.txt: [("'Nor", 'Nor')] PHJ19030301-V18-03-page33.txt: [("'D.", 'D.')] PHJ19030301-V18-03-page4.txt: [("'Ivo", 'Ivo')] PHJ19030401-V18-04-page17.txt: [("'SW", 'SW')] PHJ19030401-V18-04-page21.txt: [("'PK", 'PK')] PHJ19030401-V18-04-page30.txt: [("'Uncle", 'Uncle')] PHJ19030401-V18-04-page36.txt: [("'NIL", 'NIL')] PHJ19030501-V18-05-page10.txt: [("'sfir", 'sfir')] PHJ19030501-V18-05-page12.txt: [("'give", 'give')] PHJ19030501-V18-05-page14.txt: [("'except", 'except'), ("'sleep", 'sleep')] PHJ19030501-V18-05-page17.txt: [("'stir", 'stir')] PHJ19030501-V18-05-page30.txt: [("'..", '..'), ("''S", 'S'), ("'o", 'o'), ("''Uncle", 'Uncle'), ("'rf", 'rf')] PHJ19030501-V18-05-page4.txt: [("'PliOCE", 'PliOCE')] PHJ19030501-V18-05-page7.txt: [("'and", 'and')] PHJ19030601-V18-06-page11.txt: [("'I'bis", 'Ibis')] PHJ19030601-V18-06-page12.txt: [("'The", 'The')] PHJ19030601-V18-06-page2.txt: [("'Physical", 'Physical')] PHJ19030601-V18-06-page27.txt: [("'PK", 'PK')] PHJ19030601-V18-06-page30.txt: [("'r", 'r')] PHJ19030701-V18-07-page2.txt: [("'AO", 'AO')] PHJ19030701-V18-07-page31.txt: [('\'\'immoral."', 'immoral."')] PHJ19030701-V18-07-page35.txt: [("'IA", 'IA'), ("'AV", 'AV'), ("'tci", 'tci')] PHJ19030701-V18-07-page36.txt: [("'.", '.'), ("'ILIMPItAtL", 'ILIMPItAtL')] PHJ19030801-V18-08-page18.txt: [("'twas", 'twas')] PHJ19030901-V18-09-page1.txt: [("'Next", 'Next')] PHJ19030901-V18-09-page16.txt: [("'tis", 'tis')] PHJ19030901-V18-09-page24.txt: [("'Par", 'Par')] PHJ19030901-V18-09-page34.txt: [("'Coolest", 'Coolest')] PHJ19030901-V18-09-page35.txt: [("'r", 'r')] PHJ19030901-V18-09-page8.txt: [("'word", 'word')] PHJ19031001-V18-10-page25.txt: [("'tis", 'tis')] PHJ19031001-V18-10-page30.txt: [("'em", 'em'), ("'em", 'em'), ("'No", 'No')] PHJ19031001-V18-10-page7.txt: [("'lusted", 'lusted')] PHJ19031101-V18-11-page13.txt: [("'Mr", 'Mr')] PHJ19031101-V18-11-page15.txt: [("'sir", 'sir'), ("'PK", 'PK')] PHJ19031101-V18-11-page21.txt: [("'phone.", 'phone.')] PHJ19031101-V18-11-page27.txt: [("'SW", 'SW')] PHJ19031101-V18-11-page3.txt: [("'.", '.'), ("'j", 'j'), ("'..", '..'), ("'..", '..')] PHJ19031101-V18-11-page32.txt: [("''The", 'The')] PHJ19031101-V18-11-page6.txt: [("'H", 'H')] PHJ19031201-V18-12-page28.txt: [("'SW", 'SW')] PHJ19031201-V18-12-page3.txt: [("'.", '.'), ("'..", '..'), ("'.Ji.", '.Ji.')] PHJ19031201-V18-12-page35.txt: [("'A", 'A'), ("'A", 'A')] PHJ19031201-V18-12-page4.txt: [("'Dubbing", 'Dubbing'), ("'Puts", 'Puts')] PHJ19040101-V19-01-page25.txt: [("'mong", 'mong'), ("'Tis", 'Tis')] PHJ19040101-V19-01-page28.txt: [("'neath", 'neath')] PHJ19040101-V19-01-page3.txt: [("'will", 'will')] PHJ19040201-V19-02-page12.txt: [("'who", 'who')] PHJ19040201-V19-02-page13.txt: [("'for", 'for')] PHJ19040201-V19-02-page16.txt: [("'Tis", 'Tis')] PHJ19040201-V19-02-page17.txt: [("'till", 'till')] PHJ19040201-V19-02-page2.txt: [("'creative", 'creative')] PHJ19040201-V19-02-page27.txt: [("'t", 't')] PHJ19040201-V19-02-page29.txt: [("'Bible.", 'Bible.')] PHJ19040201-V19-02-page35.txt: [("'crol", 'crol')] PHJ19040301-V19-03-page15.txt: [("'Twas", 'Twas')] PHJ19040301-V19-03-page19.txt: [("'found", 'found')] PHJ19040301-V19-03-page2.txt: [("'creative", 'creative')] PHJ19040301-V19-03-page24.txt: [("'ranks", 'ranks')] PHJ19040301-V19-03-page27.txt: [("'if", 'if'), ("'A", 'A')] PHJ19040301-V19-03-page34.txt: [("'clounk", 'clounk')] PHJ19040301-V19-03-page9.txt: [("'not", 'not')] PHJ19040401-V19-04-page12.txt: [("'a", 'a')] PHJ19040401-V19-04-page23.txt: [("'ow", 'ow'), ("'urt", 'urt')] PHJ19040401-V19-04-page24.txt: [("'ere's", 'eres'), ("'and", 'and')] PHJ19040401-V19-04-page26.txt: [("'lowed", 'lowed')] PHJ19040401-V19-04-page28.txt: [("'MC", 'MC')] PHJ19040401-V19-04-page3.txt: [("'of", 'of')] PHJ19040401-V19-04-page34.txt: [("'ClktNi", 'ClktNi')] PHJ19040401-V19-04-page35.txt: [("'r", 'r')] PHJ19040401-V19-04-page5.txt: [("'bad", 'bad')] PHJ19040501-V19-05-page2.txt: [("'L", 'L')] PHJ19040501-V19-05-page6.txt: [("'PK", 'PK')]
In [23]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction4 Average verified rate: 0.9808491067757629 Average of error rates: 0.02707709397911575 Total token count: 2850572
In [24]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[24]:
[('m', 2736), ('d', 2336), ("'", 1997), ('e', 1399), ('r', 1279), ('t', 1213), ('w', 1176), ('n', 1124), ('co', 1118), ('f', 796), ('g', 766), ('x', 649), ('lb', 588), ('th', 260), ('sel', 251), ('oo', 237), ('mo', 233), ('pp', 228), ('z', 217), ('k', 216), ('u', 199), ("an'", 192), ('q', 133), ('ex', 111), ('al', 109), ('ournal', 106), ('pa', 88), ('te', 86), ('ro', 86), ('oz', 81), ('ga', 81), ('va', 74), ('munn', 73), ('ti', 73), ('io', 72), ('-', 66), ('tion', 63), ("infants'", 61), ('id', 59), ('re', 58), ('zo', 55), ('em', 55), ('viperance', 54), ("''", 49), ('si', 48), ('urnal', 48), ('yo', 48), ('fahr', 46), ('cc', 45), ("hours'", 44)]
Correction 5 -- Rejoin Burst Words¶
In [25]:
# %load shared_elements/rejoin_burst_words.py
prev = cycle
cycle = "correction5"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
pattern = re.compile("(\s(\w{1,2}\s){5,})")
replacements = []
GoH.clean.check_splits(pattern, spelling_dictionary, content, replacements)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = GoH.clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18850601-V01-01-page2.txt: [('It', 'It')] PHJ18861001-V02-03-page1.txt: [('So', 'So')] PHJ18870401-V02-06-page1.txt: [('Iv', 'Iv')] PHJ18870601-V02-07-page13.txt: [('At', 'At')] PHJ18890401-V04-04-page28.txt: [('Of', 'Of')] PHJ18890501-V04-05-page14.txt: [(' w o nd e r ', 'wonder')] PHJ18890501-V04-05-page29.txt: [('If', 'If'), ('Of', 'Of')] PHJ18891001-V04-10-page29.txt: [('It', 'It')] PHJ18891201-V04-12-page34.txt: [('To', 'To')] PHJ18900301-V05-03-page30.txt: [('So', 'So')] PHJ18900501-V05-05-page31.txt: [('In', 'In')] PHJ18900801-V05-08-page29.txt: [('So', 'So')] PHJ18901001-V05-10-page31.txt: [('St', 'St')] PHJ18901201-V05-12-page28.txt: [('It', 'It')] PHJ18901201-V05-12-page29.txt: [('Go', 'Go')] PHJ18910101-V06-01-page28.txt: [('Do', 'Do')] PHJ18920401-V07-04-page30.txt: [('No', 'No')] PHJ18920501-V07-05-page28.txt: [('Is', 'Is')] PHJ19020201-V17-02-page2.txt: [('It', 'It')] PHJ19020301-V17-03-page24.txt: [('He', 'He')] PHJ19020601-V17-06-page35.txt: [('Is', 'Is')] PHJ19020801-V17-08-page31.txt: [('Be', 'Be'), ('Do', 'Do')] PHJ19020801-V17-08-page7.txt: [('If', 'If')] PHJ19030101-V18-01-page35.txt: [('El', 'El')] PHJ19030401-V18-04-page2.txt: [('Vs', 'Vs'), ('Vs', 'Vs')] PHJ19031201-V18-12-page35.txt: [('Ai', 'Ai')] PHJ19040301-V19-03-page29.txt: [('If', 'If')]
In [26]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction5 Average verified rate: 0.9808463809521136 Average of error rates: 0.027078649189069097 Total token count: 2850584
In [27]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[27]:
[('m', 2736), ('d', 2336), ("'", 1997), ('e', 1398), ('r', 1278), ('t', 1213), ('w', 1175), ('n', 1124), ('co', 1118), ('f', 796), ('g', 766), ('x', 649), ('lb', 588), ('th', 260), ('sel', 251), ('oo', 237), ('mo', 233), ('pp', 228), ('z', 217), ('k', 216), ('u', 199), ("an'", 192), ('q', 133), ('ex', 111), ('al', 109), ('ournal', 106), ('pa', 88), ('te', 86), ('ro', 86), ('oz', 81), ('ga', 81), ('va', 74), ('munn', 73), ('ti', 73), ('io', 72), ('-', 66), ('tion', 63), ("infants'", 61), ('id', 59), ('re', 58), ('zo', 55), ('em', 55), ('viperance', 54), ("''", 49), ('si', 48), ('urnal', 48), ('yo', 48), ('fahr', 46), ('cc', 45), ("hours'", 44)]
Correction 6 -- Rejoin Split Words¶
In [28]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction6"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = GoH.utilities.tokenize_text(text)
errors = GoH.reports.identify_errors(tokens, spelling_dictionary)
replacements = GoH.clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=False)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = GoH.clean.replace_split_words(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18850601-V01-01-page23.txt: [('re', 's')] PHJ18850601-V01-01-page3.txt: [('th', 'ese'), ('co', 'exist')] PHJ18850801-V01-02-page22.txt: [('pp', 'r')] PHJ18850801-V01-02-page23.txt: [('ri', 'le')] PHJ18851001-V01-03-page22.txt: [('PP', 'r')] PHJ18851201-V01-04-page16.txt: [('devel', 'oped')] PHJ18851201-V01-04-page18.txt: [('wiv', 'e')] PHJ18851201-V01-04-page5.txt: [('FO', 'UL')] PHJ18851201-V01-04-page6.txt: [('accommo', 'dating')] PHJ18851201-V01-04-page8.txt: [('phy', 'sician')] PHJ18860201-V01-05-page23.txt: [('PRESERVA', 'TION')] PHJ18860201-V01-05-page3.txt: [('dif', 'ficult')] PHJ18860401-V01-06-page22.txt: [('PREVEN', 'TION')] PHJ18860601-V02-01-page19.txt: [('striv', 'ing')] PHJ18860601-V02-01-page21.txt: [('shoul', 'ders'), ('ri', 'ce')] PHJ18860601-V02-01-page22.txt: [('TEMPERANC', 'E')] PHJ18860601-V02-01-page3.txt: [('alime', 'ntation')] PHJ18860801-V02-02-page1.txt: [('ti', 'e')] PHJ18860801-V02-02-page24.txt: [('fo', 'r')] PHJ18860801-V02-02-page26.txt: [('BI', 'MONTHLY')] PHJ18860801-V02-02-page31.txt: [('STOM', 'ACHS')] PHJ18860801-V02-02-page4.txt: [('ea', 't')] PHJ18860801-V02-02-page8.txt: [('condi', 'ments')] PHJ18861001-V02-03-page11.txt: [('mechan', 'ism')] PHJ18861001-V02-03-page13.txt: [('Plym', 'outh')] PHJ18861001-V02-03-page2.txt: [('Genito', 'Urinary')] PHJ18861001-V02-03-page20.txt: [('AMENORRH', 'EA')] PHJ18861001-V02-03-page21.txt: [('recog', 'nized'), ('ac', 'companies')] PHJ18861001-V02-03-page24.txt: [('ti', 'ti')] PHJ18861001-V02-03-page26.txt: [('bi', 'monthly')] PHJ18861001-V02-03-page27.txt: [('Bi', 'Monthly')] PHJ18861001-V02-03-page30.txt: [('PRESERVA', 'TION')] PHJ18861001-V02-03-page7.txt: [('supernum', 'erary')] PHJ18861001-V02-03-page8.txt: [('th', 'e'), ('kno', 'wledge')] PHJ18861201-V02-04-page1.txt: [('Dau', 'ghters'), ('ti', 'e')] PHJ18861201-V02-04-page17.txt: [('CU', 'RE')] PHJ18861201-V02-04-page24.txt: [('co', 'o')] PHJ18861201-V02-04-page27.txt: [('PRESERVA', 'TION')] PHJ18861201-V02-04-page28.txt: [('Yo', 'Semite'), ('HUTC', 'HINGS')] PHJ18861201-V02-04-page29.txt: [('Yo', 'Semite')] PHJ18861201-V02-04-page3.txt: [('BI', 'MONTHLY')] PHJ18861201-V02-04-page32.txt: [('M.', '')] PHJ18870201-V02-05-page1.txt: [('ti', 'e')] PHJ18870201-V02-05-page16.txt: [('frustrat', 'ing')] PHJ18870201-V02-05-page19.txt: [('ro', 'per')] PHJ18870201-V02-05-page2.txt: [('Genito', 'Urinary')] PHJ18870201-V02-05-page21.txt: [('diffi', 'culty')] PHJ18870201-V02-05-page30.txt: [('Yo', 'Semite')] PHJ18870201-V02-05-page32.txt: [('TE', 'E'), ('li', 't')] PHJ18870201-V02-05-page6.txt: [('nar', 'cotics')] PHJ18870201-V02-05-page8.txt: [('ne', 'ighbors')] PHJ18870401-V02-06-page1.txt: [('al', 'e'), ('Mal', 'aria')] PHJ18870401-V02-06-page13.txt: [('neces', 'saries')] PHJ18870401-V02-06-page26.txt: [('Societ', 'y'), ('PRE', 'S'), ('Norwa', 'y')] PHJ18870401-V02-06-page29.txt: [('soci', 'ety'), ('Bi', 'Monthly')] PHJ18870401-V02-06-page30.txt: [('Yo', 'Semite')] PHJ18870401-V02-06-page32.txt: [('li', 'V')] PHJ18870601-V02-07-page15.txt: [('Declar', 'ing')] PHJ18870601-V02-07-page2.txt: [('Genito', 'Urinary')] PHJ18870601-V02-07-page20.txt: [('Mc', 'Dowell')] PHJ18870601-V02-07-page25.txt: [('WA', 'FERS')] PHJ18870601-V02-07-page27.txt: [('Societ', 'y'), ('Ow', 'n')] PHJ18870601-V02-07-page31.txt: [('Yo', 'Semite')] PHJ18870801-V02-08-page1.txt: [('ti', 'e')] PHJ18870801-V02-08-page2.txt: [('Genito', 'Urinary')] PHJ18870801-V02-08-page21.txt: [('diseas', 'e')] PHJ18870801-V02-08-page25.txt: [('WHEA', 'TENA')] PHJ18870801-V02-08-page31.txt: [('Yo', 'Semite')] PHJ18871001-V02-09-page1.txt: [('po', 'etry')] PHJ18871001-V02-09-page14.txt: [('fathe', 'r')] PHJ18871001-V02-09-page18.txt: [('devel', 'oped')] PHJ18871001-V02-09-page2.txt: [('Genito', 'Urinary')] PHJ18871001-V02-09-page23.txt: [('HEA', 'T')] PHJ18871001-V02-09-page24.txt: [('ID', 'S'), ('Thermo', 'Electric')] PHJ18871001-V02-09-page25.txt: [('WHEA', 'TENA'), ('WA', 'FERS')] PHJ18871001-V02-09-page26.txt: [('Yo', 'Semite')] PHJ18871001-V02-09-page27.txt: [('SC', 'R'), ('Yo', 'Semite')] PHJ18871001-V02-09-page32.txt: [('AL', 'G')] PHJ18871001-V02-09-page8.txt: [('gener', 'ation')] PHJ18880101-V03-01-page21.txt: [('holi', 'days')] PHJ18880101-V03-01-page22.txt: [('dic', 'tionary')] PHJ18880101-V03-01-page27.txt: [('Yo', 'Semite')] PHJ18880101-V03-01-page5.txt: [('mur', 'murings')] PHJ18880101-V03-01-page7.txt: [('olfac', 'tory')] PHJ18880201-V03-02-page14.txt: [('re', 'instate')] PHJ18880201-V03-02-page26.txt: [('DELT', 'A')] PHJ18880201-V03-02-page27.txt: [('Yo', 'Semite')] PHJ18880201-V03-02-page28.txt: [('CA', 'N')] PHJ18880201-V03-02-page5.txt: [('Defi', 'ciency')] PHJ18880201-V03-02-page7.txt: [('Deo', 'N')] PHJ18880201-V03-02-page8.txt: [('respon', 'sible')] PHJ18880301-V03-03-page24.txt: [('TEM', 'PERANCE')] PHJ18880301-V03-03-page25.txt: [('gi', 'A')] PHJ18880301-V03-03-page27.txt: [('econom', 'ical'), ('LAUN', 'DRY')] PHJ18880401-V03-04-page19.txt: [('communicat', 'ing')] PHJ18880401-V03-04-page27.txt: [('ro', 'c'), ('econom', 'ical'), ('LAUN', 'DRY'), ('Thermo', 'Electric'), ('SA', 'FETY')] PHJ18880401-V03-04-page31.txt: [('BI', 'S')] PHJ18880401-V03-04-page32.txt: [('Yo', 'Semite')] PHJ18880501-V03-05-page2.txt: [('beco', 'me')] PHJ18880501-V03-05-page23.txt: [('vid', 'a')] PHJ18880501-V03-05-page25.txt: [('econom', 'ical'), ('LAUN', 'DRY'), ('Fr', 'is')] PHJ18880501-V03-05-page26.txt: [('HY', 'GIENE'), ('CA', 'N')] PHJ18880501-V03-05-page27.txt: [('APPLI', 'ANCES'), ('SI', 'g')] PHJ18880501-V03-05-page28.txt: [('Thermo', 'Electric')] PHJ18880601-V03-06-page12.txt: [('PEO', 'PLE')] PHJ18880601-V03-06-page15.txt: [('endur', 'ance')] PHJ18880601-V03-06-page19.txt: [('INSTITU', 'TION')] PHJ18880601-V03-06-page25.txt: [('LAUN', 'DRY'), ('co', 'per'), ('PA', 'CIFIC')] PHJ18880601-V03-06-page27.txt: [('APPLI', 'ANCES')] PHJ18880601-V03-06-page3.txt: [('ap', 'petite')] PHJ18880601-V03-06-page31.txt: [('pag', 'es')] PHJ18880701-V03-07-page1.txt: [('prerequi', 'site'), ('Si', 'P'), ('cerebro', 'spinal')] PHJ18880701-V03-07-page22.txt: [('wh', 'ich')] PHJ18880701-V03-07-page26.txt: [('HY', 'GIENE')] PHJ18880701-V03-07-page27.txt: [('APPLI', 'ANCES'), ('co', 't')] PHJ18880701-V03-07-page6.txt: [('experim', 'ents'), ('co', 'mplete')] PHJ18880801-V03-08-page24.txt: [('pre', 'ssing')] PHJ18880801-V03-08-page25.txt: [('ap', 'ply')] PHJ18880801-V03-08-page27.txt: [('Thermo', 'Electric'), ('es', 'sentially')] PHJ18880801-V03-08-page28.txt: [('Phy', 'sician')] PHJ18880801-V03-08-page30.txt: [('APPLI', 'ANCES')] PHJ18880801-V03-08-page32.txt: [('OAKLAN', 'D'), ('mo', 't')] PHJ18880801-V03-08-page6.txt: [('carbo', 'hydrates')] PHJ18880801-V03-08-page7.txt: [('carbo', 'hydrates')] PHJ18880901-V03-09-page19.txt: [('ri', 'to')] PHJ18880901-V03-09-page28.txt: [('WANTE', 'D')] PHJ18880901-V03-09-page32.txt: [('Yo', 'Semite'), ('SUBSCR', 'IPTION')] PHJ18881001-V03-10-page12.txt: [('legis', 'late')] PHJ18881001-V03-10-page20.txt: [('ro', 'm')] PHJ18881001-V03-10-page27.txt: [('MO', 'per')] PHJ18881001-V03-10-page28.txt: [('Thermo', 'Electric')] PHJ18881001-V03-10-page6.txt: [('foreig', 'n')] PHJ18881101-V03-11-page11.txt: [('pa', 'tiently')] PHJ18881101-V03-11-page19.txt: [('th', 'at'), ('Stimu', 'lating')] PHJ18881101-V03-11-page20.txt: [('TEM', 'PERANCE')] PHJ18881101-V03-11-page21.txt: [('ex', 'ample')] PHJ18881101-V03-11-page25.txt: [('Id', 'a')] PHJ18881101-V03-11-page29.txt: [('CA', 'N'), ('co', 'co')] PHJ18881101-V03-11-page32.txt: [('al', 'a')] PHJ18881101-V03-11-page5.txt: [('re', 'absorption')] PHJ18881101-V03-11-page9.txt: [('lazi', 'ness')] PHJ18881201-V03-12-page30.txt: [('HY', 'GIENE')] PHJ18881201-V03-12-page31.txt: [('Thermo', 'Electric')] PHJ18881201-V03-12-page34.txt: [('PAG', 'E')] PHJ18881201-V03-12-page9.txt: [('dren', 'the')] PHJ18890101-V04-01-page12.txt: [('conclu', 'sion')] PHJ18890101-V04-01-page21.txt: [('HYPERTRO', 'PHY')] PHJ18890101-V04-01-page24.txt: [('charac', 'ter')] PHJ18890101-V04-01-page29.txt: [('ove', 'n'), ('WHEA', 'TENA'), ('WA', 'FERS')] PHJ18890101-V04-01-page32.txt: [('STEE', 'L')] PHJ18890201-V04-02-page15.txt: [('atte', 'mpt')] PHJ18890201-V04-02-page16.txt: [('INTEMPER', 'ANCE')] PHJ18890201-V04-02-page20.txt: [('ment', 'or')] PHJ18890201-V04-02-page28.txt: [('WA', 'FERS')] PHJ18890301-V04-03-page2.txt: [('ex', 'hausting')] PHJ18890301-V04-03-page21.txt: [('squan', 'ders')] PHJ18890301-V04-03-page28.txt: [('bu', 't'), ('WA', 'FERS')] PHJ18890301-V04-03-page3.txt: [('WES', 'LEY')] PHJ18890301-V04-03-page32.txt: [('MA', 'i')] PHJ18890401-V04-04-page1.txt: [('nurs', 'ing')] PHJ18890401-V04-04-page14.txt: [('ca', 'rousal')] PHJ18890401-V04-04-page22.txt: [("Woman'", 's')] PHJ18890401-V04-04-page27.txt: [('ro', 'at')] PHJ18890401-V04-04-page28.txt: [('SANC', 'TIFICATION'), ('ti', 'c')] PHJ18890401-V04-04-page3.txt: [('ex', 'perience')] PHJ18890401-V04-04-page31.txt: [('WA', 'FERS')] PHJ18890401-V04-04-page32.txt: [('Thermo', 'Electric')] PHJ18890501-V04-05-page14.txt: [('exten', 'sively')] PHJ18890501-V04-05-page25.txt: [('follo', 'ws')] PHJ18890501-V04-05-page27.txt: [('insti', 'tution')] PHJ18890501-V04-05-page29.txt: [('Thermo', 'Electric')] PHJ18890501-V04-05-page31.txt: [('substan', 'tial')] PHJ18890601-V04-06-page13.txt: [('smok', 'ing')] PHJ18890601-V04-06-page25.txt: [('substan', 'tial')] PHJ18890601-V04-06-page27.txt: [('Lan', 'caster')] PHJ18890601-V04-06-page28.txt: [('Cr', 'ackers')] PHJ18890601-V04-06-page29.txt: [('Thermo', 'Electric'), ('ti', 'a')] PHJ18890601-V04-06-page32.txt: [('ro', 'C')] PHJ18890701-V04-07-page27.txt: [('Lan', 'caster')] PHJ18890701-V04-07-page28.txt: [('Thermo', 'Electric')] PHJ18890801-V04-08-page10.txt: [('re', 'marks')] PHJ18890801-V04-08-page14.txt: [('PA', 'SO')] PHJ18890801-V04-08-page15.txt: [('swal', 'low')] PHJ18890801-V04-08-page20.txt: [('ab', 'out')] PHJ18890801-V04-08-page21.txt: [('ca', 'n')] PHJ18890801-V04-08-page28.txt: [('Thermo', 'Electric')] PHJ18890801-V04-08-page29.txt: [('Cr', 'ackers')] PHJ18890801-V04-08-page7.txt: [("Harper'", 's')] PHJ18890901-V04-09-page12.txt: [('re', 'frain')] PHJ18890901-V04-09-page21.txt: [('reme', 'died')] PHJ18890901-V04-09-page23.txt: [('subordi', 'nate')] PHJ18890901-V04-09-page24.txt: [('al', 'I')] PHJ18890901-V04-09-page29.txt: [('Thermo', 'Electric')] PHJ18891001-V04-10-page11.txt: [('profes', 'sion'), ("Harper'", 's')] PHJ18891001-V04-10-page29.txt: [('Thermo', 'Electric'), ('Da', 'isy')] PHJ18891101-V04-11-page26.txt: [('TEM', 'PERANCE')] PHJ18891101-V04-11-page29.txt: [('Thermo', 'Electric')] PHJ18891101-V04-11-page30.txt: [("SALISBURY'", 'S')] PHJ18891101-V04-11-page7.txt: [('Ro', 'Me')] PHJ18891101-V04-11-page8.txt: [('inclina', 'tions')] PHJ18891201-V04-12-page14.txt: [('chil', 'dren')] PHJ18891201-V04-12-page15.txt: [('recom', 'mended')] PHJ18891201-V04-12-page18.txt: [('kal', 'is')] PHJ18891201-V04-12-page28.txt: [('lb', 's')] PHJ18891201-V04-12-page29.txt: [('Thermo', 'Electric')] PHJ18891201-V04-12-page33.txt: [('absti', 'nence')] PHJ18891201-V04-12-page34.txt: [('chil', 'dren')] PHJ18891201-V04-12-page8.txt: [('al', 'd')] PHJ18900101-V05-01-page25.txt: [('re', 'adapting')] PHJ18900101-V05-01-page27.txt: [('Jou', 'RNAL')] PHJ18900101-V05-01-page28.txt: [('Thermo', 'Electric')] PHJ18900101-V05-01-page7.txt: [('VEGETA', 'RIANISM')] PHJ18900201-V05-02-page17.txt: [('se', 'a')] PHJ18900201-V05-02-page24.txt: [('th', 'e')] PHJ18900201-V05-02-page25.txt: [('demi', 'monde')] PHJ18900201-V05-02-page27.txt: [('Lincol', 'n')] PHJ18900201-V05-02-page28.txt: [('Suppo', 'rter'), ('Thermo', 'Electric')] PHJ18900301-V05-03-page23.txt: [('ful', 'as')] PHJ18900301-V05-03-page26.txt: [('mo', 't')] PHJ18900301-V05-03-page32.txt: [('Sto', 'a')] PHJ18900401-V05-04-page26.txt: [('respons', 'e')] PHJ18900401-V05-04-page27.txt: [('pa', 'say')] PHJ18900401-V05-04-page31.txt: [('cl', 'osed'), ('es', 'sentially')] PHJ18900401-V05-04-page32.txt: [('CO', 'R'), ('pr', 'ef')] PHJ18900401-V05-04-page9.txt: [('appe', 'tites')] PHJ18900501-V05-05-page21.txt: [('impor', 'tant')] PHJ18900501-V05-05-page26.txt: [('Mc', 'Clure')] PHJ18900501-V05-05-page8.txt: [('al', 'way')] PHJ18900601-V05-06-page12.txt: [('tink', 'ling')] PHJ18900601-V05-06-page21.txt: [('inno', 'vations'), ('nu', 'n')] PHJ18900601-V05-06-page24.txt: [('increasin', 'g')] PHJ18900601-V05-06-page27.txt: [('Lan', 'caster')] PHJ18900601-V05-06-page30.txt: [('ONL', 'Y')] PHJ18900601-V05-06-page32.txt: [('HALLET', 'T')] PHJ18900601-V05-06-page5.txt: [('re', 'garded')] PHJ18900701-V05-07-page12.txt: [("Youth'", 's')] PHJ18900701-V05-07-page25.txt: [('al', 'ways')] PHJ18900701-V05-07-page30.txt: [('lithog', 'raph')] PHJ18900701-V05-07-page31.txt: [('ta', 't')] PHJ18900701-V05-07-page32.txt: [('buil', 'ding')] PHJ18900701-V05-07-page6.txt: [('INDEPEND', 'ENCE')] PHJ18900801-V05-08-page23.txt: [('fl', 'at')] PHJ18900801-V05-08-page26.txt: [('PROHIBI', 'TION')] PHJ18900801-V05-08-page28.txt: [('COMBINA', 'TION')] PHJ18900801-V05-08-page29.txt: [('Cata', 'logue')] PHJ18900801-V05-08-page31.txt: [('buil', 'ding')] PHJ18900801-V05-08-page32.txt: [('Vo', 'e')] PHJ18900801-V05-08-page4.txt: [('un', 'heeded')] PHJ18900901-V05-09-page26.txt: [('forti', 'eth')] PHJ18900901-V05-09-page31.txt: [('Thi', 's')] PHJ18901001-V05-10-page12.txt: [('ap', 'plied')] PHJ18901001-V05-10-page17.txt: [('ro', 'per')] PHJ18901001-V05-10-page31.txt: [('CO', 'P'), ('buil', 'ding')] PHJ18901001-V05-10-page7.txt: [('al', 'e')] PHJ18901101-V05-11-page26.txt: [('ASSOCIA', 'TION')] PHJ18901101-V05-11-page6.txt: [('CI', 'NE')] PHJ18901201-V05-12-page14.txt: [("Youth'", 's')] PHJ18901201-V05-12-page23.txt: [('tem', 'ple')] PHJ18901201-V05-12-page26.txt: [('PA', 'CIFIC')] PHJ18901201-V05-12-page28.txt: [('CO', 'P'), ('ca', 'se'), ('ti', 'The')] PHJ18901201-V05-12-page31.txt: [('IE', 'R')] PHJ18910101-V06-01-page11.txt: [("Hall'", 's')] PHJ18910101-V06-01-page27.txt: [('exi', 'ts')] PHJ18910101-V06-01-page29.txt: [('cl', 'osed')] PHJ18910101-V06-01-page30.txt: [('Assy', 'rian')] PHJ18910101-V06-01-page32.txt: [('corres', 'pondence')] PHJ18910101-V06-01-page8.txt: [('OPPOR', 'TUNITIES')] PHJ18910201-V06-02-page12.txt: [('al', 'ways')] PHJ18910201-V06-02-page18.txt: [('SHIR', 'KING')] PHJ18910201-V06-02-page29.txt: [('se', 'c')] PHJ18910201-V06-02-page30.txt: [('M.', ''), ('Assy', 'rian')] PHJ18910301-V06-03-page32.txt: [('corres', 'pondence')] PHJ18910401-V06-04-page17.txt: [('un', 'til')] PHJ18910401-V06-04-page28.txt: [('TA', 'PE-WORM')] PHJ18910401-V06-04-page31.txt: [('wor', 'ld')] PHJ18910401-V06-04-page32.txt: [('corres', 'pondence')] PHJ18910501-V06-05-page11.txt: [('recom', 'mend'), ('ob', 'taining')] PHJ18910501-V06-05-page27.txt: [('sani', 'tas')] PHJ18910501-V06-05-page30.txt: [('HEA', 'LTHFUL')] PHJ18910501-V06-05-page32.txt: [('corres', 'pondence')] PHJ18910501-V06-05-page9.txt: [('DYS', 'PEPSIA')] PHJ18910601-V06-06-page11.txt: [('oc', 'currence')] PHJ18910601-V06-06-page19.txt: [('jeal', 'ousy')] PHJ18910601-V06-06-page20.txt: [('Balti', 'more')] PHJ18910601-V06-06-page24.txt: [('sus', 'picion')] PHJ18910601-V06-06-page28.txt: [('phy', 'sicians'), ('inconven', 'iently')] PHJ18910601-V06-06-page32.txt: [('corres', 'pondence')] PHJ18910701-V06-07-page13.txt: [('INTOX', 'ICATING')] PHJ18910701-V06-07-page17.txt: [('Espe', 'cially')] PHJ18910701-V06-07-page19.txt: [('substan', 'tials')] PHJ18910701-V06-07-page27.txt: [('Cr', 'ackers')] PHJ18910701-V06-07-page28.txt: [('inconven', 'iently')] PHJ18910701-V06-07-page29.txt: [('cl', 'osed')] PHJ18910701-V06-07-page3.txt: [('capacit', 'y')] PHJ18910701-V06-07-page32.txt: [('corres', 'pondence')] PHJ18910801-V06-08-page1.txt: [('MONTHL', 'Y')] PHJ18910801-V06-08-page28.txt: [('shoul', 'ders'), ('phy', 'sicians'), ('inconven', 'iently')] PHJ18910801-V06-08-page32.txt: [('corres', 'pondence')] PHJ18910901-V06-09-page11.txt: [('progeni', 'tors')] PHJ18910901-V06-09-page30.txt: [('Ne', 'w'), ('Thermo', 'Electric')] PHJ18910901-V06-09-page31.txt: [('Tu', 'm')] PHJ18910901-V06-09-page32.txt: [('Clo', 'th'), ('th', 'e')] PHJ18911001-V06-10-page12.txt: [('AL', 'COHOL')] PHJ18911001-V06-10-page22.txt: [('CHOCO', 'LATE')] PHJ18911001-V06-10-page23.txt: [('itt', 'a')] PHJ18911001-V06-10-page27.txt: [('mo', 't')] PHJ18911001-V06-10-page28.txt: [('th', 'e')] PHJ18911001-V06-10-page3.txt: [('dyna', 'mite')] PHJ18911001-V06-10-page32.txt: [('BEA', 'TTY'), ('GA', 's')] PHJ18911001-V06-10-page5.txt: [('bac', 'teria')] PHJ18911101-V06-11-page19.txt: [('prec', 'incts')] PHJ18911101-V06-11-page22.txt: [('entr', 'e')] PHJ18911101-V06-11-page27.txt: [('elec', 'tric')] PHJ18911101-V06-11-page28.txt: [('Beatt', 'y')] PHJ18911101-V06-11-page29.txt: [('ib', 'o')] PHJ18911101-V06-11-page30.txt: [('Thermo', 'Electric')] PHJ18911101-V06-11-page31.txt: [('ase', 'r')] PHJ18911101-V06-11-page32.txt: [('FRIE', 'D')] PHJ18911101-V06-11-page5.txt: [('pater', 'familias')] PHJ18911201-V06-12-page1.txt: [('exer', 'cise')] PHJ18911201-V06-12-page30.txt: [('PLA', 'TES')] PHJ18911201-V06-12-page31.txt: [('wor', 'k')] PHJ18911201-V06-12-page32.txt: [('FRIE', 'D')] PHJ18920101-V07-01-page1.txt: [('co', 'operation')] PHJ18920101-V07-01-page20.txt: [('EF', 'FECT')] PHJ18920101-V07-01-page28.txt: [('ADVERTIS', 'ING')] PHJ18920101-V07-01-page29.txt: [('OA', 'R'), ('Yo', 'Semite'), ('pu', 'BLISHING')] PHJ18920101-V07-01-page31.txt: [('Ingrow', 'Ing'), ('generat', 'ion')] PHJ18920201-V07-02-page25.txt: [('investi', 'gate')] PHJ18920201-V07-02-page27.txt: [('pa', 'd')] PHJ18920201-V07-02-page28.txt: [('ADVERTIS', 'ING')] PHJ18920201-V07-02-page29.txt: [('YO', 'SEMITE'), ('Yo', 'Semite')] PHJ18920201-V07-02-page3.txt: [('cul', 'ture')] PHJ18920201-V07-02-page30.txt: [('FRIE', 'D')] PHJ18920201-V07-02-page5.txt: [('py', 'emia')] PHJ18920201-V07-02-page6.txt: [('stom', 'A')] PHJ18920201-V07-02-page9.txt: [('re', 'leased')] PHJ18920301-V07-03-page10.txt: [('EM', 'P')] PHJ18920301-V07-03-page18.txt: [('enjoyabl', 'e')] PHJ18920301-V07-03-page19.txt: [('ti', 't')] PHJ18920301-V07-03-page25.txt: [('th', 'e')] PHJ18920301-V07-03-page28.txt: [('te', 'a'), ('ADVERTIS', 'ING')] PHJ18920301-V07-03-page31.txt: [('po', 'o')] PHJ18920301-V07-03-page6.txt: [('additi', 'on')] PHJ18920401-V07-04-page2.txt: [('treatmen', 't')] PHJ18920401-V07-04-page22.txt: [('remem', 'bered')] PHJ18920401-V07-04-page3.txt: [('pre', 'vention'), ('re', 'lief')] PHJ18920401-V07-04-page31.txt: [('pa', 'd')] PHJ18920401-V07-04-page32.txt: [('tI', 'S'), ('Fitz', 'Gerald'), ('ADVERTIS', 'ING')] PHJ18920401-V07-04-page5.txt: [('indi', 'cates')] PHJ18920501-V07-05-page22.txt: [('re', 'T'), ('LI', 'I'), ('ri', 'a')] PHJ18920501-V07-05-page28.txt: [('NA', 'PA')] PHJ18920501-V07-05-page32.txt: [('Fitz', 'Gerald'), ('ADVERTIS', 'ING')] PHJ18920601-V07-06-page11.txt: [('Fitz', 'Hugh')] PHJ18920601-V07-06-page25.txt: [('Un', 'doubtedly')] PHJ18920601-V07-06-page26.txt: [('esti', 'mated')] PHJ18920601-V07-06-page31.txt: [('Yr', 'S')] PHJ18920601-V07-06-page32.txt: [('Fitz', 'Gerald'), ('ADVERTIS', 'ING')] PHJ18920601-V07-06-page5.txt: [('RI', 'AL')] PHJ18920601-V07-06-page6.txt: [('EXTER', 'NALLY')] PHJ18920701-V07-07-page13.txt: [('phy', 'sicians')] PHJ18920701-V07-07-page14.txt: [('ab', 'e'), ('profes', 'sion')] PHJ18920701-V07-07-page19.txt: [('CHIL', 'DREN')] PHJ18920701-V07-07-page22.txt: [('th', 'at')] PHJ18920701-V07-07-page28.txt: [('sar', 'Is'), ('CO', 'on')] PHJ18920701-V07-07-page30.txt: [('Fitz', 'Gerald'), ('AZIe', 'I')] PHJ18920701-V07-07-page32.txt: [('Yo', 'Semite')] PHJ18920801-V07-08-page1.txt: [('M.', '')] PHJ18920801-V07-08-page10.txt: [('VENTILA', 'TION')] PHJ18920801-V07-08-page30.txt: [('CO', 'on')] PHJ18920801-V07-08-page32.txt: [('Fitz', 'Gerald'), ('WA', 'S')] PHJ18920801-V07-08-page4.txt: [('expen', 'diture')] PHJ18920801-V07-08-page7.txt: [('suc', 'ceeded')] PHJ18920901-V07-09-page1.txt: [('MONTHL', 'Y')] PHJ18920901-V07-09-page28.txt: [('Pre', 'Adamite')] PHJ18920901-V07-09-page4.txt: [('ex', 'ercise')] PHJ18921001-V07-10-page1.txt: [('MONTHL', 'Y')] PHJ18921001-V07-10-page23.txt: [('nitroge', 'nous')] PHJ18921001-V07-10-page28.txt: [('CO', 'on'), ('pa', 'd')] PHJ18921001-V07-10-page32.txt: [('HYGIE', 'NIC')] PHJ18921001-V07-10-page8.txt: [('uncon', 'scious')] PHJ18921101-V07-11-page19.txt: [("MOTHER'", 'S')] PHJ18921101-V07-11-page25.txt: [('descrip', 'tions')] PHJ18921101-V07-11-page31.txt: [('pre', 'eminent')] PHJ18921101-V07-11-page32.txt: [('Gasolin', 'e'), ('Fitz', 'Gerald')] PHJ18921101-V07-11-page5.txt: [('ca', 'tarrh')] PHJ18921101-V07-11-page6.txt: [('bi', 'concave')] PHJ18921201-V07-12-page11.txt: [('ene', 'mies')] PHJ18921201-V07-12-page15.txt: [('th', 'a')] PHJ18921201-V07-12-page2.txt: [('thermo', 'electricity')] PHJ18921201-V07-12-page28.txt: [('Re', 'treat')] PHJ18921201-V07-12-page29.txt: [('PRE', 'PAID')] PHJ18921201-V07-12-page32.txt: [('Fitz', 'Gerald')] PHJ18921201-V07-12-page5.txt: [('oth', 'erwise'), ('es', 'pecially')] PHJ18960101-V11-01-page17.txt: [('NERV', 'OUS')] PHJ18960101-V11-01-page7.txt: [("Harper'", 's')] PHJ18960201-V11-02-page22.txt: [('ASSO', 'CIATIONS')] PHJ18960201-V11-02-page27.txt: [('ig', 'norant')] PHJ18960201-V11-02-page29.txt: [('th', 'a')] PHJ18960201-V11-02-page31.txt: [('CALIFORNI', 'A')] PHJ18960201-V11-02-page5.txt: [('Hebri', 'des')] PHJ18960301-V11-03-page20.txt: [('QUERI', 'ES')] PHJ18960501-V11-05-page11.txt: [('IL', 'A')] PHJ18960501-V11-05-page7.txt: [('physiol', 'ogy')] PHJ18960601-V11-06-page31.txt: [('OA', 'KLAND')] PHJ18960601-V11-06-page32.txt: [('asth', 'ma')] PHJ18960601-V11-06-page6.txt: [('DISSEM', 'INATION')] PHJ18960801-V11-08-page23.txt: [('WOR', 'RYING')] PHJ18960801-V11-08-page26.txt: [('atten', 'tion')] PHJ18960801-V11-08-page32.txt: [('ri', 'D')] PHJ18960801-V11-08-page5.txt: [('ro', 'of')] PHJ18960901-V11-09-page16.txt: [('ce', 'rtain')] PHJ18960901-V11-09-page17.txt: [('MO', "THER'S")] PHJ18960901-V11-09-page32.txt: [('CO', 'St')] PHJ18961001-V11-10-page10.txt: [('LOCOMO', 'TION')] PHJ18961001-V11-10-page18.txt: [('CHIL', 'DREN')] PHJ18961001-V11-10-page26.txt: [('ma', 'I')] PHJ18961001-V11-10-page30.txt: [('ex', 'It')] PHJ18961001-V11-10-page32.txt: [('CO', 'St')] PHJ18961101-V11-11-page10.txt: [('CAPI', 'TAL')] PHJ18961101-V11-11-page17.txt: [('witho', 'ut'), ('ut', 'a')] PHJ18961201-V11-12-page14.txt: [("Trumbull'", 's')] PHJ18961201-V11-12-page31.txt: [('SNY', 'DER')] PHJ18961201-V11-12-page32.txt: [('asth', 'ma')] PHJ18961201-V11-12-page5.txt: [('degrad', 'ing')] PHJ18990301-V14-03-page19.txt: [('ASSOCIA', 'TION'), ('PA', 'CIFIC')] PHJ18990301-V14-03-page8.txt: [('PHYSI', "CIAN'S")] PHJ18990501-V14-05-page16.txt: [("Youth'", 's')] PHJ18990601-V14-06-page11.txt: [('Sa', 'lts')] PHJ18990601-V14-06-page17.txt: [('Ric', 'E')] PHJ18990701-V14-07-page10.txt: [('em', 'bodied')] PHJ18990701-V14-07-page13.txt: [('wh', 'at')] PHJ18990901-V14-09-page30.txt: [('Ch', 'ina')] PHJ18991001-V14-10-page17.txt: [('un', 'usually')] PHJ18991101-V14-11-page19.txt: [('sug', 'gestion')] PHJ19010201-V16-02-page1.txt: [('PA', 'CIFIC')] PHJ19010201-V16-02-page21.txt: [('SANI', 'TARIUM')] PHJ19010201-V16-02-page29.txt: [('CALIFOR', 'NIA')] PHJ19010201-V16-02-page34.txt: [('ele', 'vated')] PHJ19010301-V16-03-page23.txt: [('M.', '')] PHJ19010301-V16-03-page27.txt: [('ou', 'd')] PHJ19010301-V16-03-page31.txt: [('TELE', 'PHONE'), ('Al', 'e')] PHJ19010301-V16-03-page33.txt: [('RE', 'A')] PHJ19010401-V16-04-page11.txt: [('Br', 'O')] PHJ19010401-V16-04-page3.txt: [('M.', '')] PHJ19010501-V16-05-page2.txt: [('Th', 'y')] PHJ19010601-V16-06-page40.txt: [('MIS', 'SIONARY')] PHJ19010701-V16-07-page12.txt: [('Br', 'O')] PHJ19010701-V16-07-page13.txt: [('Boi', 'led'), ('Al', 'A')] PHJ19010701-V16-07-page27.txt: [('TEM', 'PERATURES'), ('OP', 'TIME')] PHJ19010701-V16-07-page3.txt: [('SANITA', 'RIUM')] PHJ19010701-V16-07-page35.txt: [('EU', 'REkA')] PHJ19010801-V16-08-page1.txt: [('PA', 'CIFIC')] PHJ19010801-V16-08-page19.txt: [('AL', 'MOND')] PHJ19010801-V16-08-page2.txt: [('fr', 'A')] PHJ19010801-V16-08-page29.txt: [('ADVERTISEM', 'ENTS')] PHJ19010801-V16-08-page30.txt: [('DIA', 'NA'), ('NA', 'P')] PHJ19010801-V16-08-page33.txt: [('SPECIA', 'LTY')] PHJ19010801-V16-08-page4.txt: [('TEM', 'PERATURES')] PHJ19010901-V16-09-page1.txt: [('PA', 'CIFIC')] PHJ19010901-V16-09-page25.txt: [('impossi', 'ble')] PHJ19010901-V16-09-page27.txt: [('PARLI', 'N')] PHJ19010901-V16-09-page29.txt: [('SE', 'ALS')] PHJ19010901-V16-09-page30.txt: [('CA', 'lk')] PHJ19010901-V16-09-page35.txt: [('EU', 'REKA')] PHJ19011001-V16-10-page1.txt: [('PA', 'CIFIC'), ('ta', 'rt')] PHJ19011001-V16-10-page12.txt: [('hydrothera', 'peutic')] PHJ19011001-V16-10-page3.txt: [('FA', 'in')] PHJ19011001-V16-10-page30.txt: [('il', 'l')] PHJ19011101-V16-11-page30.txt: [('dif', 'fers')] PHJ19011101-V16-11-page5.txt: [('THOM', 'AS')] PHJ19011101-V16-11-page7.txt: [('TUBERCU', 'LOSIS'), ('arterio', 'sclerosis')] PHJ19011201-V16-12-page2.txt: [('Li', 'S')] PHJ19011201-V16-12-page39.txt: [('KE', 'A')] PHJ19011201-V16-12-page44.txt: [('Sanitariu', 'm')] PHJ19020101-V17-01-page35.txt: [('KE', 'A')] PHJ19020101-V17-01-page36.txt: [('Langle', 'y')] PHJ19020101-V17-01-page38.txt: [('STENCI', 'LS')] PHJ19020201-V17-02-page1.txt: [('Br', 'O')] PHJ19020201-V17-02-page9.txt: [('indul', 'gence')] PHJ19020301-V17-03-page27.txt: [('preven', 'tive')] PHJ19020301-V17-03-page36.txt: [('STENCI', 'LS')] PHJ19020401-V17-04-page15.txt: [('pota', 'toes')] PHJ19020401-V17-04-page32.txt: [('KE', 'A')] PHJ19020401-V17-04-page5.txt: [('Vo', 'L')] PHJ19020501-V17-05-page1.txt: [('Br', 'A')] PHJ19020501-V17-05-page13.txt: [('TREA', 'TMENTS')] PHJ19020501-V17-05-page2.txt: [('TH', 'o')] PHJ19020501-V17-05-page21.txt: [('Housek', 'eeping')] PHJ19020501-V17-05-page28.txt: [('PA', 'CIFIC')] PHJ19020601-V17-06-page2.txt: [('SUBSCRIP', 'TIONS')] PHJ19020701-V17-07-page11.txt: [('ro', 'o')] PHJ19020701-V17-07-page2.txt: [('SUBSCRIP', 'TIONS')] PHJ19020801-V17-08-page2.txt: [('li', 'E')] PHJ19020801-V17-08-page25.txt: [('un', 'dertaking')] PHJ19020801-V17-08-page7.txt: [('re', 'turned')] PHJ19020901-V17-09-page15.txt: [('conse', 'quent')] PHJ19020901-V17-09-page34.txt: [('SANI', 'TARIUM')] PHJ19020901-V17-09-page7.txt: [('QUANTI', 'TIES')] PHJ19021001-V17-10-page34.txt: [('SANI', 'TARIUM')] PHJ19021101-V17-11-page27.txt: [('KNO', 'W')] PHJ19021101-V17-11-page29.txt: [('sonom', 'a')] PHJ19021101-V17-11-page34.txt: [('HARRIM', 'AN')] PHJ19021201-V17-12-page36.txt: [('se', 'P')] PHJ19030101-V18-01-page15.txt: [('artis', 'tic')] PHJ19030101-V18-01-page28.txt: [('graduall', 'y')] PHJ19030101-V18-01-page30.txt: [('fo', 'B')] PHJ19030101-V18-01-page34.txt: [('M.', '')] PHJ19030401-V18-04-page23.txt: [('theo', 'ries')] PHJ19030401-V18-04-page28.txt: [('STAM', 'MERING')] PHJ19030401-V18-04-page35.txt: [('cr', 'U'), ('Zi', 'Or')] PHJ19030501-V18-05-page19.txt: [('EDU', 'CATE')] PHJ19030501-V18-05-page29.txt: [('STENCI', 'LS')] PHJ19030501-V18-05-page30.txt: [('fr', 'A')] PHJ19030601-V18-06-page17.txt: [('Tid', 'Bits')] PHJ19030601-V18-06-page2.txt: [('Trainin', 'g')] PHJ19030601-V18-06-page30.txt: [("I'", 's')] PHJ19030601-V18-06-page31.txt: [('JOURNA', 'LS')] PHJ19030701-V18-07-page35.txt: [('NI', 't')] PHJ19030801-V18-08-page12.txt: [('re', 'examined')] PHJ19030901-V18-09-page10.txt: [('infan', 'tum')] PHJ19030901-V18-09-page35.txt: [('li', 'st')] PHJ19030901-V18-09-page4.txt: [('LI', 'I')] PHJ19031001-V18-10-page11.txt: [('re', 'a')] PHJ19031001-V18-10-page17.txt: [('Socr', 'ates')] PHJ19031001-V18-10-page34.txt: [('particula', 'rs')] PHJ19031101-V18-11-page11.txt: [('Itt', 'A')] PHJ19031101-V18-11-page29.txt: [('Ent', 'irely')] PHJ19031101-V18-11-page3.txt: [('fi', 'N'), ('gi', 'g')] PHJ19031101-V18-11-page9.txt: [('gladi', 'ators')] PHJ19031201-V18-12-page35.txt: [('PA', 'li'), ('ti', 'A')] PHJ19040101-V19-01-page27.txt: [("V'", 'S')] PHJ19040101-V19-01-page33.txt: [('LIMITE', 'D')] PHJ19040201-V19-02-page28.txt: [('Ba', 'ttle')] PHJ19040301-V19-03-page34.txt: [('EUREK', 'A'), ('Vegetar', 'ian')] PHJ19040401-V19-04-page12.txt: [('inj', 'uriousness')] PHJ19040401-V19-04-page5.txt: [('hav', 'e')] PHJ19040501-V19-05-page19.txt: [("WOMAN'", 'S')] PHJ19040501-V19-05-page30.txt: [('entr', 'e')] PHJ19040601-V19-06-page13.txt: [('FOMENTA', 'TIONS')]
In [29]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction6 Average verified rate: 0.9811191527390404 Average of error rates: 0.026716285269940018 Total token count: 2850031
In [30]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[30]:
[('m', 2734), ('d', 2327), ("'", 1997), ('e', 1381), ('r', 1270), ('t', 1207), ('w', 1173), ('n', 1120), ('co', 1109), ('f', 796), ('g', 761), ('x', 649), ('lb', 583), ('th', 251), ('sel', 251), ('oo', 237), ('mo', 230), ('pp', 226), ('z', 217), ('k', 215), ('u', 199), ("an'", 192), ('q', 133), ('ex', 106), ('ournal', 106), ('al', 98), ('te', 85), ('oz', 81), ('ro', 80), ('ga', 80), ('pa', 74), ('va', 74), ('munn', 73), ('io', 72), ('ti', 67), ('-', 66), ("infants'", 61), ('id', 58), ('zo', 55), ('viperance', 54), ('em', 53), ('tion', 51), ("''", 49), ('re', 48), ('urnal', 48), ('si', 47), ('fahr', 46), ('cc', 45), ("hours'", 44), ('cloe', 43)]
Correction 7 -- Rejoin Split Words II¶
In [31]:
# %load shared_elements/rejoin_split_words.py
prev = cycle
cycle = "correction7"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = GoH.utilities.tokenize_text(text)
errors = GoH.reports.identify_errors(tokens, spelling_dictionary)
replacements = GoH.clean.check_if_stem(errors, spelling_dictionary, tokens, get_prior=True)
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = GoH.clean.replace_split_words(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18850601-V01-01-page1.txt: [('in', 'toxicating'), ('TEMPER', 'ANCE')] PHJ18850601-V01-01-page16.txt: [('PHYSIC', 'AL'), ('state', 'ment')] PHJ18850601-V01-01-page21.txt: [('o', 'pp')] PHJ18850601-V01-01-page24.txt: [('A', 'ZAL'), ('A', 'li')] PHJ18850801-V01-02-page1.txt: [('F', 'lt')] PHJ18850801-V01-02-page22.txt: [('o', 'pp')] PHJ18850801-V01-02-page4.txt: [('f', 'und')] PHJ18850801-V01-02-page8.txt: [('Q', 'uick'), ('U', 'se'), ('J', 'ust'), ('R', 'emember'), ('K', 'eep'), ('T', 'ake'), ('N', 'ow'), ('P', 'eople'), ('Z', 'eal'), ('C', 'hildren'), ('G', 'arments'), ('M', 'uch'), ('H', 'omes'), ('S', 'ee'), ('X', 'erxes'), ('F', 'reshen'), ('V', 'ery')] PHJ18851001-V01-03-page13.txt: [('A', 'ccount')] PHJ18851001-V01-03-page22.txt: [('num', 'ber'), ('o', 'pp')] PHJ18851001-V01-03-page23.txt: [('K', 'Ra')] PHJ18851001-V01-03-page5.txt: [('r', 'oo')] PHJ18851201-V01-04-page21.txt: [('JOUR', 'NAL')] PHJ18851201-V01-04-page22.txt: [('o', 'pp')] PHJ18860201-V01-05-page14.txt: [('physic', 'al')] PHJ18860401-V01-06-page21.txt: [('JOUR', 'NAL')] PHJ18860401-V01-06-page22.txt: [('con', 'sisted')] PHJ18860401-V01-06-page23.txt: [('SAB', 'BATH-SCHOOL'), ('SEC', 'ULAR')] PHJ18860601-V02-01-page1.txt: [('J', 'UN')] PHJ18860601-V02-01-page21.txt: [('M', "isses'")] PHJ18860601-V02-01-page22.txt: [('P', 'RACTICAL')] PHJ18860801-V02-02-page13.txt: [('con', 'nection')] PHJ18860801-V02-02-page26.txt: [('con', 'tains')] PHJ18860801-V02-02-page28.txt: [('P', 'ase')] PHJ18860801-V02-02-page4.txt: [('to', 'ea')] PHJ18861001-V02-03-page10.txt: [('intro', 'duced')] PHJ18861001-V02-03-page11.txt: [('inter', 'nal')] PHJ18861001-V02-03-page16.txt: [('main', 'tenance')] PHJ18861001-V02-03-page28.txt: [('c', 'astro')] PHJ18861001-V02-03-page29.txt: [('SEC', 'ULAR')] PHJ18861001-V02-03-page30.txt: [('SCRIP', 'TURES')] PHJ18861001-V02-03-page32.txt: [('B', 'AL')] PHJ18861201-V02-04-page19.txt: [('dis', 'pensary'), ('as', 'sume')] PHJ18861201-V02-04-page27.txt: [('H', 'Es'), ('SCRIP', 'TURES'), ('M', 'ARVEL')] PHJ18861201-V02-04-page28.txt: [('r', 'OW')] PHJ18861201-V02-04-page30.txt: [('G', 'eneral'), ('IN', 'TRINSIC'), ('C', 'ONTENT'), ('ad', 'mitted')] PHJ18870201-V02-05-page25.txt: [('on', 'ce')] PHJ18870201-V02-05-page29.txt: [('IN', 'TRINSIC'), ('M', 'icroscopists')] PHJ18870201-V02-05-page30.txt: [('T', 'OW')] PHJ18870201-V02-05-page32.txt: [('I', 'Va')] PHJ18870201-V02-05-page4.txt: [('mus', 'cular')] PHJ18870401-V02-06-page13.txt: [('con', 'ception')] PHJ18870401-V02-06-page31.txt: [('C', 'ome'), ('IN', 'TRINSIC'), ('A', 'uthor')] PHJ18870401-V02-06-page32.txt: [('p', 'atients')] PHJ18870601-V02-07-page13.txt: [('pop', 'ulation'), ('de', 'Lorme')] PHJ18870601-V02-07-page26.txt: [('PRES', 'ERVATION'), ('j', 'ournal')] PHJ18870601-V02-07-page27.txt: [('GEN', 'ERAL')] PHJ18870801-V02-08-page25.txt: [('gas', 'es')] PHJ18870801-V02-08-page28.txt: [('la', 'th')] PHJ18870801-V02-08-page29.txt: [('IN', 'TRINSIC'), ('A', 'uthor'), ('ad', 'mitted')] PHJ18871001-V02-09-page1.txt: [('po', 'etry')] PHJ18871001-V02-09-page29.txt: [('L', 'ithographic')] PHJ18871001-V02-09-page30.txt: [('K', 'Ra')] PHJ18880101-V03-01-page16.txt: [('be', 'ma')] PHJ18880101-V03-01-page18.txt: [('mem', 'branous')] PHJ18880101-V03-01-page21.txt: [('Lin', 'coln')] PHJ18880101-V03-01-page29.txt: [('med', 'ical')] PHJ18880101-V03-01-page5.txt: [('mur', 'murings')] PHJ18880201-V03-02-page24.txt: [('AD', 'VOCATE')] PHJ18880201-V03-02-page28.txt: [('a', 'nd')] PHJ18880301-V03-03-page13.txt: [('com', 'plexion')] PHJ18880301-V03-03-page21.txt: [('rem', 'edy')] PHJ18880301-V03-03-page27.txt: [('a', 'wl')] PHJ18880401-V03-04-page10.txt: [('phys', 'ical')] PHJ18880401-V03-04-page25.txt: [('JOUR', 'NAL')] PHJ18880401-V03-04-page26.txt: [('PUBL', 'IC')] PHJ18880401-V03-04-page27.txt: [('e', 'ND')] PHJ18880401-V03-04-page28.txt: [('med', 'ical')] PHJ18880501-V03-05-page27.txt: [('z', 'zz')] PHJ18880601-V03-06-page26.txt: [('P', 'RY')] PHJ18880701-V03-07-page14.txt: [('imper', 'fectly')] PHJ18880701-V03-07-page27.txt: [('o', 'ch')] PHJ18880801-V03-08-page26.txt: [('M', 'ACH')] PHJ18880801-V03-08-page27.txt: [('es', 'sentially')] PHJ18880901-V03-09-page3.txt: [('pleas', 'ure')] PHJ18880901-V03-09-page31.txt: [('PRES', 'ERVATION')] PHJ18880901-V03-09-page8.txt: [('amuse', 'ments')] PHJ18881001-V03-10-page12.txt: [('in', 'fectious')] PHJ18881001-V03-10-page22.txt: [('A', 'RE')] PHJ18881101-V03-11-page13.txt: [('wine', 'bibbers')] PHJ18881101-V03-11-page16.txt: [('prom', 'ptings')] PHJ18881101-V03-11-page17.txt: [('in', 'fluence')] PHJ18881101-V03-11-page22.txt: [('so', 'Iree')] PHJ18881101-V03-11-page25.txt: [('he', 'sed')] PHJ18881101-V03-11-page29.txt: [('PRES', 'ERVATION')] PHJ18881101-V03-11-page3.txt: [('The', 'Re')] PHJ18881201-V03-12-page21.txt: [('the', 're')] PHJ18881201-V03-12-page31.txt: [('PRES', 'ERVATION')] PHJ18890101-V04-01-page12.txt: [('A', 'IL')] PHJ18890101-V04-01-page29.txt: [('C', 'AL'), ('r', 'oo')] PHJ18890101-V04-01-page31.txt: [('P', 'RY')] PHJ18890101-V04-01-page5.txt: [('essen', 'tial')] PHJ18890201-V04-02-page23.txt: [('a', 'plomb')] PHJ18890201-V04-02-page28.txt: [('W', 'AIST'), ('M', "isses'")] PHJ18890201-V04-02-page30.txt: [('I', 'LL')] PHJ18890301-V04-03-page2.txt: [('nourish', 'ment')] PHJ18890301-V04-03-page4.txt: [('main', 'taining')] PHJ18890401-V04-04-page10.txt: [('h', 'aring')] PHJ18890401-V04-04-page14.txt: [('sin', 'gle')] PHJ18890401-V04-04-page19.txt: [('d', 'ismally')] PHJ18890401-V04-04-page29.txt: [('A', 'id')] PHJ18890401-V04-04-page32.txt: [('we', 'ekly')] PHJ18890401-V04-04-page5.txt: [('of', 'fering')] PHJ18890501-V04-05-page11.txt: [('men', 'tioned')] PHJ18890501-V04-05-page14.txt: [('wonder', 'ful')] PHJ18890501-V04-05-page20.txt: [('at', 'tained')] PHJ18890501-V04-05-page5.txt: [('or', 'dinary')] PHJ18890601-V04-06-page6.txt: [('r', 'oo')] PHJ18890701-V04-07-page10.txt: [('to', 'ro')] PHJ18890701-V04-07-page12.txt: [('a', 're')] PHJ18890701-V04-07-page17.txt: [('bod', 'ies')] PHJ18890701-V04-07-page27.txt: [('A', 'ugusta')] PHJ18890701-V04-07-page28.txt: [('a', 'sp'), ('p', 'ay')] PHJ18890701-V04-07-page30.txt: [('me', 'ekly')] PHJ18890701-V04-07-page8.txt: [('IDOL', 'ATRY')] PHJ18890801-V04-08-page27.txt: [('st', 'ine')] PHJ18890901-V04-09-page26.txt: [('HEL', 'ENA')] PHJ18890901-V04-09-page27.txt: [('we', 'ekly')] PHJ18891001-V04-10-page26.txt: [('Ad', 'ventists')] PHJ18891001-V04-10-page27.txt: [('we', 'ekly')] PHJ18891001-V04-10-page29.txt: [('Da', 'isy'), ('Supp', 'orter'), ('sin', 'gle')] PHJ18891001-V04-10-page31.txt: [('r', 'oo')] PHJ18891101-V04-11-page1.txt: [('P', 'ACIFIC')] PHJ18891201-V04-12-page29.txt: [('well', 'es')] PHJ18891201-V04-12-page32.txt: [('Heal', 'dsburg')] PHJ18891201-V04-12-page33.txt: [('absti', 'nence')] PHJ18891201-V04-12-page34.txt: [('To', 'ssing'), ('To', 'bacco'), ('To', 'tal'), ('chil', 'dren'), ('To', 'nics'), ('To', 'othache'), ('To', 'ilet')] PHJ18900101-V05-01-page28.txt: [('a', 'sp'), ('La', "dies'"), ('W', 'aist')] PHJ18900101-V05-01-page9.txt: [('temp', 'tations')] PHJ18900201-V05-02-page14.txt: [('accomplish', 'ment'), ('con', 'trol')] PHJ18900201-V05-02-page28.txt: [('Suppo', 'rter'), ('Child', "ren's")] PHJ18900201-V05-02-page31.txt: [('med', 'ical')] PHJ18900301-V05-03-page27.txt: [('in', 'cludes')] PHJ18900301-V05-03-page30.txt: [('r', 'oo'), ('So', 'cial')] PHJ18900301-V05-03-page8.txt: [('temp', 'tations')] PHJ18900401-V05-04-page10.txt: [('care', 'ful')] PHJ18900401-V05-04-page29.txt: [('r', 'oo')] PHJ18900401-V05-04-page31.txt: [('es', 'sentially')] PHJ18900401-V05-04-page32.txt: [('DU', 'NN')] PHJ18900401-V05-04-page9.txt: [('appe', 'tites')] PHJ18900501-V05-05-page11.txt: [('pro', 'fessional')] PHJ18900501-V05-05-page15.txt: [('exam', 'inations')] PHJ18900501-V05-05-page21.txt: [('impor', 'tant')] PHJ18900501-V05-05-page26.txt: [('JOUR', 'NAL')] PHJ18900501-V05-05-page32.txt: [('r', 'oo')] PHJ18900601-V05-06-page27.txt: [('Christian', 'ia')] PHJ18900601-V05-06-page29.txt: [('G', 'REAT')] PHJ18900601-V05-06-page30.txt: [('o', 'ro')] PHJ18900601-V05-06-page5.txt: [('re', 'garded')] PHJ18900701-V05-07-page28.txt: [('P', 'AL')] PHJ18900801-V05-08-page28.txt: [('ODE', 'LL')] PHJ18900801-V05-08-page29.txt: [('Cata', 'logue'), ('l', 'id'), ('So', 'cial')] PHJ18900801-V05-08-page4.txt: [('pun', 'ish')] PHJ18900901-V05-09-page27.txt: [('N', 'ineteenth')] PHJ18900901-V05-09-page28.txt: [('r', 'oo')] PHJ18900901-V05-09-page31.txt: [('A', 'ddress')] PHJ18901001-V05-10-page16.txt: [('LIQ', 'UOR')] PHJ18901001-V05-10-page26.txt: [('CON', 'FLICT')] PHJ18901001-V05-10-page27.txt: [('In', 'spiration')] PHJ18901001-V05-10-page28.txt: [('D', 'aisy')] PHJ18901101-V05-11-page28.txt: [('quart', 'IC'), ('C', 'lasp'), ('r', 'oo'), ('Supp', 'orter')] PHJ18901101-V05-11-page31.txt: [('A', 'merican')] PHJ18901201-V05-12-page14.txt: [('in', 'dulgences')] PHJ18901201-V05-12-page25.txt: [('cir', 'culation')] PHJ18901201-V05-12-page28.txt: [('r', 'oo')] PHJ18901201-V05-12-page30.txt: [('LEA', 'THERETTE')] PHJ18910101-V06-01-page13.txt: [('per', 'se')] PHJ18910101-V06-01-page15.txt: [('and', 're')] PHJ18910101-V06-01-page19.txt: [('to', 're')] PHJ18910101-V06-01-page27.txt: [('exi', 'ts')] PHJ18910101-V06-01-page28.txt: [('Do', 'mestic')] PHJ18910101-V06-01-page30.txt: [('Assy', 'rian')] PHJ18910101-V06-01-page32.txt: [('LEA', 'THERETTE'), ('corres', 'pondence')] PHJ18910201-V06-02-page19.txt: [('b', 'ead')] PHJ18910201-V06-02-page28.txt: [('t', 'oo')] PHJ18910301-V06-03-page11.txt: [('to', 'ro')] PHJ18910301-V06-03-page14.txt: [('sin', 'gularly')] PHJ18910301-V06-03-page32.txt: [('LEA', 'THERETTE'), ('corres', 'pondence')] PHJ18910401-V06-04-page1.txt: [('the', 're')] PHJ18910401-V06-04-page32.txt: [('corres', 'pondence')] PHJ18910501-V06-05-page2.txt: [('r', 'oo')] PHJ18910501-V06-05-page3.txt: [('to', 'ro')] PHJ18910501-V06-05-page31.txt: [('W', 'YLIE')] PHJ18910501-V06-05-page32.txt: [('corres', 'pondence')] PHJ18910601-V06-06-page10.txt: [('fever', 'ous'), ('r', 'oo')] PHJ18910601-V06-06-page18.txt: [('per', 'se')] PHJ18910601-V06-06-page19.txt: [('be', 'mis')] PHJ18910601-V06-06-page25.txt: [('m', 'oistened')] PHJ18910601-V06-06-page29.txt: [('med', 'ical')] PHJ18910601-V06-06-page32.txt: [('corres', 'pondence')] PHJ18910701-V06-07-page26.txt: [('JOUR', 'NAL')] PHJ18910701-V06-07-page31.txt: [('I', 'LL')] PHJ18910701-V06-07-page32.txt: [('LEA', 'THERETTE'), ('corres', 'pondence')] PHJ18910801-V06-08-page27.txt: [('A', 'ND')] PHJ18910801-V06-08-page32.txt: [('g', 'Lt'), ('T', 'ip'), ('corres', 'pondence')] PHJ18910901-V06-09-page24.txt: [('lit', 'tle')] PHJ18910901-V06-09-page28.txt: [('r', 'oo')] PHJ18910901-V06-09-page29.txt: [('med', 'ical')] PHJ18910901-V06-09-page32.txt: [('VI', 'NCENT')] PHJ18911001-V06-10-page26.txt: [('No', 'rth')] PHJ18911001-V06-10-page5.txt: [('bac', 'teria')] PHJ18911001-V06-10-page6.txt: [('phys', 'ical')] PHJ18911101-V06-11-page19.txt: [('prec', 'incts')] PHJ18911201-V06-12-page14.txt: [('pres', 'ent')] PHJ18911201-V06-12-page18.txt: [('treas', 'ure')] PHJ18911201-V06-12-page22.txt: [('Chem', 'istry')] PHJ18911201-V06-12-page28.txt: [('W', 'ASHINGTON')] PHJ18911201-V06-12-page31.txt: [('o', 'ne'), ('IN', 'TRINSIC')] PHJ18911201-V06-12-page33.txt: [('An', 'Ita')] PHJ18911201-V06-12-page34.txt: [('z', 'oo')] PHJ18920101-V07-01-page29.txt: [('THE', 'reat')] PHJ18920101-V07-01-page31.txt: [('ad', 'mitted'), ('IN', 'TRINSIC')] PHJ18920201-V07-02-page18.txt: [('sis', 'ters')] PHJ18920201-V07-02-page21.txt: [('sec', 'ond')] PHJ18920201-V07-02-page27.txt: [('prog', 'ress')] PHJ18920201-V07-02-page5.txt: [('py', 'emia')] PHJ18920201-V07-02-page7.txt: [('s', 'ubject')] PHJ18920301-V07-03-page10.txt: [('r', 'epeated')] PHJ18920301-V07-03-page18.txt: [('enjoy', 'ment')] PHJ18920301-V07-03-page26.txt: [('I', 'Lk')] PHJ18920301-V07-03-page31.txt: [('c', 'losed')] PHJ18920401-V07-04-page12.txt: [('hope', 'ful')] PHJ18920401-V07-04-page16.txt: [('per', 'fect')] PHJ18920401-V07-04-page23.txt: [('char', 'acter')] PHJ18920401-V07-04-page3.txt: [('the', 're')] PHJ18920401-V07-04-page5.txt: [('in', 'flammation')] PHJ18920501-V07-05-page20.txt: [('gen', 'erally')] PHJ18920501-V07-05-page25.txt: [('cos', 'tumes')] PHJ18920501-V07-05-page28.txt: [('T', 'HESE')] PHJ18920501-V07-05-page31.txt: [('L', 'ithographic')] PHJ18920601-V07-06-page21.txt: [('r', 'ie')] PHJ18920801-V07-08-page31.txt: [('H', 'EALTH'), ('I', 'NA')] PHJ18920801-V07-08-page4.txt: [('expen', 'diture')] PHJ18920901-V07-09-page10.txt: [('de', 'preciated')] PHJ18920901-V07-09-page29.txt: [('and', 're')] PHJ18920901-V07-09-page31.txt: [('H', 'EAL'), ('U', 'nderwear')] PHJ18921001-V07-10-page30.txt: [('w', 'ith')] PHJ18921001-V07-10-page31.txt: [('G', 'ev')] PHJ18921001-V07-10-page7.txt: [('MED', 'ICAL')] PHJ18921101-V07-11-page16.txt: [('T', 'ue')] PHJ18921101-V07-11-page25.txt: [('descrip', 'tions')] PHJ18921101-V07-11-page28.txt: [('s', 'ith')] PHJ18921101-V07-11-page31.txt: [('H', 'EALTHFUL')] PHJ18921201-V07-12-page11.txt: [('ene', 'mies')] PHJ18921201-V07-12-page15.txt: [('THE', 'BA')] PHJ18921201-V07-12-page31.txt: [('H', 'EALTHFUL'), ('excursion', 'ists')] PHJ18921201-V07-12-page32.txt: [('t', 'reatment')] PHJ18921201-V07-12-page34.txt: [('sum', 'ption')] PHJ18921201-V07-12-page4.txt: [('sup', 'posedly')] PHJ18921201-V07-12-page5.txt: [('es', 'pecially')] PHJ18960101-V11-01-page19.txt: [('JOUR', 'NAL')] PHJ18960101-V11-01-page29.txt: [('A', 'VE')] PHJ18960101-V11-01-page32.txt: [('Pro', 'fessions')] PHJ18960101-V11-01-page7.txt: [('a', 're')] PHJ18960201-V11-02-page19.txt: [('com', 'fortable'), ('health', 'ful')] PHJ18960301-V11-03-page24.txt: [('U', 'RN')] PHJ18960601-V11-06-page29.txt: [('Y', 'ork')] PHJ18960601-V11-06-page31.txt: [('CIR', 'CUIT')] PHJ18960701-V11-07-page10.txt: [('par', 'ticularly')] PHJ18960701-V11-07-page25.txt: [('sub', 'sistence')] PHJ18960701-V11-07-page3.txt: [('develop', 'ment')] PHJ18960701-V11-07-page31.txt: [('LA', 'ND')] PHJ18960701-V11-07-page7.txt: [('con', 'tagious')] PHJ18960801-V11-08-page19.txt: [('med', 'ical')] PHJ18960801-V11-08-page3.txt: [('com', 'pany')] PHJ18960801-V11-08-page32.txt: [('o', 'ng')] PHJ18960801-V11-08-page5.txt: [('or', 'zo')] PHJ18960901-V11-09-page4.txt: [('in', 'duced')] PHJ18960901-V11-09-page5.txt: [('a', 'nd')] PHJ18961001-V11-10-page2.txt: [('the', 're')] PHJ18961001-V11-10-page27.txt: [('con', 'stant')] PHJ18961001-V11-10-page30.txt: [('c', 'hile')] PHJ18961101-V11-11-page32.txt: [('I', 'll')] PHJ18961201-V11-12-page12.txt: [('p', 'istil')] PHJ18961201-V11-12-page28.txt: [('P', 'olish')] PHJ18990101-V14-01-page1.txt: [('JouR', 'NAL')] PHJ18990101-V14-01-page14.txt: [('req', 'uest')] PHJ18990101-V14-01-page15.txt: [('A', 'NEM')] PHJ18990201-V14-02-page12.txt: [('prop', 'erties')] PHJ18990201-V14-02-page9.txt: [('consist', 'ently')] PHJ18990301-V14-03-page19.txt: [('JoUR', 'NAL')] PHJ18990301-V14-03-page5.txt: [('con', 'trast')] PHJ18990601-V14-06-page13.txt: [('Whip', 'poorwill')] PHJ18991001-V14-10-page2.txt: [('alco', 'holic')] PHJ18991101-V14-11-page19.txt: [('sug', 'gestion')] PHJ18991101-V14-11-page2.txt: [('R', 'ussia')] PHJ19010201-V16-02-page12.txt: [('CHRIS', 'TIAN')] PHJ19010201-V16-02-page2.txt: [('I', 'wo')] PHJ19010201-V16-02-page21.txt: [('A', 'LT'), ('H', 'OW')] PHJ19010301-V16-03-page27.txt: [('d', 'Ie')] PHJ19010301-V16-03-page30.txt: [('p', 'acific')] PHJ19010401-V16-04-page1.txt: [('w', 'itt')] PHJ19010401-V16-04-page33.txt: [('A', 'NE'), ('C', 'ItE')] PHJ19010401-V16-04-page34.txt: [('chron', 'ic')] PHJ19010501-V16-05-page36.txt: [('a', 'll')] PHJ19010601-V16-06-page45.txt: [('I', 'NG')] PHJ19010601-V16-06-page49.txt: [('a', 'nd')] PHJ19010701-V16-07-page23.txt: [('W', 'ashington')] PHJ19010701-V16-07-page9.txt: [('so', 'weth')] PHJ19010801-V16-08-page20.txt: [('D', 'RESS')] PHJ19010801-V16-08-page28.txt: [('en', 'gine')] PHJ19010801-V16-08-page33.txt: [('C', 'ap'), ('p', 'acific')] PHJ19010801-V16-08-page4.txt: [('E', 'ff')] PHJ19010901-V16-09-page30.txt: [('r', 'EP')] PHJ19010901-V16-09-page31.txt: [('C', 'tn')] PHJ19011001-V16-10-page26.txt: [('LE', 'GENDRE')] PHJ19011101-V16-11-page2.txt: [('Deli', 'cious'), ('lit', 'tle')] PHJ19011101-V16-11-page3.txt: [('SAD', 'LER')] PHJ19011101-V16-11-page30.txt: [('t', 'il')] PHJ19011201-V16-12-page16.txt: [('JO', 'URNAL')] PHJ19011201-V16-12-page2.txt: [('lit', 'tle')] PHJ19011201-V16-12-page3.txt: [('C', 'apitol')] PHJ19011201-V16-12-page39.txt: [('R', 'IGHTED')] PHJ19020201-V17-02-page2.txt: [('con', 'venient')] PHJ19020301-V17-03-page27.txt: [('preven', 'tive')] PHJ19020301-V17-03-page35.txt: [('C', 'ItE'), ('R', 'IGHTED')] PHJ19020301-V17-03-page36.txt: [('P', 'acific')] PHJ19020401-V17-04-page34.txt: [('O', 'ffer')] PHJ19020601-V17-06-page13.txt: [('SAN', 'ITARIUM')] PHJ19020601-V17-06-page32.txt: [('ST', 'ENCILS'), ('BRAN', 'DS')] PHJ19020601-V17-06-page37.txt: [('cor', 'onation')] PHJ19020801-V17-08-page28.txt: [('JOUR', 'NAL')] PHJ19020801-V17-08-page30.txt: [('Y', 'ork')] PHJ19020901-V17-09-page31.txt: [('BRAN', 'DS')] PHJ19021001-V17-10-page31.txt: [('M', 'adison')] PHJ19021001-V17-10-page34.txt: [('BRA', 'NCH')] PHJ19021101-V17-11-page12.txt: [('H', 'ygienic')] PHJ19021101-V17-11-page29.txt: [('M', 'ariposa')] PHJ19021101-V17-11-page31.txt: [('HEAL', 'TH'), ('P', 'ACIFIC')] PHJ19021101-V17-11-page36.txt: [('R', 'UN')] PHJ19021201-V17-12-page30.txt: [('M', 'ariposa')] PHJ19030101-V18-01-page2.txt: [('r', 'oo')] PHJ19030101-V18-01-page27.txt: [('OR', 'THOPEDIC')] PHJ19030101-V18-01-page30.txt: [('A', 'uthority')] PHJ19030101-V18-01-page34.txt: [('Health', 'ful')] PHJ19030101-V18-01-page35.txt: [('I', 'ri')] PHJ19030101-V18-01-page4.txt: [('H', 'ip')] PHJ19030201-V18-02-page25.txt: [('JouR', 'NAL')] PHJ19030501-V18-05-page24.txt: [('JOUR', 'NAL')] PHJ19030501-V18-05-page34.txt: [('a', 'id')] PHJ19030501-V18-05-page35.txt: [('A', 'Ny')] PHJ19030701-V18-07-page20.txt: [('a', 'zo')] PHJ19030701-V18-07-page24.txt: [('char', 'acter')] PHJ19030701-V18-07-page28.txt: [('JOUR', 'NAL')] PHJ19030801-V18-08-page33.txt: [('IN', 'CL')] PHJ19031101-V18-11-page32.txt: [('C', 'hicago')] PHJ19031201-V18-12-page35.txt: [('IN', 'ti')] PHJ19040101-V19-01-page1.txt: [('AT', 'TA')] PHJ19040201-V19-02-page28.txt: [('W', 'hile')] PHJ19040201-V19-02-page8.txt: [('j', 'uices')] PHJ19040301-V19-03-page31.txt: [('BOO', 'KS'), ('A', 'nal')] PHJ19040401-V19-04-page28.txt: [('HA', 'IG')] PHJ19040501-V19-05-page33.txt: [('East', 'ern')] PHJ19040501-V19-05-page34.txt: [('pam', 'phlet')] PHJ19040601-V19-06-page27.txt: [('I', 'Lk')]
In [32]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction7 Average verified rate: 0.9812578035460998 Average of error rates: 0.02651921795156632 Total token count: 2849666
In [33]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[33]:
[('m', 2724), ('d', 2324), ("'", 1997), ('e', 1380), ('r', 1248), ('t', 1203), ('w', 1166), ('n', 1118), ('co', 1109), ('f', 795), ('g', 759), ('x', 648), ('lb', 583), ('sel', 251), ('th', 250), ('mo', 230), ('oo', 222), ('pp', 218), ('z', 215), ('k', 214), ('u', 196), ("an'", 192), ('q', 132), ('ex', 106), ('ournal', 105), ('al', 94), ('te', 85), ('oz', 81), ('ga', 80), ('ro', 77), ('pa', 74), ('va', 74), ('munn', 73), ('io', 72), ('-', 66), ('ti', 66), ("infants'", 61), ('id', 55), ('viperance', 54), ('zo', 53), ('em', 53), ('tion', 51), ("''", 49), ('si', 47), ('urnal', 47), ('fahr', 46), ('cc', 45), ("hours'", 44), ('cloe', 43), ('tt', 42)]
Review Remaining Errors¶
In [34]:
GoH.reports.docs_with_high_error_rate(summary)
Out[34]:
[('PHJ19030301-V18-03-page4.txt', 1.0), ('PHJ19030201-V18-02-page4.txt', 1.0), ('PHJ19030501-V18-05-page4.txt', 0.744), ('PHJ19030401-V18-04-page35.txt', 0.719), ('PHJ19040201-V19-02-page4.txt', 0.667), ('PHJ19030901-V18-09-page4.txt', 0.632), ('PHJ19021201-V17-12-page4.txt', 0.6), ('PHJ19030101-V18-01-page35.txt', 0.556), ('PHJ18870801-V02-08-page1.txt', 0.468), ('PHJ19011201-V16-12-page4.txt', 0.429), ('PHJ19011201-V16-12-page3.txt', 0.388), ('PHJ18870201-V02-05-page1.txt', 0.376), ('PHJ19010201-V16-02-page33.txt', 0.375), ('PHJ18870401-V02-06-page1.txt', 0.355), ('PHJ18860801-V02-02-page1.txt', 0.354), ('PHJ18861201-V02-04-page1.txt', 0.35), ('PHJ19010701-V16-07-page13.txt', 0.311), ('PHJ18871001-V02-09-page1.txt', 0.309), ('PHJ18870601-V02-07-page1.txt', 0.308), ('PHJ18861001-V02-03-page1.txt', 0.297), ('PHJ19031201-V18-12-page35.txt', 0.274), ('PHJ19011001-V16-10-page33.txt', 0.267), ('PHJ19021101-V17-11-page36.txt', 0.264), ('PHJ19011201-V16-12-page2.txt', 0.259), ('PHJ19031201-V18-12-page4.txt', 0.257), ('PHJ19010501-V16-05-page35.txt', 0.248), ('PHJ19010301-V16-03-page33.txt', 0.246), ('PHJ19030901-V18-09-page35.txt', 0.243), ('PHJ19030201-V18-02-page35.txt', 0.243), ('PHJ18880501-V03-05-page32.txt', 0.241), ('PHJ19030301-V18-03-page33.txt', 0.24), ('PHJ19020801-V17-08-page36.txt', 0.234), ('PHJ19010701-V16-07-page31.txt', 0.232), ('PHJ19010801-V16-08-page30.txt', 0.229), ('PHJ19040201-V19-02-page35.txt', 0.229), ('PHJ19040301-V19-03-page29.txt', 0.227), ('PHJ19011001-V16-10-page31.txt', 0.225), ('PHJ19010901-V16-09-page33.txt', 0.225), ('PHJ19030301-V18-03-page35.txt', 0.223), ('PHJ19020901-V17-09-page1.txt', 0.222), ('PHJ19010801-V16-08-page33.txt', 0.222), ('PHJ19030601-V18-06-page30.txt', 0.219), ('PHJ19010401-V16-04-page30.txt', 0.215), ('PHJ19010201-V16-02-page30.txt', 0.215), ('PHJ19010501-V16-05-page32.txt', 0.214), ('PHJ19020901-V17-09-page4.txt', 0.211), ('PHJ19010701-V16-07-page33.txt', 0.21), ('PHJ19020501-V17-05-page1.txt', 0.21), ('PHJ19020401-V17-04-page1.txt', 0.208), ('PHJ19031201-V18-12-page32.txt', 0.207), ('PHJ19030701-V18-07-page35.txt', 0.205), ('PHJ18880201-V03-02-page27.txt', 0.205), ('PHJ19020301-V17-03-page1.txt', 0.203), ('PHJ19010901-V16-09-page31.txt', 0.203), ('PHJ19011101-V16-11-page35.txt', 0.203)]
In [35]:
# %load shared_elements/high_error_rates.py
doc_keys = [x[0] for x in GoH.reports.docs_with_high_error_rate(summary) if x[1] > 0.4]
GoH.utilities.open_original_docs(doc_keys, directories['cycle'])
Opened files: PHJ19030301-V18-03-page4.txt PHJ19030201-V18-02-page4.txt PHJ19030501-V18-05-page4.txt PHJ19030401-V18-04-page35.txt PHJ19040201-V19-02-page4.txt PHJ19030901-V18-09-page4.txt PHJ19021201-V17-12-page4.txt PHJ19030101-V18-01-page35.txt PHJ18870801-V02-08-page1.txt PHJ19011201-V16-12-page4.txt
Mostly image pages.
In [36]:
GoH.reports.long_errors(errors_summary, min_length=15)
Out[36]:
(['sanitaryshnitary', "english'temperance", 'restaurant-stand', 'wefurnishpatternsforhigh', 'acccoommodations', 'good-for-nothing', 'iiiiietaiiawrialauumnamumaymmo', 'queen-of-the-queens', 'gradual-reduction-process', 'itchialrractaerned', 'three-sixteenths', 'artificially-fed', 'three-hundredths', 'gilmniiiimmiumffinum', 'whisky-drunkards', 'perfectly-constructed', 'gently-stimulating', 'divinely-appointed', "paid'advertisements", 'mnuamanenummonami', 'non-crystallizable', 'mminiminnecatalogue', 'supersensitiveness', 'originallyjnvestigated', 'household-helpers', 'aermotorwindmills', "thesd'habitliabits", 'rosy-complexioned', 'sanitariumsanitarium', 'health-promoting', 'deudnsidtnaigntges', 'public-spiritedness', 'advertielderitiderb', 'unconditionaltly', 'pleasure-seekers', 'not-afraid-of-the-rain', 'accomplishmentwhichhascomevery', 'self-preservation', 'heaven-appointed', 'over-consumption', 'avoiddanarousbarbi', 'prayer-answering', 'souland-body-destroying', 'maid-of-all-work', 'never-seen-flirting', 'imperfectlycleansed', 'ewditchaunneoxctoepietiron', 'sulphur-bleaching', 'partially-filled', 'whitewhitekorackers', 'localititecalities', 'moisture-absorbing', 'intercommunicability', 'voitesooprieffsioretooneadrress', 'arigrirralenrillitmonown', 'absent-mindedness', 'ingsliimiediullivigi', 'non-contagiousness', 'sanitaritnnitarium', 'alcohol-poisoned', 'rapidly-increasing', 'become-estranged', "subject--mother's", 'muscle-producing', 'helenasanitarium', 'commonly-received', 'twenty-hundredths', 'nerve-strengthening', 'contented-looking', 'wholesale-liquor-dealer', 'pacificpresspublishinghouse', 'foundation-stone', 'mitimiiiiiiiiiiimmitutimmiticium', 'spanish-american', 'time-and-strength-exhausting', 'barbarsympathize', 'several-months-old', 'badly-ventilated', 'lucas-championniere', 'better-befitting', 'properly-fitting', 'xjkivivwplyofurt', 'picture-of-health', 'regularly-graduated', 'maiden-in-her-teens', 'perfectsaanction', 'ilintirlitifillairirtigillitiffitiffestitfir', 'feather-stitched', 'delicatelybalanced', 'little-suspected', 'physician-in-chief', 'poisonoussubstances', 'firtuanfiireilitiid', 'commander-in-chief', 'whilecross-examining', 'quickly-repeated', 'different-colored', 'castro-intestinal', 'unremunerativeloans', 'stronglyflavored', 'needsintelligence', 'criminalcarelessness--getting', 'at-home-in-her-place', 'givenfromcatholi', 'lungs-and-stomach', 'ernecewaotnigdirlywanossphet', 'easily-assimilated', 'self-examination', 'heavenly-wrought', 'anti-stimulating', 'gymnasiyinnasium', 'taste-preference', "sunday-morning's", 'newhomesewingmachinerorange', 'long-believed-in', 'nerve-destroying', 'gradually-developed', 'over-accumulation', 'tetextbaoicinary', 'mprohibitioncampaignexchange', 'xotimutimootxxximaklasimmitarmalemmunot', 'wear-her-own-hair', 'oftencircumstances', 'umninmalimenuilig', 'theruralhealtehtrerat', 'freshly-slaughtered', 'peculiarly-arranged', 'cleverly-executed', 'properly-performed', 'chemicallytreated', 'duringthechristian', 'soul-and-body-destroying', 'eecliceizliecouizieificinciimic', 'sewage-contaminated', 'fellow-prisoners', 'supercarbonization', 'mrprohibitioncampaignexchange', 'kloxfpaperkifriu', 'aplebaredsohlaubtitonstriasiaglhl', 'eiscehviniisbtos', 'life-ioliifeciistig', 'whole-wheat-flour', 'starch-digesting', 'rvfirrtprmflftitiiifirt', 'self-destructive', "consolation'that", 'mampormitylifighlimirummx', 'alpequckaabuckles', 'considerieguttieeenig', 'skirt-supporting', 'abstidegeneration', 'vegetable-colored', 'cream-bespattered', 'iihwalithlipainful', 'goodhealthrestaurant', 'free-from-a-stare', 'fifteen-hundredths', 'christirailitniifi', 'stenographlicaly', 'properly-regulated', 'half-unconscious', 'closely-corseted', 'fifteen-hundredth', 'comfortablycircumstanced', 'ficilirifirrefrfittifiefewitivi', 'raspberry-banana', 'future-most-fair', 'finely-pulverized', 'morodlaitiestaco', 'itithnffiffiefinfficiently', 'thrashing-machine', 'great-grandchildren', 'neiromesewingmachinea', 'motor-depressant', 'iontrontriustrororsummi', 'highly-stimulating', 'cifahrilrilfstry', 'health-and-life-destroying', 'frequently-repeated', 're-establishment', 'self-control--not', 'higgledy-piggledly', "'non-compensation'", 'suffiexperiments', 'boa-constrictors', 'greatgrandmothers', 'character-shaping', 'painkillezejkillers', 'becomeintelligent', 'tastefully-decorated', 'dramshop-keepers', 'dyspepsia-producing', 'conscience-stricken', 'photo-engravings', 'wanglannantinampanagamatt', 'charactershaping', 'pressure-induced', 'trichina-infested', 'carefully-watched', 'pacificpresspublishingrouseoaldgildcal', 'literadestruction', 'volcanic-scarred', 'whochangedthesabbath', 'vegetable-eating', 'altogether-too-common', "formingasidetableforsim'sposition", 'husband--personal', 'siltooftugllezre', "little'understood", 'is-reladiefitched', 'auto-intoxication', 'bread-and-butter', 'satisfacantiseptic', 'delicately-formed', 'twenty-threeinch', 'instruction-book', 'pleasantdisinfectant', 'improperlywashed', 'ever-accelerating', 'informationladdress', 'dateand-barley-eating', 'nefftittiattivid', 'ilimunaiwilmiumumiumilmm', 'quickly-shifting', 'fever-engendering', 'rhrmeusmoaftidsm', 'well-disciplined', 'lttlitlyamtkilletaitaillitialaailkill', 'birds-of-paradise', 'penny-in-the-slot', 'clothtscgtiliehlbf', 'non-professional', 'auto-suggestions', 'goldengolilawnbrown', 'scientifically-conducted', 'africa-international', 'medico-chirurgical', 'gradual-reduction', 'iikrtgninpcelruagrev', 'barbaphysiologists', 'iftriirittialiimi', 'fellow-travelers', 'poison-destroying', 'carefully-regulated', 'ouriarcilitiesare', 'self-gratification', 'uummairruilumounammor', 'free-from-the-blues', 'poorly-ventilated', 'counter-arguments', 'theonlysewingmachine', 'partially-cooked', 'familypresenting', 'powerpowirinanduinactive', 'heat-and-force-producing', 'go-as-you-please', 'photo-lithography', 'alkaloid-containing', "'mama-don'tlove-her-little-angel-baby-no-more-'tall", 'great-grandfather', 'hypochondriachal', 'recomaeznaiedufnded', 'andlurntshesheal', 'prekrioeetcribed', 'alai-extraordinary', 'reformationsburg', 'mountain-climbers', 'non-intoxicating', 'sought-everywhere', 'regularlyprescribed', 'sltlortrshrirteried', 'tiktio-stringent', 'thepitoirtrolblattt', 'commandment-keeping', 'rapidly-advancing', 'titmemommiffitriliii', 'cliinoindhwialtceh', 'evcetrsyahroeuwsheosledt', 'easily-digestible', 'appetizinerreatniogtheningbreakfast', 'selfcomplaisance', 'litiadziaidpeizially', 'fellow-passengers', 'long-to-beremembered', 'thoroughly-cooked', 'newhomesewingmachineaortinge', 'lawrillifiliaminiwi', 'forraravageikraitarnviccimuli', 'selfaggrandizement', 'naelstnoonwhenarlithr', 'thoroughly-educated', 'mitantviactuvinl', 'total-abstinence', 'mmtvmszkommommeszmmtimmotmectoot', 'alcoholic-poisoned', 'quickly-workedoff', 'over-development', 'all-falling-to-pieces', 'nmagamangearateannitm', 'nerve-prostration', 'carefullyarranged', 'mommirmeneeruton', 'innocent-hearted', 'chemically-changed', 'ingeniously-devised', 'melxximarantemennzeuta', 'substittatbstitutdisper', 'suffereduffrocal', 'auto-intoxications', 'xvimiiminimmilklakm', 'pertectsatizicroy', 'arterio-sclerosis', 'eltellimiticlarnonxiimpewommirm', 'kardobenedictenkraut', 'sweet-dispositioned', 'minding-my-own-business', 'wholftnthialeavitheat', 'pleasure-seeking', 'carefullycontrolled', 'kreislaufstbrungen', 'cabbage-soup-andrye-bread-eating', 'disease-carrying', 'wasatworkonafarmfor', 'elaborately-decorated', 'oatmtkitilieallbiscuit', 'rprohibitioncampaignexchange', 'uraragarararrarrarrantatti', 'mechano-therapeutics', 'inter-dependence', 'practically-balanced', 'platindatittneal', 'iimunmiumfdialemumammumaniumu', 'extensively-used', 'antdehmeamerrlmattete', 'kitchenspoonfuls', 'would-be-hygienist', 'contaiaonitiiituidthing', 'poorly-constructed', 'faraerelimmammenixamievn', 'waste-not-an-hour', 'good-for-nothings', 'supernormally-imparted', 'non-grape-producing', 'narrow-mindedness', 'ivremilawritaxtomitsmeati', 'tender-heartedness', 'self-development', 'prescription-book', 'coarsely-prepared', 'elaborately-wrought', 'novtigigtgeowawavanwinumagtgtgtagtel', 'velkistrciffirst', "woman'sjournalof", 'progress-evoking', 'diseaseproducing', 'tobacco-saturated', 'develiiptreieszensoteadndfotrheitsa', 'tubular-contracted', 'tobacco-consumers', 'miummiiniimimmormilleirimmunipm', 'no-neck-to-her-dress', 'pfrfectsamiction', 'honestly-courting', 'antedineveranyshow', 'constantly-growing', 'respectably-dressed', 'self-satisfaction', 'ithemnpromisingly', 'consumptionbreeding', 'carefully-selected', 'physio-mechanism', 'sharply-indicated', 'reccommendations', 'asitlitlattialdr', 'cbtherideletenious', 'fashionable-dressed', 'nagnatematmantgatsmage', 'artificially-induced', 'thought-producing', 'secretary-general', 'red-lips-so-delicious', 'mosquito-breeding', 'self-eliminative', 'self-destruction', 'icooxynizzionearea', 'frightfully-coated', 'magneto-conservative', 'transmissibility', 'non-tobacco-using', 'ofiatioifuettiquette', 'sulphur-bleached', 'fashionably-attired', 'ttitttttttttittitt', 'pleasant-mannered', 'naudeekliitfokliiitokittiti', 'self-administration', 'mommevetmormaarvottnkwakomeinkatit', 'suddenly-occurring', 'registrar-general', 'early-contracted', 'finely-developed', 'physiinstruments', "housek'eapigkceper", 'soul-development', 'electro-chemical', 'trichina-spiralis', 'one-hundred-foot', 'one-five-hundredth', 'chemically-prepared', 'freshly-prepared', 'sensitively-organized', 'frequently-changed', 'much-to-be-pitied', 'self-sympathizer', 'dolewhooping-cough', 'helenasanitariumrs', 'pivectsatiention', 'immediatelyremoved', 'iiiififiiiiiiiiii', 'rapidly-multiplying', 'slaughter-houses', 'companion--assistance', 'one-three-hundredth', 'ameraccomplished', 'trviavommxtrvargurgsat', 'prettytypewriter', 'quiteindigestible', 'thoroughlywarmed', 'prematpzernature', 'diseasecleanliness', 'constantlyincreasing', 'inspector-general', 'stimulo-sedative', 'divinely-ordained', 'pivectsatistiction', 'bacon-shakespeare', 'suiilliirhuthbert', 'breast-nourished', 'butthebodywithoutasoulisonlya', 'instructions-with', 'unself-consciousness', 'irregularly-shaped', 'set-em-up-all-round', 'strength-imparting', 'dujardin-beaumetx', 'curawarararanzatararom', 'physibroken-down', 'newhomesewingmachineco', 'anti-constipation', 'unamaniummumumusa', 'ctluiwaeffemmeinettneffeixorg', 'self-disciplined', 'over-sentimental', 'three-thousandth', 'nnonoranningrinnuncarannonciationcimannoroarancinempzinntn', "it'swuffmoretome", 'shortesladrtedexit', 'divinelyappointed', 'rereconstruction', 'near-sightedness', "temperate'thaliits", 'srzanlleeonfditorfedthuecet', 'nutrition-drawing', 'drictithietitstfrd', 'wealth-producers', 'self-registering', 'swiftlydeparting', 'fairly-developed', 'ctimiopripliardeaey', 'shippedererywhere', 'pageillustratthns', 'pertectsatanction', 'esteemeriftiseed', 'tausendguldenkraut', 'homesewingmachineaorangemass', 'innocent-looking', "school-children's", 'long-established', 'pleuro-pneumonia', 'counteractedcrbyted', 'thirst-creatingpower', 'wevonelitlitrakliailiamem', 'rirtroriffirirvi', 'ritresctesvaerreypir', 'aamenontmenalwanunionawaimmatatatem', 'tifitoilioffileigioulders', 'childreehiltiffitig', 'sundltedablaclei', 'willebcuyathefsordirolllratoicrp', 'carelessness--getting', 'delicately-furnished', 'poorly-nourished', 'pfreectsatisfiction', 'newhomesewingmachine', 'ourfacilitiesare', 'pageillustrations', 'equally-important', 'delleatelelicate', 'health-restoring', 'arrowroot-starch', 'well-intentioned', "remedial'measures", 'well-advertising', 'poison-producing', "superintendents'", 'three-months-old', 'eighteenth-century', 'brotherly-kindness', 'dujardin-beaumetz', 'fignuassigumffinquirsi', 'nineteen-twentieths', 'wonderfully-taking', 'short-sightedness', 'wiffinwowhiiiiiwiffeffitivil', 'steadily-increasing', 'intellivibrations', 'fiteltkeinieftneinng', 'precedingpreparations', 'before-described', 'imperfectlymasticated', 'generous-hearted', 'otettettentuovel', 'oteefcleittuttve', 'permanently-established', 'imperfectly-cooked', 'hunger-headaches', 'glosso-pharyngeal', 'theircrispnessitmayberestoredbyplacingtheminahotovenfor', 'bitreieszensoteadndfotrheitsagsuernecewaotrilidirlywanossphety', 'fellow-countryman', 'weakielleliklmeisink', 'immegeographical', 'passion-stimulating', 'oxygenaijealtigul', 'health-destroyer', 'raabritioonfoffotealthful', 'continually-multiplying', 'well-proportioned', 'one-twenty-fifth', 'skilfully-worded', 'largely-experienced', 'hislitthitherother', 'erhbat-embarrassment', 'ciliimmuciziolic', 'householddictionary', 'cruelly-ignorant', 'artificially-colored', 'holonaqanitariutre', 'micro-photographs', 'needle-and-thread', 'noarffillptdally', 'maktbtlignietthet', 'requiremultiplies', 'maize-and-macaroni-eating', 'healthdestroying', 'semi-occasionally', 'comfortable-feeling', 'brownsugarinwhichahalfteaspoonpulverized', 'tissuepaper-like', 'non-self-consciousness', 'corresfiondenz-blatt', 'artificially-heated', 'at-ten-in-her-bed', 'insponsibilities', 'austro-hungarian', 'shorteshortenekutor', 'tempercelebrated', 'acquire-strength', 'turn-up-the-nose', 'self-reliant-looking', 'four-thousandths', 'dangerouslycontaminated', 'gastro-intestinal', 'illy-constructed', 'skillfully-contrived', 'nvenoutiottnnamonivemmaigaramitmm', 'theaqties-question', 'linlsistimisisted', 'cunningly-perverted', 'poorly-developed', 'smoothly-polished', 'carefully-tested', 'church-membership', 'berrenger-ferroud', 'pondeneestrictly', 'intinixtivillytively', 'partiallyreleased', 'recently-discovered', 'constipation-its', 'ricememeicecomememizimicimmiiiiiiimoccemidemei', 'experienexperience', 'semi-unconsciousness', 'greatgrandmother', 'perfettly-dressed', 'widely-differing', 'generally-accepted', 'strength-testing', 'needle-andthread', 'distillery-slops', 'cruelly-accurate', 'disease-bringing', 'agairksqlfbcpthe', 'poison-eliminating', 'forsimplicityitbeatatheworld', 'twenty-five-inch', 'sanfranciscoagents', 'nineteenth-century', 'opportuthemselves', 'stomaciitontstoki', 'thoroughlyorganized', 'molonnolommudoonnutot', "''''''''''''''''", 'descriptivearticles', 'cotemporaneously', 'sttflkikokiftnwtiolen', 'unconscientiously', 'splendid-looking', 'flower-bouquet-like', 'broad-shouldered', 'consumptive-looking', 'intendedtaitlkdsithei', 'counter-irritant', 'comsuperstitions', 'temperately-worded', 'catarrh--laceration', 'highly-developed', 'suitedtomanyuses', 'eighty-and-eight', 'properly-conducted', 'delicately-reared', 'howtodresshealthfully', 'non-commissioned', 'eight-hundredpound', 'semi-intoxication', 'yellow-fever-like', 'closely-arranged', 'ceionametiamanammutanda', 'contra-indicated', 'dabclitsittliblished', 'pertectsatisfiction', 'earnestly-written', 'police-headquarters', 'noncommunicability', 'poverty-stricken', 'ultra-fashionable', 'carlyledescribed', 'noncrystallizable', 'straight-jackets', 'norway-sundhedabladat', 'self-renunciation', 'iforinciratnoral', 'gris-amber-steamed', 'handsomely-bound', 'super-excitation', 'gorgeously-dressed', 'newspaper-covered', 'slaughtering-places', 'selfsatisfaction', 'fifteen-year-old', "mothers'meetings", 'htenanitariantsm', 'akfastmereekfast', 'carefully-prepared', 'strangely-poised', 'spiritualeffectsofintemperance', 'oliver-twistlike', 'boiledwheat-and-fruit-eating', 'properly-prepared', 'nicely-cushioned', 'health-andlife-destroying', 'whitepublishingco', 'motherrecognized', 'generalattention', 'wine-drinking-for-dyspepsia', 'hlenfulltarianism', 'life-restricting', 'wasthrownintoanunfrequented', 'iiicigslirplelasure', 'great-great-great-great-grandchildren', 'compensatoryluxury--the', 'bfilvilarliftightklalthiglitilaritiontintarablanklm', 'fiddle-sticksrsaid', 'sonntnncvccocnnnosc', 'slightly-rounded', 'mionviimillimarkliblogemtialk', 'slaughtering-place', 'witagitiniftititiat', 'thread-and-needle', 'world-conquering', 'wellproportioned', 'health-bestowing', 'character-making', 'scientifically-demonstrated', 'thoroughly-applied', 'rapidly-developing', 'sanguiniolymphatic', 'self-maintenance', 'star-like-shaped', 'self-distrusting', 'additiohtuddition', 'properly-directed', 'skillfully-prepared', 'aerillaissithsbugh', 'wanteweverycounty', 'frifreelirdindird', 'malaria-poisoned', 'yellowish-colored', 'ilimmusiinunaimmumwmeneu', 'widely-advertised', 'aamognmuenitainftwomatualenamman', 'well-authenticated', 'scientificamerican', 'rapidlydeveloping', "constitunature's", 'lactid-acid-containing', 'everaccelerating', 'highly-organized', 'mentho-formoline', 'ifinfifttlicfiffeitivef', 'onthsallshallbecomeyourown', 'commonly-recognized', 'thltisecthsinerispness', 'nnntvvionniccnnnwkwx', 'whiskybesprinkled', 'abnakawwahyeiirounsinsfdsetiori', 'irregular-shaped', 'sineottlievatell', 'fellow-passenger', 'slfiesiiiwisecibade', 'liberallyfurnished', 'glosso-pharvngeal', 'dead-horse-flesh-fattened', 'house-furnishing', 'vapor-inhalation', 'rapidly-spreading', 'princess-of-peace', 'magazinecontains', 'etheonlysewingmachine', 'health-destroying', 'pfriectsatanction', 'question-scavengers', 'electro-therapeutics', 'twenty-four-page', 'electro-therapeutic', 'self-preservashun', 'imimmosummworanicvnimmaixiimicil', 'wine-drinkingfor-dyspepsia', 'strength-producing', 'chititleethildren', 'dyspepsia-breeding', 'iint-strattitstick', 'noixoffeacitexcmicam', "pivects'ataniction", 'cheap-jewel-flash', 'street-sweepings', 'winnegar-simpson', 'super-oxygenates', 'zealand-international', 'cadaverous-looking', 'counter-indicated', 'well-illustrated', 'underciftimmtfrilileg', 'self-purification', 'newly-discovered', 'anti-gkettiagottruipation', 'beautifully-rounded', 'consumpabstainers', 'defectively-lighted', 'thought-pictures', 'favorably-conducted', 'apparently-relieved', 'properlyregulated'], 15)
Correction 8 -- Remove long tokens¶
In [37]:
# %load shared_elements/remove-tokens-with-long-strings-of-characters.py
prev = "correction7"
cycle = "correction8"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
text = re.sub(r"[0-9,!?$:;&]", " ", content)
tokens = GoH.utilities.tokenize_text(text)
replacements = []
replacements.append(GoH.clean.check_for_repeating_characters(tokens, "i|I"))
replacements.append(GoH.clean.check_for_repeating_characters(tokens, "m|M"))
replacements.append(GoH.clean.check_for_repeating_characters(tokens, "n|N"))
replacements.append(GoH.clean.check_for_repeating_characters(tokens, "f|F"))
replacements.append(GoH.clean.check_for_repeating_characters(tokens, "t|T"))
replacements.append(GoH.clean.check_for_repeating_characters(tokens, "l|L"))
replacements = [item for sublist in replacements for item in sublist]
if len(replacements) > 0:
print('{}: {}'.format(filename, replacements))
for replacement in replacements:
content = GoH.clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18851201-V01-04-page24.txt: [('IIIIfIfIIIIIIIIII', ' ')] PHJ18860801-V02-02-page1.txt: [('soNntNncvccocNnNosc.m.tv.c.tnNxcvccw', ' '), ('NNNtvvioNnIccNnNwkwx', ' ')] PHJ18870601-V02-07-page1.txt: [('N.NNILNNILNNN.', ' ')] PHJ18881101-V03-11-page27.txt: [('nnonoranniNgrinnuncarannonciationcimannoroarancinempzinntn', ' ')] PHJ18890501-V04-05-page24.txt: [('Itithnffiffiefinfficiently', ' ')] PHJ18900801-V05-08-page32.txt: [('WIFFINWOWHIIIIIWIFFEFFITIVIL', ' ')] PHJ18920701-V07-07-page23.txt: [('mitimiiiiiiiiiiimmitutimmiticium', ' ')] PHJ18921101-V07-11-page22.txt: [('Millillitilill', ' ')] PHJ19010301-V16-03-page30.txt: [('ttitttttttttittitt', ' ')] PHJ19010401-V16-04-page30.txt: [('IMIMMOSUMMWORANICVNIMMAIXIIMICil', ' ')] PHJ19010501-V16-05-page35.txt: [('iiiiIIIIIIIII', ' ')] PHJ19010901-V16-09-page33.txt: [('MIUMMIINIIMIMMORMIlleiriMMUNIPM', ' '), ('LttlitlYAMtkilletaitaillitialaailkill.failk', ' ')] PHJ19030901-V18-09-page2.txt: [('mmtvmszKommommeszmmtimmotmectoot', ' ')]
In [38]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction8 Average verified rate: 0.9812646838465844 Average of error rates: 0.026498778049322377 Total token count: 2849645
In [39]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[39]:
[('m', 2723), ('d', 2324), ("'", 1997), ('e', 1380), ('r', 1248), ('t', 1203), ('w', 1166), ('n', 1117), ('co', 1109), ('f', 795), ('g', 759), ('x', 648), ('lb', 583), ('sel', 251), ('th', 250), ('mo', 230), ('oo', 222), ('pp', 218), ('z', 215), ('k', 214), ('u', 196), ("an'", 192), ('q', 132), ('ex', 106), ('ournal', 105), ('al', 94), ('te', 85), ('oz', 81), ('ga', 80), ('ro', 77), ('pa', 74), ('va', 74), ('munn', 73), ('io', 72), ('-', 66), ('ti', 66), ("infants'", 61), ('id', 55), ('viperance', 54), ('zo', 53), ('em', 53), ('tion', 51), ("''", 49), ('si', 47), ('urnal', 47), ('fahr', 46), ('cc', 45), ("hours'", 44), ('cloe', 43), ('tt', 42)]
Correction 9 -- Separate Squashed Words¶
In [41]:
# %load shared_elements/separate_squashed_words.py
import pandas as pd
from math import log
prev = "correction8"
cycle = "correction9"
directories = GoH.utilities.define_directories(prev, cycle, base_dir)
if not os.path.exists(directories['cycle']):
os.makedirs(directories['cycle'])
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
verified_tokens = []
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
clean.get_approved_tokens(content, spelling_dictionary, verified_tokens)
tokens_with_freq = dict(collections.Counter(verified_tokens))
words = pd.DataFrame(list(tokens_with_freq.items()), columns=['token','freq'])
words_sorted = words.sort_values('freq', ascending=False)
words_sorted_short = words_sorted[words_sorted.freq > 2]
sorted_list_of_words = list(words_sorted_short['token'])
wordcost = dict((k, log((i+1)*log(len(sorted_list_of_words)))) for i,k in enumerate(sorted_list_of_words))
maxword = max(len(x) for x in sorted_list_of_words)
corpus = (f for f in listdir(directories['prev']) if not f.startswith('.') and isfile(join(directories['prev'], f)))
for filename in corpus:
content = GoH.utilities.readfile(directories['prev'], filename)
text = GoH.utilities.strip_punct(content)
tokens = GoH.utilities.tokenize_text(text)
replacements = []
for token in tokens:
if not token.lower() in spelling_dictionary:
if len(token) > 17:
if re.search(r"[\-\-\'\"]", token):
pass
else:
split_string = clean.infer_spaces(token, wordcost, maxword)
list_split_string = split_string.split()
if clean.verify_split_string(list_split_string, spelling_dictionary):
replacements.append((token, split_string))
else:
pass
else:
pass
else:
pass
if len(replacements) > 0:
print("{}: {}".format(filename, replacements))
for replacement in replacements:
content = GoH.clean.replace_pair(replacement, content)
else:
pass
with open(join(directories['cycle'], filename), mode="w") as o:
o.write(content)
o.close()
PHJ18860801-V02-02-page21.txt: [('painkillezejkillers', 'p a ink i l le z e j k i l l e r s')] PHJ18860801-V02-02-page25.txt: [('localititecalities', 'local it it e c a l i t i e s')] PHJ18860801-V02-02-page4.txt: [('immediatelyremoved', 'immediately removed')] PHJ18870401-V02-06-page24.txt: [('brownsugarinwhichahalfteaspoonpulverized', 'brown sugar in which a half teaspoon pulverized')] PHJ18870401-V02-06-page28.txt: [('DURINGTHECHRISTIAN', 'DURING THE CHRISTIAN')] PHJ18870801-V02-08-page29.txt: [('PACIFICPRESSPUBLISHINGROUSEOaldgildCal', 'PACIFIC PRESS PUBLISHING R O U S E O a l d g i l d C a l')] PHJ18870801-V02-08-page30.txt: [('householddictionary', 'household dictionary')] PHJ18871001-V02-09-page25.txt: [('precedingpreparations', 'preceding preparations')] PHJ18880301-V03-03-page26.txt: [('informationladdress', 'information lad dress')] PHJ18881101-V03-11-page27.txt: [('EECLICEIZLIECOUIZIEIFICINCIIMIC', 'E E C L I C E I Z L I E C O U I Z I E I F I C I N C I I M I C')] PHJ18890201-V04-02-page28.txt: [('theircrispnessitmayberestoredbyplacingtheminahotovenfor', 'their crispness it may be restored by placing them in a hot oven for'), ('precedingpreparations', 'preceding preparations')] PHJ18890301-V04-03-page32.txt: [('NEWHOMESEWINGMACHINE', 'NEW HOME SEWING MACHINE')] PHJ18890401-V04-04-page31.txt: [('theircrispnessitmayberestoredbyplacingtheminahotovenfor', 'their crispness it may be restored by placing them in a hot oven for')] PHJ18890501-V04-05-page21.txt: [('intinixtivillytively', 'in tin ix t i v i l l y t i v e l y'), ('hislItthitherother', 'his l It thither other')] PHJ18890501-V04-05-page24.txt: [('Iiicigslirplelasure', 'I ii c i g s l i r p l e l a s u r e'), ('clothtscgtiliehlbf', 'cloth t s c g t i l i e h l b f'), ('slfiesiiiWiSecibade', 's l fie s iii W i S e c i b a d e')] PHJ18890501-V04-05-page25.txt: [('ChristirailitNiifi', 'Christ i rail it N i i f i'), ('dAbclitsittliblished', 'd A b c l i t s i t t l i b l i s h e d'), ('goldengolilawnbrown', 'golden go l i lawn brown')] PHJ18890501-V04-05-page28.txt: [('advertielderitiderb', 'adv er tie l der it i der b'), ('OatmtkitilieallBiscuit', 'Oat m t k i t i l i e a l l B i s c u i t'), ('contaiaonitiiituidthing', 'c ont ai a on it ii it u i d t h i n g'), ('aerillaissIthsbugh', 'a er ill a is s It h s bug h'), ('cbtherideletenious', 'c b the ride let en i o us'), ('litiadziaidpeizially', 'lit i ad z i a i d p e i z i a l l y'), ('substittatbstitutdisper', 'sub st it tat b st it u t d i s p e r'), ('iihwalithlipainful', 'ii h w a l i t h l i p a i n f u l'), ('drictithietitstfrd', 'dr i c tit hi et it st f r d')] PHJ18890601-V04-06-page27.txt: [('onthsallshallbecomeyourown', 'ont h s all shall become your own')] PHJ18891001-V04-10-page29.txt: [('Wasatworkonafarmfor', 'Was at work on a farm for')] PHJ18891101-V04-11-page3.txt: [('sanguiniolymphatic', 'sang u i n i o l y m p h a t i c')] PHJ18891201-V04-12-page34.txt: [('Spiritualeffectsofintemperance', 'Spiritual effects of intemperance')] PHJ18900201-V05-02-page3.txt: [('Dangerouslycontaminated', 'Dangerous l y contaminated')] PHJ18900301-V05-03-page23.txt: [('oftencircumstances', 'often circumstances')] PHJ18900301-V05-03-page30.txt: [('CliinoindhWialtceh', 'C l ii no in d h W i a l t c e h')] PHJ18900301-V05-03-page32.txt: [('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')] PHJ18900401-V05-04-page15.txt: [('thoroughlyorganized', 'thoroughly organized')] PHJ18900701-V05-07-page28.txt: [('willEbcuyatheFsORDiroLlLRATOICRP', 'will E b c u y a t h e F s O R D i r o L l L R A T O I C R P')] PHJ18900801-V05-08-page28.txt: [('HowtoDressHealthfully', 'How to Dress Healthfully')] PHJ18900801-V05-08-page31.txt: [('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')] PHJ18900901-V05-09-page29.txt: [('thepitOirtroLBLATTT', 'the pit O ir t r o L B L A T T T')] PHJ18900901-V05-09-page31.txt: [('SCIENTIFICAMERICAN', 'SCIENTIFIC AMERICAN')] PHJ18901001-V05-10-page31.txt: [('ANTEDineveranYshow', 'ANT ED in ever an Y show'), ('WHOCHANGEDTHESABBATH', 'WHO CHANGED THE SABBATH')] PHJ18901201-V05-12-page11.txt: [('barbaphysiologists', 'bar b a physiologists')] PHJ18901201-V05-12-page31.txt: [('THEONLYSEWINGMACHINE', 'THE ONLY SEWING MACHINE'), ('NEWHOMESEWINGMACHINECo', 'NEW HOME SEWING MACHINE C o'), ('Wefurnishpatternsforhigh', 'We furnish patterns for high')] PHJ18910101-V06-01-page31.txt: [('THEONLYSEWINGMACHINE', 'THE ONLY SEWING MACHINE'), ('PIVECTSATISTICTION', 'P IV E C T S A T I S T I C T I O N')] PHJ18910301-V06-03-page31.txt: [('NEWHOMESEWINGMACHINEaORTINGE', 'NEW HOME SEWING MACHINE a OR TIN G E'), ('ctiMioprIpliardeaey', 'c t i M i o p r I p l i a r d e a e y')] PHJ18910401-V06-04-page31.txt: [('THEONLYSEWINGMACHINE', 'THE ONLY SEWING MACHINE')] PHJ18910501-V06-05-page31.txt: [('HOMESEWINGMACHINEaORANGEMASS', 'HOME SEWING MACHINE a ORANGE MASS')] PHJ18910601-V06-06-page2.txt: [('imperfectlycleansed', 'imperfectly cleansed')] PHJ18910601-V06-06-page32.txt: [('laWrillifiliaMiniWi', 'la W r i l l i f i l i a M i n i W i')] PHJ18910801-V06-08-page8.txt: [('constantlyincreasing', 'constantly increasing')] PHJ18910901-V06-09-page11.txt: [('comfortablycircumstanced', 'comfortably circumstanced')] PHJ18911001-V06-10-page27.txt: [('consumptionbreeding', 'consumption breeding')] PHJ18911001-V06-10-page4.txt: [('supercarbonization', 'sup er carbon i z a t i o n')] PHJ18911101-V06-11-page31.txt: [('descriptivearticles', 'descriptive articles')] PHJ18911201-V06-12-page31.txt: [('PACIFICPRESSPUBLISHINGHOUSE', 'PACIFIC PRESS PUBLISHING HOUSE')] PHJ18920901-V07-09-page25.txt: [('butthebodywithoutasoulisonlya', 'but the body without a soul is only a')] PHJ18920901-V07-09-page32.txt: [('ForSimplicityitBeatatheWorld', 'For Simplicity it Beat a the World')] PHJ18921101-V07-11-page30.txt: [('avoIddanarousbarbi', 'a v o I d d a n a r o u s b a r b i')] PHJ18960501-V11-05-page6.txt: [('supersensitiveness', 'sup er sensitiveness')] PHJ18961101-V11-11-page32.txt: [('ingSliimiediullIVIgi', 'ing S l i im i ed i u l l I V I g i')] PHJ18990401-V14-04-page16.txt: [('accomplishmentwhichhascomevery', 'accomplishment which has come very')] PHJ18990701-V14-07-page18.txt: [('supersensitiveness', 'sup er sensitiveness')] PHJ18990901-V14-09-page29.txt: [('poisonoussubstances', 'poisonous substances')] PHJ19010201-V16-02-page33.txt: [('bfilVilarliftightklalthiglitilaritiontintarAblanklM', 'b f i l V i l a r l i f t i g h t k l a l t h i g l i t i l a r i t i o n t i n t a r A b l a n k l M')] PHJ19010501-V16-05-page4.txt: [('liberallyfurnished', 'liberally furnished')] PHJ19010701-V16-07-page29.txt: [('SanFranciscoAgents', 'San Francisco Agents')] PHJ19010701-V16-07-page33.txt: [('MIONVIIMIlliMaRkliblOgeMtialk', 'M I O N V I I M I l l i M a R k l i b l O g e M t i a l k'), ('aamenontmenalwanunionawaimmatatatem', 'a a men ont men a l wan union a w a i m m a t a t a t e m')] PHJ19010801-V16-08-page29.txt: [('pleasantdisinfectant', 'pleasant disinfectant')] PHJ19010801-V16-08-page33.txt: [('WEVONElitlitrAkliailIAMEM', 'WE V ONE lit lit r A k l i a i l I A M E M')] PHJ19011201-V16-12-page35.txt: [('carefullycontrolled', 'carefully controlled')] PHJ19011201-V16-12-page43.txt: [('GOODHEALTHRESTAURANT', 'GOOD HEALTH RESTAURANT')] PHJ19020401-V17-04-page7.txt: [('delicatelybalanced', 'delicately balanced')] PHJ19020501-V17-05-page18.txt: [('diseasecleanliness', 'disease cleanliness')] PHJ19020501-V17-05-page19.txt: [('satisfacantiseptic', 'sat is f a c a n t i s e p t i c')] PHJ19020801-V17-08-page2.txt: [('curawarararanzaTararom', 'cur a war ar a ran z a T a r a r o m')] PHJ19030801-V18-08-page17.txt: [('noncommunicability', 'non communicability'), ('intercommunicability', 'inter communicability')] PHJ19030801-V18-08-page35.txt: [('Noixoffeacitexcmicam', 'No ix of fe a cite x c m i c a m')] PHJ19031001-V18-10-page36.txt: [('Witagitiniftititiat', 'Wit a git in if tit it i at')] PHJ19031101-V18-11-page36.txt: [('SanitariumSanitarium', 'Sanitarium Sanitarium')] PHJ19031201-V18-12-page36.txt: [('HelenaSanitariumRS', 'Helena Sanitarium R S')] PHJ19040101-V19-01-page24.txt: [('regularlyprescribed', 'regularly prescribed')] PHJ19040601-V19-06-page14.txt: [('aplebaredsohlaubtitonstriasiaglhl', 'a p le bar ed so h l au b tit on str i as i a g l h l')] PHJ19040601-V19-06-page9.txt: [('imperfectlymasticated', 'imperfectly masticated')]
In [42]:
# %load shared_elements/summary.py
summary = GoH.reports.overview_report(directories['cycle'], spelling_dictionary, title)
Directory: /Users/jeriwieringa/Dissertation/text/text/2017-01-31-corpus-with-utf8-split-into-titles-cleaning/PHJ/correction9 Average verified rate: 0.9812348137373975 Average of error rates: 0.026514996667407243 Total token count: 2850438
In [43]:
# %load shared_elements/top_errors.py
errors_summary = GoH.reports.get_errors_summary( summary )
GoH.reports.top_errors( errors_summary, 10 )[:50]
Out[43]:
[('m', 2741), ('d', 2338), ("'", 1997), ('e', 1411), ('r', 1272), ('t', 1245), ('w', 1172), ('n', 1131), ('co', 1109), ('f', 805), ('g', 769), ('x', 649), ('lb', 583), ('sel', 251), ('th', 250), ('mo', 230), ('z', 222), ('oo', 222), ('k', 221), ('pp', 218), ('u', 207), ("an'", 192), ('q', 132), ('ex', 106), ('ournal', 105), ('al', 94), ('te', 85), ('oz', 81), ('ga', 80), ('ro', 77), ('pa', 74), ('va', 74), ('munn', 73), ('io', 72), ('-', 66), ('ti', 66), ("infants'", 61), ('id', 55), ('viperance', 54), ('zo', 53), ('em', 53), ('tion', 51), ("''", 49), ('si', 47), ('urnal', 47), ('fahr', 46), ('cc', 45), ("hours'", 44), ('cloe', 43), ('tt', 42)]
In [ ]: