Generate Noun Phrases List

The generation of noun phrases is moved out of the streaming process and run separately here, as it takes about 2 hours.

In [1]:
from textblob import TextBlob
import gensim
from gensim.parsing.preprocessing import STOPWORDS
import os
import sys
import re
import tarfile
import itertools
import logging
import nltk
In [2]:
# %load ../shared_elements/logging.py
# http://stackoverflow.com/questions/35936086/

# Build the STDERR handler first, with its message layout attached.
formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)

# Configure the root logger: accept everything from DEBUG upward and route it
# through the STDERR handler only, replacing any handlers already installed.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.handlers = [handler]
In [3]:
def process_page(page):
    """
    Preprocess a single periodical page, returning the result as
    a unicode string.

    The page is decoded as UTF-8 and stripped of surrounding whitespace, then
    every character that is not an unaccented ASCII letter (a-z, A-Z) is
    replaced with a single space. Because stripping happens before the
    substitution, the returned string can still begin or end with spaces.

    :param page: raw page content, as read from the corpus archive
        (UTF-8 encoded).
    :return: the cleaned page text as a unicode string.
    """
    content = gensim.utils.to_unicode(page, 'utf8').strip()

    # Reduce the text to the characters we expect in the corpus.
    # Everything other than a-z / A-Z becomes a space: punctuation, digits,
    # line breaks and all accented characters. There is a higher rate of messy
    # OCR reporting accented characters than use in this corpus of languages
    # other than English. This approach removes from study questions of
    # non-English language use, but significantly reduces OCR noise.
    content = re.sub(r"[^a-zA-Z]", " ", content)

    return content
In [4]:
def iter_Periodicals(fname, log_every=None):
    """
    Yield plain text of each periodical page, as a unicode string.

    The pages are read from the gzip-compressed tar archive `fname`
    (e.g. `pages.tar.gz`). Only regular-file members are read; directories
    and other member types are skipped. Each page's raw content is passed
    through `process_page` before being yielded.

    :param fname: path to the `.tar.gz` archive holding one file per page.
    :param log_every: if truthy, log the running count and member name every
        `log_every` extracted pages (starting with the first one).
    """
    extracted = 0
    with tarfile.open(fname, 'r:gz') as tf:
        for file_info in tf:
            if not file_info.isfile():
                continue
            if log_every and extracted % log_every == 0:
                # Pass arguments separately so the message is only formatted
                # when the record is actually emitted.
                logging.info("extracting file #%i: %s", extracted, file_info.name)
            # Release each member's reader as soon as its content is in memory
            # rather than leaving it open for the life of the archive.
            member = tf.extractfile(file_info)
            try:
                content = member.read()
            finally:
                member.close()
            yield process_page(content)
            extracted += 1
In [5]:
def head(stream, n=10):
    """Convenience helper: take the first `n` items of `stream` and return them as a plain list."""
    leading_items = itertools.islice(stream, n)
    return [item for item in leading_items]

def best_phrases(document_stream, top_n=2000, prune_at=100000, prune_every=1000):
    """Return a set of `top_n` most common noun phrases.

    Noun phrases are extracted from each document with TextBlob and counted.
    Only multi-word phrases in which every word is alphabetic and longer than
    two characters are counted.

    :param document_stream: iterable of document texts.
    :param top_n: maximum number of phrases in the returned set.
    :param prune_at: number of most frequent phrases kept at each pruning step.
    :param prune_every: prune (and log progress) every this many documents,
        starting at document 0; a falsy value disables pruning and logging.
    :return: set of the most frequent phrases seen. Because of pruning the
        counts, and therefore the selection, may not be exact.
    """
    phrase_counts = {}
    for docno, doc in enumerate(document_stream):
        # prune out infrequent phrases from time to time, to save RAM.
        # the result may not be completely accurate because of this step
        if prune_every and docno % prune_every == 0:
            sorted_phrases = sorted(phrase_counts.items(), key=lambda item: -item[1])
            phrase_counts = dict(sorted_phrases[:prune_at])
            logging.info("at document #%i, considering %i phrases: %s...",
                         docno, len(phrase_counts), head(sorted_phrases))

        # how many times have we seen each noun phrase?
        for phrase in TextBlob(doc).noun_phrases:
            # only consider multi-word phrases
            if u' ' not in phrase:
                continue
            # ignore phrases that contain too short/non-alphabetic words
            if all(word.isalpha() and len(word) > 2 for word in phrase.split()):
                phrase_counts[phrase] = phrase_counts.get(phrase, 0) + 1

    # most frequent first; ties keep the order in which phrases were first counted
    sorted_phrases = sorted(phrase_counts, key=lambda phrase: -phrase_counts[phrase])
    return set(head(sorted_phrases, top_n))
In [6]:
def corpus_ne_phrases(corpus):
    """
    Collect the most common noun phrases from the corpus at `corpus`.

    Streams the pages through `iter_Periodicals`, hands them to
    `best_phrases` with its default settings, logs how many phrases were
    selected together with a sample of up to ten, and returns the phrases.
    """
    logging.info("collecting entities from %s" % corpus)
    entities = best_phrases(iter_Periodicals(corpus))

    selected_count = len(entities)
    sample = list(entities)[:10]
    logging.info("selected %i entities: %s..." % (selected_count, sample))

    return entities
In [7]:
# Location of the gzipped tar archive of corpus documents. The default is a
# machine-specific absolute path; set the PERIODICALS_CORPUS environment
# variable to point at the archive on another machine.
corpus = os.environ.get(
    'PERIODICALS_CORPUS',
    '/Users/jeriwieringa/Dissertation/text/text/2017-04-docs-for-whole-corpus-model.tar.gz',
)
In [8]:
# Run the noun-phrase extraction over the whole corpus archive. The notebook
# introduction notes that this step takes about 2 hours.
phrases = corpus_ne_phrases(corpus)
root - INFO - collecting entities from /Users/jeriwieringa/Dissertation/text/text/2017-04-docs-for-whole-corpus-model.tar.gz
root - INFO - at document #0, considering 0 phrases: []...
root - INFO - at document #1000, considering 21082 phrases: [('the advocate', 549), ('christian education', 365), ('church school', 264), ('church schools', 234), ('public schools', 141), ('young people', 138), ('school work', 130), ('young men', 124), ('educational work', 112), ('christian schools', 110)]...
root - INFO - at document #2000, considering 49991 phrases: [('the advocate', 907), ('christian education', 518), ('public schools', 423), ('church school', 334), ('church schools', 268), ('civil government', 257), ('young people', 246), ('christian religion', 243), ('american sentinel', 224), ('school work', 222)]...
root - INFO - at document #3000, considering 80276 phrases: [('public schools', 1193), ('the advocate', 908), ('civil government', 855), ('christian religion', 560), ('christian education', 521), ('the american sentinel', 501), ('american sentinel', 478), ('religious liberty', 468), ('religious legislation', 465), ('jesus christ', 443)]...
root - INFO - at document #4000, considering 100000 phrases: [('public schools', 1376), ('civil government', 1151), ('the advocate', 908), ('jesus christ', 856), ('religious liberty', 789), ('christian religion', 761), ('supreme court', 736), ('civil law', 725), ('catholic church', 706), ('religious legislation', 699)]...
root - INFO - at document #5000, considering 100000 phrases: [('civil government', 1469), ('public schools', 1451), ('jesus christ', 1236), ('religious liberty', 1226), ('civil law', 1038), ('supreme court', 1034), ('catholic church', 1011), ('american sentinel', 951), ('christian religion', 911), ('the advocate', 908)]...
root - INFO - at document #6000, considering 100000 phrases: [('civil government', 1634), ('american sentinel', 1625), ('public schools', 1578), ('jesus christ', 1480), ('religious liberty', 1409), ('supreme court', 1164), ('civil law', 1127), ('catholic church', 1113), ('christian religion', 970), ('the advocate', 908)]...
root - INFO - at document #7000, considering 100000 phrases: [('american sentinel', 2302), ('civil government', 1755), ('jesus christ', 1653), ('public schools', 1611), ('religious liberty', 1550), ('catholic church', 1382), ('supreme court', 1248), ('civil law', 1170), ('christian religion', 1020), ('civil power', 911)]...
root - INFO - at document #8000, considering 100000 phrases: [('american sentinel', 2590), ('civil government', 1933), ('jesus christ', 1764), ('public schools', 1689), ('religious liberty', 1676), ('catholic church', 1484), ('supreme court', 1337), ('civil law', 1246), ('christian religion', 1057), ('civil power', 958)]...
root - INFO - at document #9000, considering 100000 phrases: [('american sentinel', 2590), ('civil government', 1943), ('jesus christ', 1771), ('public schools', 1717), ('religious liberty', 1678), ('catholic church', 1487), ('supreme court', 1337), ('civil law', 1246), ('christian education', 1245), ('christian religion', 1061)]...
root - INFO - at document #10000, considering 100000 phrases: [('american sentinel', 2590), ('civil government', 1944), ('jesus christ', 1798), ('public schools', 1758), ('religious liberty', 1681), ('christian education', 1551), ('catholic church', 1490), ('supreme court', 1339), ('civil law', 1246), ('christian religion', 1063)]...
root - INFO - at document #11000, considering 100000 phrases: [('american sentinel', 2590), ('civil government', 1947), ('jesus christ', 1820), ('public schools', 1807), ('religious liberty', 1683), ('christian education', 1645), ('catholic church', 1492), ('supreme court', 1339), ('civil law', 1246), ('christian religion', 1065)]...
root - INFO - at document #12000, considering 100000 phrases: [('american sentinel', 2590), ('civil government', 1950), ('jesus christ', 1870), ('public schools', 1838), ('religious liberty', 1708), ('christian education', 1703), ('catholic church', 1499), ('supreme court', 1347), ('civil law', 1252), ('young people', 1175)]...
root - INFO - at document #13000, considering 100000 phrases: [('american sentinel', 2590), ('civil government', 1965), ('jesus christ', 1949), ('public schools', 1873), ('christian education', 1773), ('religious liberty', 1769), ('young people', 1631), ('catholic church', 1519), ('supreme court', 1348), ('civil law', 1257)]...
root - INFO - at document #14000, considering 100000 phrases: [('american sentinel', 2590), ('young people', 2055), ('jesus christ', 2000), ('civil government', 1983), ('public schools', 1892), ('christian education', 1846), ('religious liberty', 1810), ('catholic church', 1529), ('supreme court', 1354), ('civil law', 1257)]...
root - INFO - at document #15000, considering 100000 phrases: [('american sentinel', 2590), ('young people', 2457), ('jesus christ', 2106), ('civil government', 1993), ('public schools', 1909), ('christian education', 1870), ('religious liberty', 1828), ('camp meeting', 1545), ('catholic church', 1532), ('supreme court', 1359)]...
root - INFO - at document #16000, considering 100000 phrases: [('young people', 2816), ('american sentinel', 2590), ('jesus christ', 2148), ('civil government', 1997), ('christian education', 1962), ('public schools', 1952), ('camp meeting', 1901), ('religious liberty', 1838), ('catholic church', 1538), ('young men', 1471)]...
root - INFO - at document #17000, considering 100000 phrases: [('young people', 2864), ('american sentinel', 2590), ('jesus christ', 2278), ('civil government', 2017), ('public schools', 1988), ('christian education', 1984), ('camp meeting', 1950), ('religious liberty', 1906), ('general conference', 1756), ('catholic church', 1544)]...
root - INFO - at document #18000, considering 100000 phrases: [('jesus christ', 3226), ('young people', 2887), ('american sentinel', 2590), ('general conference', 2143), ('civil government', 2030), ('public schools', 2000), ('christian education', 1990), ('camp meeting', 1979), ('religious liberty', 1952), ('catholic church', 1594)]...
root - INFO - at document #19000, considering 100000 phrases: [('jesus christ', 3605), ('young people', 2989), ('general conference', 2824), ('american sentinel', 2591), ('civil government', 2033), ('public schools', 2020), ('camp meeting', 2013), ('christian education', 1998), ('religious liberty', 1982), ('young men', 1676)]...
root - INFO - at document #20000, considering 100000 phrases: [('general conference', 3993), ('jesus christ', 3834), ('young people', 3458), ('american sentinel', 2591), ('religious liberty', 2064), ('camp meeting', 2062), ('christian education', 2059), ('civil government', 2051), ('public schools', 2039), ('young men', 1914)]...
root - INFO - at document #21000, considering 100000 phrases: [('general conference', 4186), ('jesus christ', 4122), ('young people', 3598), ('american sentinel', 2591), ('camp meeting', 2093), ('christian education', 2075), ('religious liberty', 2071), ('civil government', 2061), ('public schools', 2052), ('young men', 2027)]...
root - INFO - at document #22000, considering 100000 phrases: [('general conference', 4268), ('jesus christ', 4196), ('young people', 3752), ('american sentinel', 2591), ('camp meeting', 2336), ('young men', 2130), ('christian education', 2111), ('religious liberty', 2078), ('public schools', 2070), ('civil government', 2064)]...
root - INFO - at document #23000, considering 100000 phrases: [('jesus christ', 4457), ('general conference', 4305), ('young people', 3783), ('american sentinel', 2591), ('camp meeting', 2404), ('young men', 2157), ('christian education', 2113), ('religious liberty', 2102), ('public schools', 2088), ('civil government', 2070)]...
root - INFO - at document #24000, considering 100000 phrases: [('jesus christ', 4543), ('general conference', 4401), ('young people', 3814), ('american sentinel', 2591), ('camp meeting', 2457), ('young men', 2237), ('religious liberty', 2220), ('civil government', 2148), ('public schools', 2119), ('christian education', 2114)]...
root - INFO - at document #25000, considering 100000 phrases: [('jesus christ', 4766), ('general conference', 4492), ('young people', 3835), ('american sentinel', 2591), ('camp meeting', 2492), ('religious liberty', 2281), ('young men', 2273), ('civil government', 2195), ('public schools', 2129), ('christian education', 2121)]...
root - INFO - at document #26000, considering 100000 phrases: [('jesus christ', 4819), ('general conference', 4539), ('young people', 3862), ('american sentinel', 2591), ('camp meeting', 2499), ('young men', 2338), ('religious liberty', 2287), ('civil government', 2198), ('public schools', 2137), ('christian education', 2121)]...
root - INFO - at document #27000, considering 100000 phrases: [('jesus christ', 4837), ('general conference', 4541), ('young people', 3875), ('american sentinel', 2591), ('camp meeting', 2502), ('young men', 2385), ('religious liberty', 2287), ('civil government', 2199), ('public schools', 2143), ('christian education', 2121)]...
root - INFO - at document #28000, considering 100000 phrases: [('jesus christ', 4853), ('general conference', 4541), ('young people', 3886), ('american sentinel', 2591), ('camp meeting', 2503), ('young men', 2434), ('religious liberty', 2287), ('civil government', 2199), ('public schools', 2145), ('christian education', 2121)]...
root - INFO - at document #29000, considering 100000 phrases: [('jesus christ', 4855), ('general conference', 4541), ('young people', 3891), ('american sentinel', 2591), ('camp meeting', 2503), ('young men', 2484), ('religious liberty', 2288), ('the health reformer', 2277), ('civil government', 2199), ('public schools', 2154)]...
root - INFO - at document #30000, considering 100000 phrases: [('jesus christ', 4859), ('general conference', 4541), ('young people', 3903), ('american sentinel', 2591), ('young men', 2552), ('the health reformer', 2550), ('camp meeting', 2503), ('religious liberty', 2288), ('civil government', 2199), ('public schools', 2169)]...
root - INFO - at document #31000, considering 100000 phrases: [('jesus christ', 4863), ('general conference', 4542), ('young people', 3910), ('young men', 2604), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2504), ('religious liberty', 2288), ('civil government', 2199), ('good health', 2189)]...
root - INFO - at document #32000, considering 100000 phrases: [('jesus christ', 4864), ('general conference', 4542), ('young people', 3920), ('young men', 2631), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2504), ('good health', 2476), ('religious liberty', 2289), ('public schools', 2202)]...
root - INFO - at document #33000, considering 100000 phrases: [('jesus christ', 4866), ('general conference', 4542), ('young people', 3932), ('young men', 2689), ('good health', 2644), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2504), ('religious liberty', 2289), ('public schools', 2213)]...
root - INFO - at document #34000, considering 100000 phrases: [('jesus christ', 4866), ('general conference', 4543), ('young people', 3952), ('good health', 2820), ('young men', 2753), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('religious liberty', 2289), ('public schools', 2242)]...
root - INFO - at document #35000, considering 100000 phrases: [('jesus christ', 4867), ('general conference', 4543), ('young people', 3968), ('good health', 2974), ('young men', 2801), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('religious liberty', 2289), ('present time', 2264)]...
root - INFO - at document #36000, considering 100000 phrases: [('jesus christ', 4867), ('general conference', 4543), ('young people', 3981), ('good health', 3133), ('young men', 2817), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('present time', 2331), ('religious liberty', 2289)]...
root - INFO - at document #37000, considering 100000 phrases: [('jesus christ', 4867), ('general conference', 4543), ('young people', 3992), ('good health', 3250), ('young men', 2858), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('present time', 2367), ('public schools', 2331)]...
root - INFO - at document #38000, considering 100000 phrases: [('jesus christ', 4870), ('general conference', 4543), ('young people', 4002), ('good health', 3380), ('young men', 2890), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('present time', 2440), ('public schools', 2346)]...
root - INFO - at document #39000, considering 100000 phrases: [('jesus christ', 4873), ('general conference', 4543), ('young people', 4010), ('good health', 3494), ('young men', 2912), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('present time', 2482), ('public schools', 2355)]...
root - INFO - at document #40000, considering 100000 phrases: [('jesus christ', 4873), ('general conference', 4543), ('young people', 4017), ('good health', 3710), ('young men', 2932), ('american sentinel', 2591), ('the health reformer', 2550), ('present time', 2518), ('camp meeting', 2505), ('public schools', 2364)]...
root - INFO - at document #41000, considering 100000 phrases: [('jesus christ', 4875), ('general conference', 4543), ('young people', 4023), ('good health', 3931), ('young men', 2945), ('american sentinel', 2591), ('the health reformer', 2550), ('present time', 2544), ('camp meeting', 2505), ('public schools', 2367)]...
root - INFO - at document #42000, considering 100000 phrases: [('jesus christ', 4876), ('general conference', 4543), ('good health', 4206), ('young people', 4036), ('young men', 2971), ('american sentinel', 2591), ('present time', 2568), ('the health reformer', 2550), ('camp meeting', 2505), ('public schools', 2378)]...
root - INFO - at document #43000, considering 100000 phrases: [('jesus christ', 4877), ('good health', 4560), ('general conference', 4543), ('young people', 4041), ('young men', 2987), ('present time', 2593), ('american sentinel', 2591), ('the health reformer', 2550), ('camp meeting', 2505), ('public schools', 2387)]...
root - INFO - at document #44000, considering 100000 phrases: [('good health', 5007), ('jesus christ', 4884), ('general conference', 4601), ('young people', 4140), ('young men', 3006), ('present time', 2641), ('camp meeting', 2599), ('american sentinel', 2591), ('the health reformer', 2550), ('public schools', 2413)]...
root - INFO - at document #45000, considering 100000 phrases: [('good health', 5048), ('jesus christ', 5047), ('general conference', 4716), ('young people', 4333), ('young men', 3077), ('camp meeting', 2844), ('present time', 2686), ('american sentinel', 2591), ('the health reformer', 2550), ('public schools', 2421)]...
root - INFO - at document #46000, considering 100000 phrases: [('jesus christ', 5155), ('good health', 5116), ('general conference', 4716), ('young people', 4415), ('young men', 3153), ('the life boat', 3045), ('camp meeting', 2847), ('present time', 2714), ('american sentinel', 2591), ('the health reformer', 2550)]...
root - INFO - at document #47000, considering 100000 phrases: [('jesus christ', 5254), ('good health', 5160), ('general conference', 4716), ('young people', 4461), ('the life boat', 4342), ('young men', 3212), ('camp meeting', 2849), ('present time', 2733), ('american sentinel', 2591), ('young man', 2587)]...
root - INFO - at document #48000, considering 100000 phrases: [('the life boat', 5715), ('jesus christ', 5406), ('good health', 5198), ('general conference', 4716), ('young people', 4500), ('young men', 3259), ('camp meeting', 2852), ('present time', 2749), ('young man', 2735), ('american sentinel', 2591)]...
root - INFO - at document #49000, considering 100000 phrases: [('the life boat', 6997), ('jesus christ', 5516), ('good health', 5209), ('general conference', 4717), ('young people', 4587), ('young men', 3298), ('camp meeting', 2854), ('young man', 2845), ('present time', 2762), ('american sentinel', 2591)]...
root - INFO - at document #50000, considering 100000 phrases: [('the life boat', 8077), ('jesus christ', 5579), ('good health', 5221), ('general conference', 4718), ('young people', 4650), ('young men', 3344), ('young man', 2935), ('camp meeting', 2861), ('present time', 2784), ('american sentinel', 2591)]...
root - INFO - at document #51000, considering 100000 phrases: [('the life boat', 9162), ('jesus christ', 5674), ('good health', 5228), ('general conference', 4720), ('young people', 4700), ('young men', 3402), ('young man', 3018), ('camp meeting', 2872), ('present time', 2808), ('american sentinel', 2591)]...
root - INFO - at document #52000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5697), ('good health', 5259), ('young people', 4752), ('general conference', 4722), ('young men', 3446), ('young man', 3060), ('camp meeting', 2875), ('present time', 2830), ('american sentinel', 2591)]...
root - INFO - at document #53000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5704), ('good health', 5307), ('young people', 4774), ('general conference', 4723), ('young men', 3475), ('young man', 3091), ('camp meeting', 2876), ('present time', 2850), ('american sentinel', 2591)]...
root - INFO - at document #54000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5709), ('good health', 5338), ('young people', 4781), ('general conference', 4730), ('young men', 3493), ('young man', 3108), ('camp meeting', 2877), ('present time', 2866), ('american sentinel', 2591)]...
root - INFO - at document #55000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5709), ('good health', 5358), ('young people', 4792), ('general conference', 4730), ('young men', 3516), ('young man', 3124), ('present time', 2882), ('camp meeting', 2878), ('american sentinel', 2591)]...
root - INFO - at document #56000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5712), ('good health', 5390), ('young people', 4806), ('general conference', 4730), ('young men', 3543), ('young man', 3154), ('present time', 2897), ('camp meeting', 2878), ('american sentinel', 2591)]...
root - INFO - at document #57000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5713), ('good health', 5414), ('young people', 4816), ('general conference', 4730), ('young men', 3570), ('young man', 3179), ('present time', 2910), ('camp meeting', 2878), ('short time', 2615)]...
root - INFO - at document #58000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5715), ('good health', 5445), ('young people', 4826), ('general conference', 4733), ('young men', 3587), ('young man', 3200), ('present time', 2929), ('camp meeting', 2878), ('short time', 2647)]...
root - INFO - at document #59000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5769), ('good health', 5474), ('young people', 4829), ('general conference', 4749), ('young men', 3621), ('young man', 3223), ('present time', 2949), ('camp meeting', 2879), ('cold water', 2671)]...
root - INFO - at document #60000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 5913), ('good health', 5474), ('young people', 4837), ('general conference', 4766), ('young men', 3642), ('young man', 3233), ('present time', 2995), ('camp meeting', 2879), ('religious liberty', 2735)]...
root - INFO - at document #61000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 6035), ('good health', 5474), ('young people', 4846), ('general conference', 4769), ('young men', 3661), ('young man', 3239), ('present time', 3020), ('religious liberty', 2989), ('camp meeting', 2880)]...
root - INFO - at document #62000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 6095), ('good health', 5483), ('young people', 5207), ('general conference', 5019), ('young men', 3791), ('young man', 3330), ('camp meeting', 3282), ('present time', 3089), ('religious liberty', 3054)]...
root - INFO - at document #63000, considering 100000 phrases: [('the life boat', 9502), ('jesus christ', 6134), ('young people', 5730), ('good health', 5493), ('general conference', 5320), ('camp meeting', 3905), ('young men', 3902), ('young man', 3417), ('present time', 3154), ('religious liberty', 3080)]...
root - INFO - at document #64000, considering 100000 phrases: [('the life boat', 9502), ('young people', 6387), ('jesus christ', 6189), ('good health', 5503), ('general conference', 5389), ('camp meeting', 4376), ('young men', 4019), ('young man', 3510), ('present time', 3217), ('religious liberty', 3104)]...
root - INFO - at document #65000, considering 100000 phrases: [('the life boat', 9502), ('young people', 6890), ('jesus christ', 6211), ('general conference', 5536), ('good health', 5510), ('camp meeting', 4916), ('young men', 4211), ('young man', 3603), ('present time', 3270), ('missionary work', 3200)]...
root - INFO - at document #66000, considering 100000 phrases: [('the life boat', 9502), ('young people', 7402), ('jesus christ', 6246), ('general conference', 5650), ('good health', 5521), ('camp meeting', 5231), ('young men', 4434), ('young man', 3688), ('conference office', 3441), ('missionary work', 3390)]...
root - INFO - at document #67000, considering 100000 phrases: [('the life boat', 9502), ('young people', 7571), ('jesus christ', 6276), ('general conference', 5709), ('good health', 5578), ('camp meeting', 5293), ('young men', 4507), ('young man', 3790), ('conference office', 3538), ('missionary work', 3429)]...
root - INFO - at document #68000, considering 100000 phrases: [('the life boat', 9502), ('young people', 7583), ('jesus christ', 6280), ('general conference', 5709), ('good health', 5665), ('camp meeting', 5293), ('young men', 4559), ('young man', 3890), ('conference office', 3538), ('present time', 3446)]...
root - INFO - at document #69000, considering 100000 phrases: [('the life boat', 9502), ('young people', 7598), ('jesus christ', 6282), ('general conference', 5709), ('good health', 5706), ('camp meeting', 5295), ('young men', 4582), ('young man', 3940), ('conference office', 3538), ('present time', 3485)]...
root - INFO - at document #70000, considering 100000 phrases: [('the life boat', 9502), ('young people', 7611), ('jesus christ', 6287), ('good health', 5745), ('general conference', 5709), ('camp meeting', 5295), ('young men', 4625), ('young man', 3984), ('conference office', 3538), ('present time', 3509)]...
root - INFO - at document #71000, considering 100000 phrases: [('the life boat', 9502), ('young people', 7815), ('jesus christ', 6393), ('general conference', 5834), ('good health', 5768), ('camp meeting', 5472), ('young men', 4729), ('young man', 4049), ('present time', 3578), ('missionary work', 3571)]...
root - INFO - at document #72000, considering 100000 phrases: [('the life boat', 9502), ('young people', 8210), ('jesus christ', 6446), ('general conference', 6010), ('camp meeting', 5822), ('good health', 5785), ('young men', 4825), ('young man', 4134), ('missionary work', 3732), ('present time', 3663)]...
root - INFO - at document #73000, considering 100000 phrases: [('the life boat', 9502), ('young people', 8608), ('jesus christ', 6478), ('camp meeting', 6247), ('general conference', 6228), ('good health', 5795), ('young men', 5018), ('young man', 4241), ('missionary work', 3903), ('present time', 3742)]...
root - INFO - at document #74000, considering 100000 phrases: [('the life boat', 9502), ('young people', 9061), ('camp meeting', 6672), ('general conference', 6545), ('jesus christ', 6527), ('good health', 5801), ('young men', 5150), ('young man', 4371), ('missionary work', 4072), ('present time', 3833)]...
root - INFO - at document #75000, considering 100000 phrases: [('the life boat', 9502), ('young people', 9425), ('camp meeting', 7160), ('general conference', 6648), ('jesus christ', 6566), ('good health', 5808), ('young men', 5250), ('young man', 4456), ('missionary work', 4213), ('present time', 3891)]...
root - INFO - at document #76000, considering 100000 phrases: [('young people', 9752), ('the life boat', 9502), ('camp meeting', 7469), ('general conference', 6843), ('jesus christ', 6651), ('good health', 5811), ('young men', 5363), ('young man', 4507), ('missionary work', 4329), ('present time', 3960)]...
root - INFO - at document #77000, considering 100000 phrases: [('young people', 9754), ('the life boat', 9502), ('camp meeting', 7471), ('jesus christ', 7297), ('general conference', 6858), ('good health', 5816), ('young men', 5378), ('young man', 4560), ('missionary work', 4329), ('present time', 4125)]...
root - INFO - at document #78000, considering 100000 phrases: [('young people', 9757), ('the life boat', 9502), ('jesus christ', 7741), ('camp meeting', 7478), ('general conference', 6883), ('good health', 5818), ('young men', 5411), ('young man', 4634), ('missionary work', 4329), ('present time', 4250)]...
root - INFO - at document #79000, considering 100000 phrases: [('young people', 9761), ('the life boat', 9502), ('jesus christ', 8143), ('camp meeting', 7485), ('general conference', 6910), ('good health', 5825), ('young men', 5433), ('young man', 4709), ('present truth', 4685), ('present time', 4351)]...
root - INFO - at document #80000, considering 100000 phrases: [('young people', 9766), ('the life boat', 9502), ('jesus christ', 8559), ('camp meeting', 7488), ('general conference', 7022), ('good health', 5833), ('young men', 5469), ('present truth', 5041), ('young man', 4760), ('present time', 4451)]...
root - INFO - at document #81000, considering 100000 phrases: [('young people', 9776), ('the life boat', 9502), ('jesus christ', 8868), ('camp meeting', 7491), ('general conference', 7106), ('good health', 5845), ('young men', 5514), ('present truth', 5464), ('young man', 4836), ('present time', 4601)]...
root - INFO - at document #82000, considering 100000 phrases: [('young people', 9792), ('the life boat', 9502), ('jesus christ', 9297), ('camp meeting', 7524), ('general conference', 7147), ('good health', 5862), ('present truth', 5825), ('young men', 5566), ('young man', 4903), ('present time', 4768)]...
root - INFO - at document #83000, considering 100000 phrases: [('young people', 9808), ('jesus christ', 9715), ('the life boat', 9502), ('camp meeting', 7868), ('general conference', 7295), ('present truth', 6162), ('good health', 5873), ('young men', 5637), ('young man', 4995), ('present time', 4911)]...
root - INFO - at document #84000, considering 100000 phrases: [('jesus christ', 10304), ('young people', 9842), ('the life boat', 9502), ('camp meeting', 8103), ('general conference', 7453), ('present truth', 6532), ('good health', 5888), ('young men', 5778), ('present time', 5105), ('young man', 5096)]...
root - INFO - at document #85000, considering 100000 phrases: [('jesus christ', 10676), ('young people', 9899), ('the life boat', 9502), ('camp meeting', 8619), ('general conference', 7723), ('present truth', 6945), ('young men', 5938), ('good health', 5929), ('present time', 5295), ('young man', 5239)]...
root - INFO - at document #86000, considering 100000 phrases: [('jesus christ', 11023), ('young people', 9980), ('the life boat', 9502), ('camp meeting', 8947), ('general conference', 7959), ('present truth', 7196), ('good health', 6091), ('young men', 6073), ('present time', 5433), ('young man', 5357)]...
root - INFO - at document #87000, considering 100000 phrases: [('jesus christ', 11228), ('young people', 10031), ('the life boat', 9502), ('camp meeting', 9334), ('general conference', 8065), ('present truth', 7399), ('young men', 6205), ('good health', 6205), ('present time', 5521), ('young man', 5500)]...
root - INFO - at document #88000, considering 100000 phrases: [('jesus christ', 11462), ('young people', 10088), ('camp meeting', 9733), ('the life boat', 9502), ('general conference', 8278), ('present truth', 7614), ('good health', 6327), ('young men', 6315), ('present time', 5712), ('young man', 5582)]...
root - INFO - at document #89000, considering 100000 phrases: [('jesus christ', 11654), ('camp meeting', 10205), ('young people', 10149), ('the life boat', 9502), ('general conference', 8477), ('present truth', 7904), ('young men', 6428), ('good health', 6413), ('present time', 5901), ('missionary work', 5731)]...
root - INFO - at document #90000, considering 100000 phrases: [('jesus christ', 11866), ('camp meeting', 10899), ('young people', 10263), ('the life boat', 9502), ('general conference', 8772), ('present truth', 8296), ('young men', 6553), ('good health', 6514), ('present time', 6048), ('missionary work', 6038)]...
root - INFO - at document #91000, considering 100000 phrases: [('jesus christ', 12121), ('camp meeting', 11413), ('young people', 10342), ('the life boat', 9502), ('general conference', 9086), ('present truth', 8612), ('young men', 6680), ('good health', 6672), ('missionary work', 6246), ('present time', 6210)]...
root - INFO - at document #92000, considering 100000 phrases: [('jesus christ', 12381), ('camp meeting', 12033), ('young people', 10424), ('the life boat', 9502), ('general conference', 9405), ('present truth', 8900), ('young men', 6819), ('good health', 6720), ('missionary work', 6471), ('present time', 6385)]...
root - INFO - at document #93000, considering 100000 phrases: [('jesus christ', 12603), ('camp meeting', 12508), ('young people', 10520), ('general conference', 9670), ('the life boat', 9502), ('present truth', 9104), ('young men', 6953), ('good health', 6753), ('missionary work', 6685), ('present time', 6543)]...
root - INFO - at document #94000, considering 100000 phrases: [('camp meeting', 12963), ('jesus christ', 12950), ('young people', 10637), ('general conference', 9881), ('the life boat', 9502), ('present truth', 9252), ('young men', 7078), ('missionary work', 6835), ('good health', 6812), ('present time', 6704)]...
root - INFO - at document #95000, considering 100000 phrases: [('camp meeting', 13435), ('jesus christ', 13339), ('young people', 10756), ('general conference', 10193), ('the life boat', 9502), ('present truth', 9375), ('young men', 7174), ('missionary work', 6998), ('present time', 6906), ('good health', 6844)]...
root - INFO - at document #96000, considering 100000 phrases: [('camp meeting', 13760), ('jesus christ', 13700), ('young people', 10804), ('general conference', 10424), ('present truth', 9520), ('the life boat', 9502), ('young men', 7230), ('missionary work', 7155), ('present time', 7073), ('good health', 6870)]...
root - INFO - at document #97000, considering 100000 phrases: [('camp meeting', 14139), ('jesus christ', 14094), ('young people', 10895), ('general conference', 10625), ('present truth', 9657), ('the life boat', 9502), ('young men', 7334), ('missionary work', 7324), ('present time', 7216), ('good health', 6911)]...
root - INFO - at document #98000, considering 100000 phrases: [('camp meeting', 14467), ('jesus christ', 14337), ('young people', 10987), ('general conference', 10844), ('present truth', 9777), ('the life boat', 9502), ('missionary work', 7479), ('young men', 7424), ('present time', 7368), ('good health', 6960)]...
root - INFO - at document #99000, considering 100000 phrases: [('camp meeting', 14673), ('jesus christ', 14628), ('young people', 11135), ('general conference', 10951), ('present truth', 9897), ('the life boat', 9502), ('missionary work', 7597), ('young men', 7587), ('present time', 7464), ('good health', 7024)]...
root - INFO - at document #100000, considering 100000 phrases: [('jesus christ', 15087), ('camp meeting', 14878), ('young people', 11294), ('general conference', 11126), ('present truth', 10060), ('the life boat', 9502), ('young men', 7775), ('missionary work', 7746), ('present time', 7560), ('good health', 7068)]...
root - INFO - at document #101000, considering 100000 phrases: [('jesus christ', 15462), ('camp meeting', 15022), ('young people', 11382), ('general conference', 11297), ('present truth', 10244), ('the life boat', 9502), ('young men', 7925), ('missionary work', 7867), ('present time', 7658), ('good health', 7145)]...
root - INFO - at document #102000, considering 100000 phrases: [('jesus christ', 15719), ('camp meeting', 15261), ('young people', 11553), ('general conference', 11512), ('present truth', 10364), ('the life boat', 9502), ('young men', 8066), ('missionary work', 7989), ('present time', 7776), ('good health', 7235)]...
root - INFO - at document #103000, considering 100000 phrases: [('jesus christ', 15974), ('camp meeting', 15462), ('general conference', 11816), ('young people', 11700), ('present truth', 10524), ('the life boat', 9502), ('young men', 8189), ('missionary work', 8096), ('present time', 7872), ('good health', 7299)]...
root - INFO - at document #104000, considering 100000 phrases: [('jesus christ', 16163), ('camp meeting', 15611), ('general conference', 12007), ('young people', 11919), ('present truth', 10725), ('the life boat', 9502), ('young men', 8360), ('missionary work', 8187), ('present time', 7975), ('good health', 7369)]...
root - INFO - at document #105000, considering 100000 phrases: [('jesus christ', 16300), ('camp meeting', 15747), ('general conference', 12435), ('young people', 12176), ('present truth', 10945), ('the life boat', 9502), ('young men', 8500), ('missionary work', 8290), ('present time', 8114), ('good health', 7419)]...
root - INFO - at document #106000, considering 100000 phrases: [('jesus christ', 16433), ('camp meeting', 15973), ('general conference', 12894), ('young people', 12440), ('present truth', 11163), ('the life boat', 9502), ('young men', 8665), ('missionary work', 8382), ('present time', 8212), ('good health', 7451)]...
root - INFO - at document #107000, considering 100000 phrases: [('jesus christ', 16600), ('camp meeting', 16217), ('general conference', 13120), ('young people', 12781), ('present truth', 11377), ('the life boat', 9502), ('young men', 8885), ('missionary work', 8504), ('present time', 8336), ('good health', 7474)]...
root - INFO - at document #108000, considering 100000 phrases: [('jesus christ', 16752), ('camp meeting', 16398), ('general conference', 13380), ('young people', 13146), ('present truth', 11566), ('the life boat', 9502), ('young men', 9097), ('missionary work', 8595), ('present time', 8475), ('young man', 7553)]...
root - INFO - at document #109000, considering 100000 phrases: [('jesus christ', 16880), ('camp meeting', 16577), ('general conference', 13614), ('young people', 13473), ('present truth', 11731), ('the life boat', 9502), ('young men', 9242), ('missionary work', 8674), ('present time', 8578), ('young man', 7667)]...
root - INFO - at document #110000, considering 100000 phrases: [('jesus christ', 17018), ('camp meeting', 16690), ('general conference', 14088), ('young people', 13745), ('present truth', 11901), ('the life boat', 9502), ('young men', 9376), ('missionary work', 8773), ('present time', 8669), ('young man', 7745)]...
root - INFO - at document #111000, considering 100000 phrases: [('jesus christ', 17221), ('camp meeting', 16900), ('general conference', 14492), ('young people', 14025), ('present truth', 12062), ('young men', 9534), ('the life boat', 9502), ('missionary work', 8911), ('present time', 8760), ('young man', 7856)]...
root - INFO - at document #112000, considering 100000 phrases: [('jesus christ', 17361), ('camp meeting', 17109), ('general conference', 14749), ('young people', 14365), ('present truth', 12195), ('young men', 9701), ('the life boat', 9502), ('missionary work', 9028), ('present time', 8849), ('young man', 7944)]...
root - INFO - at document #113000, considering 100000 phrases: [('jesus christ', 17531), ('camp meeting', 17312), ('general conference', 14988), ('young people', 14649), ('present truth', 12325), ('young men', 9836), ('the life boat', 9502), ('missionary work', 9112), ('present time', 8935), ('young man', 8029)]...
root - INFO - at document #114000, considering 100000 phrases: [('jesus christ', 17688), ('camp meeting', 17556), ('general conference', 15254), ('young people', 14924), ('present truth', 12500), ('young men', 10025), ('the life boat', 9502), ('missionary work', 9209), ('present time', 9041), ('young man', 8125)]...
root - INFO - at document #115000, considering 100000 phrases: [('jesus christ', 17869), ('camp meeting', 17754), ('general conference', 15805), ('young people', 15158), ('present truth', 12705), ('young men', 10200), ('the life boat', 9502), ('missionary work', 9306), ('present time', 9142), ('young man', 8218)]...
root - INFO - at document #116000, considering 100000 phrases: [('jesus christ', 18024), ('camp meeting', 17978), ('general conference', 16032), ('young people', 15583), ('present truth', 12884), ('young men', 10333), ('the life boat', 9502), ('missionary work', 9454), ('present time', 9247), ('young man', 8322)]...
root - INFO - at document #117000, considering 100000 phrases: [('jesus christ', 18252), ('camp meeting', 18161), ('general conference', 16142), ('young people', 15849), ('present truth', 13119), ('young men', 10454), ('missionary work', 9574), ('the life boat', 9502), ('present time', 9358), ('young man', 8404)]...
root - INFO - at document #118000, considering 100000 phrases: [('jesus christ', 18405), ('camp meeting', 18370), ('general conference', 16272), ('young people', 16138), ('present truth', 13396), ('young men', 10565), ('missionary work', 9711), ('the life boat', 9502), ('present time', 9456), ('young man', 8507)]...
root - INFO - at document #119000, considering 100000 phrases: [('camp meeting', 18578), ('jesus christ', 18568), ('young people', 16482), ('general conference', 16394), ('present truth', 13724), ('young men', 10716), ('missionary work', 9838), ('present time', 9552), ('the life boat', 9502), ('young man', 8638)]...
root - INFO - at document #120000, considering 100000 phrases: [('camp meeting', 18815), ('jesus christ', 18699), ('young people', 16887), ('general conference', 16573), ('present truth', 14014), ('young men', 10950), ('missionary work', 9956), ('present time', 9648), ('the life boat', 9502), ('young man', 8754)]...
root - INFO - at document #121000, considering 100000 phrases: [('camp meeting', 19058), ('jesus christ', 18888), ('young people', 17413), ('general conference', 16993), ('present truth', 14251), ('young men', 11214), ('missionary work', 10064), ('present time', 9742), ('the life boat', 9502), ('young man', 8868)]...
root - INFO - at document #122000, considering 100000 phrases: [('camp meeting', 19228), ('jesus christ', 19067), ('young people', 17676), ('general conference', 17150), ('present truth', 14459), ('young men', 11395), ('missionary work', 10172), ('present time', 9852), ('the life boat', 9502), ('young man', 8967)]...
root - INFO - at document #123000, considering 100000 phrases: [('camp meeting', 19356), ('jesus christ', 19202), ('young people', 18014), ('general conference', 17352), ('present truth', 14587), ('young men', 11520), ('missionary work', 10285), ('present time', 9954), ('the life boat', 9502), ('young man', 9049)]...
root - INFO - at document #124000, considering 100000 phrases: [('camp meeting', 19561), ('jesus christ', 19288), ('young people', 18408), ('general conference', 17588), ('present truth', 14672), ('young men', 11678), ('missionary work', 10365), ('present time', 10031), ('the life boat', 9502), ('young man', 9139)]...
root - INFO - at document #125000, considering 100000 phrases: [('camp meeting', 19564), ('jesus christ', 19373), ('young people', 18455), ('general conference', 17603), ('present truth', 14674), ('young men', 11746), ('missionary work', 10372), ('present time', 10069), ('the life boat', 9502), ('young man', 9166)]...
root - INFO - at document #126000, considering 100000 phrases: [('camp meeting', 19566), ('jesus christ', 19469), ('young people', 18460), ('general conference', 17603), ('present truth', 14676), ('young men', 11777), ('missionary work', 10375), ('present time', 10091), ('the life boat', 9502), ('young man', 9174)]...
root - INFO - at document #127000, considering 100000 phrases: [('jesus christ', 19970), ('camp meeting', 19802), ('young people', 18480), ('general conference', 17722), ('present truth', 14851), ('young men', 11866), ('missionary work', 10467), ('present time', 10210), ('the life boat', 9502), ('young man', 9275)]...
root - INFO - at document #128000, considering 100000 phrases: [('jesus christ', 20337), ('camp meeting', 19958), ('young people', 18513), ('general conference', 17799), ('present truth', 15000), ('young men', 11959), ('missionary work', 10587), ('present time', 10310), ('the life boat', 9502), ('young man', 9424)]...
root - INFO - at document #129000, considering 100000 phrases: [('jesus christ', 20621), ('camp meeting', 20161), ('young people', 18560), ('general conference', 17865), ('present truth', 15099), ('young men', 12090), ('missionary work', 10698), ('present time', 10412), ('young man', 9565), ('the life boat', 9502)]...
root - INFO - at document #130000, considering 100000 phrases: [('jesus christ', 20886), ('camp meeting', 20438), ('young people', 18598), ('general conference', 17949), ('present truth', 15230), ('young men', 12212), ('missionary work', 10832), ('present time', 10530), ('young man', 9716), ('the life boat', 9502)]...
root - INFO - at document #131000, considering 100000 phrases: [('jesus christ', 21118), ('camp meeting', 20554), ('young people', 18645), ('general conference', 18019), ('present truth', 15321), ('young men', 12309), ('missionary work', 10911), ('present time', 10597), ('young man', 9832), ('the life boat', 9502)]...
root - INFO - at document #132000, considering 100000 phrases: [('jesus christ', 21468), ('camp meeting', 20689), ('young people', 18667), ('general conference', 18080), ('present truth', 15392), ('young men', 12399), ('missionary work', 10963), ('present time', 10665), ('young man', 9960), ('the life boat', 9502)]...
root - INFO - at document #133000, considering 100000 phrases: [('jesus christ', 21861), ('camp meeting', 20802), ('young people', 18698), ('general conference', 18135), ('present truth', 15447), ('young men', 12498), ('missionary work', 11017), ('present time', 10743), ('young man', 10030), ('the life boat', 9502)]...
root - INFO - at document #134000, considering 100000 phrases: [('jesus christ', 22430), ('camp meeting', 20845), ('young people', 18733), ('general conference', 18190), ('present truth', 15466), ('young men', 12555), ('missionary work', 11100), ('present time', 10816), ('young man', 10109), ('the life boat', 9502)]...
root - INFO - at document #135000, considering 100000 phrases: [('jesus christ', 23137), ('camp meeting', 20914), ('young people', 18764), ('general conference', 18219), ('present truth', 15487), ('young men', 12612), ('missionary work', 11153), ('present time', 10894), ('young man', 10188), ('the life boat', 9502)]...
root - INFO - at document #136000, considering 100000 phrases: [('jesus christ', 23678), ('camp meeting', 20953), ('young people', 18788), ('general conference', 18257), ('present truth', 15508), ('young men', 12699), ('missionary work', 11210), ('present time', 10984), ('young man', 10260), ('the life boat', 9502)]...
root - INFO - at document #137000, considering 100000 phrases: [('jesus christ', 24080), ('camp meeting', 21003), ('young people', 18840), ('general conference', 18275), ('present truth', 15541), ('young men', 12777), ('missionary work', 11266), ('present time', 11084), ('young man', 10379), ('the life boat', 9502)]...
root - INFO - at document #138000, considering 100000 phrases: [('jesus christ', 24504), ('camp meeting', 21057), ('young people', 18877), ('general conference', 18331), ('present truth', 15566), ('young men', 12878), ('missionary work', 11301), ('present time', 11164), ('young man', 10486), ('the life boat', 9502)]...
root - INFO - at document #139000, considering 100000 phrases: [('jesus christ', 24947), ('camp meeting', 21078), ('young people', 18920), ('general conference', 18364), ('present truth', 15595), ('young men', 12940), ('missionary work', 11338), ('present time', 11253), ('young man', 10578), ('the life boat', 9502)]...
root - INFO - at document #140000, considering 100000 phrases: [('jesus christ', 25475), ('camp meeting', 21091), ('young people', 18965), ('general conference', 18393), ('present truth', 15639), ('young men', 13003), ('missionary work', 11370), ('present time', 11368), ('young man', 10649), ('the life boat', 9502)]...
root - INFO - at document #141000, considering 100000 phrases: [('jesus christ', 25919), ('camp meeting', 21103), ('young people', 19009), ('general conference', 18409), ('present truth', 15682), ('young men', 13075), ('present time', 11453), ('missionary work', 11409), ('young man', 10777), ('the life boat', 9502)]...
root - INFO - at document #142000, considering 100000 phrases: [('jesus christ', 26408), ('camp meeting', 21175), ('young people', 19078), ('general conference', 18437), ('present truth', 15700), ('young men', 13144), ('present time', 11556), ('missionary work', 11433), ('young man', 10888), ('the life boat', 9502)]...
root - INFO - at document #143000, considering 100000 phrases: [('jesus christ', 26760), ('camp meeting', 21189), ('young people', 19133), ('general conference', 18467), ('present truth', 15708), ('young men', 13215), ('present time', 11660), ('missionary work', 11460), ('young man', 10952), ('the life boat', 9502)]...
root - INFO - at document #144000, considering 100000 phrases: [('jesus christ', 27077), ('camp meeting', 21192), ('young people', 19166), ('general conference', 18491), ('present truth', 15714), ('young men', 13280), ('present time', 11749), ('missionary work', 11479), ('young man', 11021), ('the life boat', 9502)]...
root - INFO - at document #145000, considering 100000 phrases: [('jesus christ', 27471), ('camp meeting', 21192), ('young people', 19180), ('general conference', 18495), ('present truth', 15723), ('young men', 13341), ('present time', 11805), ('missionary work', 11494), ('young man', 11106), ('the life boat', 9502)]...
root - INFO - at document #146000, considering 100000 phrases: [('jesus christ', 27783), ('camp meeting', 21304), ('young people', 19280), ('general conference', 18545), ('present truth', 15766), ('young men', 13412), ('present time', 11872), ('missionary work', 11595), ('young man', 11172), ('new testament', 9524)]...
root - INFO - at document #147000, considering 100000 phrases: [('jesus christ', 27818), ('camp meeting', 21692), ('young people', 19606), ('general conference', 18737), ('present truth', 15860), ('young men', 13519), ('present time', 11939), ('missionary work', 11740), ('young man', 11252), ('good work', 9651)]...
root - INFO - at document #148000, considering 100000 phrases: [('jesus christ', 27846), ('camp meeting', 22167), ('young people', 19895), ('general conference', 18955), ('present truth', 15946), ('young men', 13585), ('present time', 12066), ('missionary work', 11899), ('young man', 11349), ('good work', 9795)]...
root - INFO - at document #149000, considering 100000 phrases: [('jesus christ', 27878), ('camp meeting', 22431), ('young people', 20308), ('general conference', 19076), ('present truth', 16170), ('young men', 13684), ('present time', 12168), ('missionary work', 12086), ('young man', 11408), ('good work', 9956)]...
root - INFO - at document #150000, considering 100000 phrases: [('jesus christ', 27943), ('camp meeting', 22446), ('young people', 20614), ('general conference', 19203), ('present truth', 16301), ('young men', 13776), ('missionary work', 12243), ('present time', 12242), ('young man', 11526), ('good work', 10030)]...
root - INFO - at document #151000, considering 100000 phrases: [('jesus christ', 28068), ('camp meeting', 22501), ('young people', 21094), ('general conference', 19300), ('present truth', 16491), ('young men', 13898), ('missionary work', 12464), ('present time', 12285), ('young man', 11693), ('good work', 10087)]...
root - INFO - at document #152000, considering 100000 phrases: [('jesus christ', 28122), ('camp meeting', 22509), ('young people', 21118), ('general conference', 19332), ('present truth', 16544), ('young men', 13971), ('missionary work', 12570), ('present time', 12327), ('young man', 11730), ('good work', 10118)]...
root - INFO - at document #153000, considering 100000 phrases: [('jesus christ', 28176), ('camp meeting', 22646), ('young people', 21424), ('general conference', 19439), ('present truth', 16600), ('young men', 14046), ('missionary work', 12685), ('present time', 12375), ('young man', 11791), ('good work', 10172)]...
root - INFO - at document #154000, considering 100000 phrases: [('jesus christ', 28319), ('camp meeting', 22756), ('young people', 21620), ('general conference', 19493), ('present truth', 16655), ('young men', 14089), ('missionary work', 12735), ('present time', 12421), ('young man', 11839), ('good work', 10215)]...
root - INFO - at document #155000, considering 100000 phrases: [('jesus christ', 28380), ('camp meeting', 22787), ('young people', 21663), ('general conference', 19497), ('present truth', 16666), ('young men', 14175), ('missionary work', 12760), ('present time', 12451), ('young man', 11956), ('good work', 10243)]...
root - INFO - at document #156000, considering 100000 phrases: [('jesus christ', 28469), ('camp meeting', 22847), ('young people', 21705), ('general conference', 19501), ('present truth', 16675), ('young men', 14227), ('missionary work', 12805), ('present time', 12497), ('young man', 12101), ('good work', 10283)]...
root - INFO - at document #157000, considering 100000 phrases: [('jesus christ', 28614), ('camp meeting', 22909), ('young people', 21794), ('general conference', 19508), ('present truth', 16682), ('young men', 14292), ('missionary work', 12848), ('present time', 12557), ('young man', 12267), ('good work', 10305)]...
root - INFO - at document #158000, considering 100000 phrases: [('jesus christ', 28697), ('camp meeting', 22919), ('young people', 21976), ('general conference', 19514), ('present truth', 16706), ('young men', 14388), ('missionary work', 12887), ('present time', 12581), ('young man', 12396), ('good work', 10333)]...
root - INFO - at document #159000, considering 100000 phrases: [('jesus christ', 28795), ('camp meeting', 22937), ('young people', 22637), ('general conference', 19539), ('present truth', 16727), ('young men', 14563), ('missionary work', 12976), ('present time', 12616), ('young man', 12613), ('good work', 10376)]...
root - INFO - at document #160000, considering 100000 phrases: [('jesus christ', 28883), ('young people', 23113), ('camp meeting', 22958), ('general conference', 19579), ('present truth', 16755), ('young men', 14745), ('missionary work', 13058), ('young man', 12876), ('present time', 12657), ('good work', 10424)]...
root - INFO - at document #161000, considering 100000 phrases: [('jesus christ', 28976), ('young people', 23592), ('camp meeting', 22973), ('general conference', 19636), ('present truth', 16797), ('young men', 14997), ('young man', 13184), ('missionary work', 13115), ('present time', 12685), ('good work', 10452)]...
root - INFO - at document #162000, considering 100000 phrases: [('jesus christ', 29042), ('young people', 23881), ('camp meeting', 22986), ('general conference', 19702), ('present truth', 16807), ('young men', 15144), ('young man', 13398), ('missionary work', 13144), ('present time', 12706), ('good work', 10478)]...
root - INFO - at document #163000, considering 100000 phrases: [('jesus christ', 29098), ('young people', 24114), ('camp meeting', 22998), ('general conference', 19715), ('present truth', 16819), ('young men', 15245), ('young man', 13588), ('missionary work', 13172), ('present time', 12731), ('good work', 10500)]...
root - INFO - at document #164000, considering 100000 phrases: [('jesus christ', 29177), ('young people', 24386), ('camp meeting', 23017), ('general conference', 19735), ('present truth', 16835), ('young men', 15353), ('young man', 13773), ('missionary work', 13218), ('present time', 12746), ('good work', 10524)]...
root - INFO - at document #165000, considering 100000 phrases: [('jesus christ', 29268), ('young people', 24558), ('camp meeting', 23024), ('general conference', 19778), ('present truth', 16839), ('young men', 15462), ('young man', 13959), ('missionary work', 13253), ('present time', 12780), ('good work', 10552)]...
root - INFO - at document #166000, considering 100000 phrases: [('jesus christ', 29342), ('young people', 24794), ('camp meeting', 23055), ('general conference', 19809), ('present truth', 16848), ('young men', 15576), ('young man', 14133), ('missionary work', 13278), ('present time', 12807), ('good work', 10580)]...
root - INFO - at document #167000, considering 100000 phrases: [('jesus christ', 29405), ('young people', 24985), ('camp meeting', 23064), ('general conference', 19820), ('present truth', 16863), ('young men', 15697), ('young man', 14322), ('missionary work', 13307), ('present time', 12830), ('good work', 10615)]...
root - INFO - at document #168000, considering 100000 phrases: [('jesus christ', 29450), ('young people', 25265), ('camp meeting', 23081), ('general conference', 19836), ('present truth', 16885), ('young men', 15789), ('young man', 14514), ('missionary work', 13341), ('present time', 12861), ('good work', 10631)]...
root - INFO - at document #169000, considering 100000 phrases: [('jesus christ', 29505), ('young people', 25654), ('camp meeting', 23097), ('general conference', 19889), ('present truth', 16896), ('young men', 15932), ('young man', 14698), ('missionary work', 13402), ('present time', 12883), ('good work', 10652)]...
root - INFO - selected 2000 entities: ['creek michigan', 'large increase', 'precious light', 'active interest', 'good home', 'strong drink', 'universal postal', 'whole earth', 'open doors', 'perfect obedience']...
In [9]:
# Echo the selected noun phrases for inspection. The bare expression makes the
# notebook render the entire set as this cell's output (the preceding log
# reports 2000 selected entities, and its first entries match this output).
# NOTE(review): the full dump is very long; consider displaying only a sample
# (e.g. a sorted slice) plus len(phrases) if the notebook is meant to be shared.
phrases
Out[9]:
{'creek michigan',
 'large increase',
 'precious light',
 'active interest',
 'good home',
 'strong drink',
 'universal postal',
 'whole earth',
 'open doors',
 'perfect obedience',
 'american people',
 'god rev',
 'good effect',
 'medical school',
 'dead man',
 'christ jesus',
 'good number',
 'past summer',
 'whole day',
 'instructor vol',
 'class work',
 'city work',
 'general government',
 'good way',
 'twentieth century',
 'eld haskell',
 'honest souls',
 'mighty work',
 'definite time',
 'present condition',
 'fiery furnace',
 'modern science',
 'different times',
 'camp ground',
 'thy kingdom',
 'possible way',
 'creek mich',
 'way home',
 'certain extent',
 'school officers',
 'various phases',
 'god cor',
 'roman catholic church',
 'new man',
 'christian work',
 'self exaltation',
 'manual training',
 'good words',
 'sabbath july',
 'prominent part',
 'advent review and sabbath herald vol',
 'field secretary',
 'animal life',
 'great majority',
 'cold bath',
 'red sea',
 'health journal',
 'own sins',
 'ceremonial law',
 'thou wilt',
 'great encouragement',
 'strong effort',
 'american republic',
 'new song',
 'flesh meats',
 'faithful service',
 'year cents',
 'executive committee',
 'own finger',
 'canvassers institute',
 'true condition',
 'conference office',
 'sea level',
 'false gods',
 'self righteousness',
 'chain gang',
 'religious people',
 'temperance reform',
 'particulars address',
 'remnant church',
 'moral obligation',
 'large family',
 'home circle',
 'special efforts',
 'medical missionaries',
 'general health',
 'long time',
 'long beach',
 'splendid work',
 'college course',
 'training schools',
 'good health',
 'favorable opportunity',
 'early part',
 'religious teachers',
 'lord bless',
 'thou shalt',
 'school lessons',
 'school children',
 'various lines',
 'great truths',
 'valuable assistance',
 'suitable place',
 'grand total',
 'good authority',
 'medical students',
 'whole congregation',
 'faithful ones',
 'state church',
 'shalt thou labor',
 'hearty response',
 'excellent work',
 'red cross',
 'wide spread',
 'right living',
 'corner stones',
 'fellow beings',
 'great events',
 'large audience',
 'evil doers',
 'accompany copy',
 'quiet spirit',
 'total abstinence',
 'southern union conference',
 'self control',
 'weekly visits',
 'los angeles cal',
 'thy truth thy word',
 'lord cometh',
 'interesting facts',
 'dear sister',
 'days meeting',
 'high position',
 'simple faith',
 'union mission',
 'mighty works',
 'sanitarium work',
 'own son',
 'such times',
 'thoughts perish',
 'small pox',
 'good people',
 'dark night',
 'wholesome food',
 'church service',
 'american citizens',
 'wait till',
 'great mistake',
 'different lines',
 'joseph bates',
 'religious worship',
 'business meetings',
 'daily news',
 'the life boat',
 'own words',
 'conference officers',
 'dollars worth',
 'post office address',
 'good reports',
 'human history',
 'evening service',
 'great crisis',
 'lake academy',
 'real interest',
 'own lusts',
 'evil spirits',
 'liquor traffic',
 'christian service',
 'great nations',
 'fiscal year',
 'remnant people',
 'great loss',
 'spiritual condition',
 'federal government',
 'final triumph',
 'saviour jesus christ',
 'battle field',
 'local societies',
 'beautiful home',
 'years experience',
 'gilt edges',
 'evil angels',
 'rio grande',
 'white house',
 'southern junior college',
 'great nation',
 'good spirit',
 'profitable meeting',
 'divine life',
 'precious truths',
 'new life',
 'thorough work',
 'open air',
 'fierce anger',
 'pure water',
 'father mother',
 'college press',
 'general interest',
 'special manner',
 'public places',
 'strong men',
 'religious experience',
 'the american sentinel',
 'common sense',
 'religious observance',
 'own country',
 'key note',
 'old testament',
 'life liberty',
 'great apostasy',
 'fundamental principles',
 'young girl',
 'great price',
 'urgent calls',
 'lake union',
 'whole land',
 'sabbath june',
 'small sum',
 'lake union conference',
 'early writings',
 'family worship',
 'individual members',
 'near future',
 'different branches',
 'years afterward',
 'great plan',
 'human beings',
 'herald takoma',
 'white men',
 'urgent call',
 'wonderful works',
 'urgent need',
 'common people',
 'pacific press',
 'advent movement',
 'good plan',
 'excellent spirit',
 'own lives',
 'eternal truth',
 'christian graces',
 'state board',
 'sure word',
 'evening services',
 'christ christ',
 'tract society',
 'great principles',
 'canadian union conference',
 'new moon',
 'young women',
 'white cloud',
 'school offerings',
 'north american division conference',
 'false teachers',
 'god who',
 'religious world',
 'effective work',
 'thou art',
 'encouraging report',
 'whole system',
 'whole heart',
 'social life',
 'mission funds',
 'tobacco habit',
 'foreign work',
 'law till',
 'excellent interest',
 'greek word',
 'great difference',
 'threefold message',
 'burnt offerings',
 'missionary labor',
 'wise man',
 'poor people',
 'new religion',
 'willing souls',
 'peculiar people',
 'certain amount',
 'certain conditions',
 'righteous judge',
 'river conference',
 'bible sabbath',
 'may god',
 'human body',
 'religious persecution',
 'general use',
 'divine institution',
 'tent companies',
 'jesus christ',
 'private house',
 'judgment seat',
 'moral nature',
 'north american division',
 'and temperance advocate',
 'bad habit',
 'persons desire',
 'saith unto',
 'great controversy',
 'physical development',
 'prophetic periods',
 'midnight cry',
 'white people',
 'careful study',
 'good work',
 'good food',
 'large portion',
 'whole armor',
 'nashville tenn',
 'new light',
 'such expressions',
 'good word',
 'great earthquake',
 'interesting account',
 'church building',
 'great message',
 'entire population',
 'large majority',
 'christian church',
 'religious service',
 'self denial',
 'goon health',
 'church school teacher',
 'blood vessels',
 'james white',
 'past winter',
 'good news',
 'public mind',
 'mission schools',
 'young girls',
 'corner stone',
 'hearty support',
 'faithful work',
 'true faith',
 'human family',
 'great responsibility',
 'reading courses',
 'whole country',
 'business meeting',
 'dear souls',
 'gospel herald',
 'great tribulation',
 'school association',
 'express purpose',
 'thank god',
 'funeral services',
 'church officers',
 'evil ways',
 'solemn warning',
 'true church',
 'sixteenth century',
 'sabbath december',
 'such legislation',
 'divine authority',
 'fresh air',
 'present day',
 'thy brother',
 'personal interest',
 'false doctrines',
 'new thing',
 'true character',
 'personal work',
 'fine gold',
 'careful reading',
 'church elders',
 'great expense',
 'old men',
 'day offerings',
 'great movement',
 'white raiment',
 'business men',
 'church membership',
 'long experience',
 'mission fields',
 'drug store',
 'important factor',
 'thine heart',
 'bad habits',
 'the sentinel',
 'life and health',
 'lord hath',
 'old gentleman',
 'christian schools',
 'practical guide',
 'valuable instruction',
 'church schools',
 'lay plans',
 'european war',
 'important truths',
 'blue eyes',
 'poor health',
 'human heart',
 'small books',
 'various reasons',
 'lay members',
 'vast amount',
 'large measure',
 'excellent book',
 'thy stranger',
 'new house',
 'glad tidings',
 'sum total',
 'summer months',
 'human authority',
 'missionary operations',
 'thou good',
 'good name',
 'own home',
 'large numbers',
 'whole body',
 'time past',
 'union conference committee',
 'little book',
 'wonderful things',
 'central union conference',
 'mary magdalene',
 'aggressive work',
 'dear friends',
 'important point',
 'new subscriptions',
 'dear lord',
 'different languages',
 'modern civilization',
 'large books',
 'church school',
 'solemn message',
 'present state',
 'great field',
 'school teachers',
 'spiritual growth',
 'christian science',
 'lemon juice',
 'hard time',
 'large part',
 'past quarter',
 'poor man',
 'divine lord',
 'industrial work',
 'sun moon',
 'lord delayeth',
 'home life',
 'new members',
 'john wesley',
 'rich man',
 'important matters',
 'public sentiment',
 'wide awake',
 'great man',
 'deepest interest',
 'physical condition',
 'large sum',
 'upper part',
 'important points',
 'own weakness',
 'normal condition',
 'grand work',
 'christian age',
 'family altar',
 'investigative judgment',
 'american brethren',
 'total number',
 'eastern part',
 'sick people',
 'good faith',
 'important question',
 'missionary secretary',
 'blosser berrien',
 'double boiler',
 'religious bodies',
 'northern part',
 'moral government',
 'flesh food',
 'small children',
 'mortal man',
 'buenos ayres',
 'medical mission',
 'great light',
 'special number',
 'michigan tract',
 'special course',
 'religious test',
 'such power',
 'old man',
 'bright hope',
 'eternal ages',
 'thy foot',
 'infinite wisdom',
 'general agent',
 'object lesson',
 'native workers',
 'whole number',
 'mucous membrane',
 'own hands',
 'great joy',
 'moral law',
 'new world',
 'tent meeting',
 'digestive organs',
 'great city',
 'great words',
 'familiar spirits',
 'glad day',
 'medical secretary',
 'leo xiii',
 'whole subject',
 'practical value',
 'civil government',
 'week fund',
 'episcopal church',
 'thy children',
 'cubic feet',
 'religious work',
 'expiration date',
 'sabbath april',
 'sample copy',
 'large cities',
 'the youth',
 'conference laborers',
 'general meetings',
 'own power',
 'various branches',
 'capital city',
 'true tabernacle',
 'the advocate',
 'own image',
 'great object',
 'thick darkness',
 'gold coast',
 'excellent success',
 'jewish church',
 'special reference',
 'conference secretary',
 'emperor william',
 'rich men',
 'real estate',
 'active service',
 'year book',
 'spiritual gifts',
 'young lady',
 'missionary purposes',
 'dear friend',
 'present century',
 'heathen nations',
 'recent meeting',
 'cold water',
 'divine law',
 'definite plans',
 'own experience',
 'chicago ill',
 'large company',
 'onward march',
 'careful attention',
 'intermediate school',
 'new order',
 'practical experience',
 'new churches',
 'proper way',
 'temporal millennium',
 'loud cry',
 'prosperous condition',
 'subscription price',
 'text book',
 'sufficient amount',
 'los angeles',
 'precious time',
 'great pleasure',
 'special care',
 'dear son',
 'ingathering signs',
 'right arm',
 'nurses course',
 'false prophet',
 'boat rescue',
 'health reform',
 'full report',
 'state agent',
 'good deeds',
 'dead men',
 'life eternal',
 'different departments',
 'such persons',
 'whole city',
 'whole field',
 'general plan',
 'great king',
 'san jose',
 'forty years',
 'tent effort',
 'true light',
 'late clean copies',
 'religious sentiment',
 'efficient workers',
 'rural districts',
 'good reading',
 'present position',
 'funeral service',
 'glorious light',
 'sure foundation',
 'regular meetings',
 'high places',
 'outside attendance',
 'biennial session',
 'own works',
 'good help',
 'missionary spirit',
 'strong man',
 'right principles',
 'rapid growth',
 'real value',
 'great image',
 'field agent',
 'such time',
 'hearty cooperation',
 'own heart',
 'tent season',
 'foreign mission',
 'old time',
 'social meetings',
 'heart disease',
 'thy voice',
 'bro andrews',
 'past history',
 'great principle',
 'good beginning',
 'county seat',
 'christian life',
 'small town',
 'general principles',
 'whole thing',
 'poor soul',
 'financial condition',
 'large degree',
 'full assurance',
 'bay city',
 'pure gold',
 'thy mind',
 'assistant secretary',
 'different churches',
 'church school teachers',
 'definite day',
 'unto salvation',
 'christian education',
 'good service',
 'money orders',
 'electric lights',
 'missionary workers',
 'full amount',
 'thy name',
 'bible training',
 'biennial period',
 'personal letter',
 'own land',
 'workers meeting',
 'own eyes',
 'white flour',
 'religious questions',
 'eternal weight',
 'great degree',
 'new wine',
 'general articles',
 'evening meeting',
 'civil liberty',
 'natural result',
 'careful consideration',
 'thy son',
 'various conferences',
 'whole nation',
 'conference workers',
 'thy house',
 'strange thing',
 'personal labor',
 'lovely whatsoever things',
 'lord thy',
 'loma linda',
 'thy work',
 'own family',
 'joint heirs',
 'own mind',
 'disease germs',
 'thy father',
 'alimentary canal',
 'long way',
 'worth living',
 'large quantity',
 'human wisdom',
 'small quantities',
 'tent meetings',
 'own way',
 'divine grace',
 'private letter',
 'great day',
 'active part',
 'intelligent people',
 'additional word',
 'physical health',
 'wrong habits',
 'saloon keepers',
 'natural immortality',
 'general way',
 'student body',
 'european division',
 'medical men',
 'good use',
 'mission field',
 'eternal world',
 'common use',
 'religious things',
 'yellow fever',
 'students library',
 'good conscience',
 'good judgment',
 'school donations',
 'early morning',
 'human laws',
 'faithful servant',
 'the home missionary',
 'human system',
 'own self',
 'own soul',
 'high school',
 'office address',
 'great apostle',
 'good attendance',
 'times past',
 'good books',
 'great army',
 'god rom',
 'old year',
 'considerable number',
 'important events',
 'sabbath october',
 'pacific union conference',
 'local conferences',
 'christian world',
 'such teaching',
 'local church',
 'special prayer',
 'great source',
 'great struggle',
 'large amount',
 'foreign fields',
 'resurrection morning',
 'school department',
 'post offices',
 'lord calls',
 'good fruit',
 'bible readings',
 'different kinds',
 'divine plan',
 'return unto',
 'intermediate schools',
 'subscription books',
 'mission board',
 'large tent',
 'personal effort',
 'temperance instructor',
 'new law',
 'entire field',
 'certain class',
 'conference tract society',
 'dead bodies',
 'young ladies',
 'dry land',
 'medical work',
 'business matters',
 'false prophets',
 'moral character',
 'young friends',
 'bible footlights',
 'whatsoever things',
 'open door',
 'health principles',
 'burnt offering',
 'hot water',
 'vice president',
 'warning message',
 'special meeting',
 'thy people',
 'fellow citizens',
 'subscription list',
 'american federation',
 'american medical association',
 'text books',
 'entire year',
 'wrong side',
 'great men',
 'lord lord',
 'great white throne',
 'great privilege',
 'mountain side',
 'school year',
 'early years',
 'glorious work',
 'good report',
 'great harvest field',
 'church buildings',
 'address cts',
 'man hath',
 'new territory',
 'spake often',
 'graysville tenn',
 'night meetings',
 'school teacher',
 'good advice',
 'true whatsoever things',
 'church fellowship',
 'early date',
 'day school',
 'school board',
 'great increase',
 'heavy burden',
 'free gift',
 'annual report',
 'old friend',
 'natural law',
 'great benefit',
 'intense interest',
 'mission offerings',
 'prayer meeting',
 'spiritual interests',
 'flesh foods',
 'great cities',
 'tract society office',
 'thou knowest',
 'good tidings',
 'own hand',
 'self respect',
 'full time',
 'small number',
 'special work',
 'spiritual welfare',
 'junior college',
 'good attention',
 'roman power',
 'inhabitants thereof',
 'own salvation',
 'divine truth',
 'deep interest',
 'noble work',
 'things work',
 'important meeting',
 'sixty years',
 'eye salve',
 'foreign missions',
 'lord bath',
 'ice cream',
 'gross darkness',
 'good day',
 'old dispensation',
 'recent visit',
 'review and herald',
 'county jail',
 'own people',
 'state meeting',
 'different countries',
 'proper observance',
 'little company',
 'warm weather',
 'foreign field',
 'rest unto',
 'whole truth',
 'native land',
 'special instruction',
 'seventhday adventists',
 'human law',
 'christian home',
 'health reformer',
 'conference association',
 'wise pass',
 'senate committee',
 'christian duty',
 'net gain',
 'utter destruction',
 'school work',
 'opening exercises',
 'internal organs',
 'the advent review and sabbath herald vol',
 'papal power',
 'asia minor',
 'religious rights',
 'large number',
 'great sacrifice',
 'right thing',
 'general meeting',
 'new converts',
 'various ways',
 'rainy season',
 'daily bread',
 'march acceptance',
 'spiritual blessings',
 'beautiful grove',
 'medo persia',
 'perilous times',
 'earnest effort',
 'considerable interest',
 'church organization',
 'good experiences',
 'ill health',
 'full quota',
 'lord desires',
 'daily life',
 'own glory',
 'entire family',
 'old woman',
 'thy mouth',
 'universal peace',
 'past year',
 'bible teacher',
 'whole law',
 'evangelistic work',
 'old home',
 'early age',
 'prime minister',
 'great variety',
 'rich blessings',
 'present war',
 'national government',
 'great influence',
 'new ones',
 'new book',
 'public meetings',
 'school house',
 'early history',
 'right way',
 'redemption draweth nigh',
 'dark continent',
 'divine word',
 'etc etc',
 'special message',
 'civil affairs',
 'previous meeting',
 'long run',
 'fear god',
 'public opinion',
 'hinsdale sanitarium',
 'baptismal service',
 'class matter',
 'new era',
 'extra postage',
 'firm believer',
 'important branch',
 'pacific ocean',
 'common law',
 'future work',
 'things whatsoever',
 'thy life',
 'marvelous light',
 'state secretary',
 'new address',
 'home missionary',
 'russian government',
 'papal church',
 'unto death',
 'small company',
 'page sheet',
 'rich harvest',
 'carnal mind',
 'annual camp meeting',
 'almighty god',
 'huntsville ala',
 'deep impression',
 'such instruction',
 'uriah smith',
 'street car',
 'vegetable kingdom',
 'sabbath march',
 'different denominations',
 'good interest',
 'equal rights',
 'school secretary',
 'such conditions',
 'washington post',
 'great care',
 'morning service',
 'such books',
 'good condition',
 'annual meeting',
 'foreign lands',
 'good courage',
 'southwestern union conference',
 'such schools',
 'uncle sam',
 'farm work',
 'advent review',
 'whole life',
 'life giver',
 'meeting the',
 'sinful man',
 'opposite side',
 'regular work',
 'new building',
 'tenth day',
 'great interest',
 'precious truth',
 'general conference daily bulletin',
 'new place',
 'family tents',
 'school building',
 'young man',
 'ten commandments',
 'public school system',
 'outside interest',
 ...}
In [11]:
# Save the extracted noun phrases to disk, one phrase per line, so that the
# slow extraction step can be kept out of the streaming process.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
NOUN_PHRASES_PATH = "/Users/jeriwieringa/Dissertation/drafts/data/module-3/2017-04-noun-phrases-2000.txt"

# The `with` statement closes the file when the block exits, so no explicit
# close() call is needed afterwards.
with open(NOUN_PHRASES_PATH, "w") as o:
    # Iterate over `phrases` directly; copying it into a list first adds nothing.
    for phrase in phrases:
        o.write("{}\n".format(phrase))
In [12]:
# %load ../shared_elements/system_info.py
import IPython
print (IPython.sys_info())
!pip freeze
{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'UTF-8',
 'ipython_path': '/Users/jeriwieringa/miniconda3/envs/dissertation2/lib/python3.5/site-packages/IPython',
 'ipython_version': '5.1.0',
 'os_name': 'posix',
 'platform': 'Darwin-16.5.0-x86_64-i386-64bit',
 'sys_executable': '/Users/jeriwieringa/miniconda3/envs/dissertation2/bin/python',
 'sys_platform': 'darwin',
 'sys_version': '3.5.2 |Continuum Analytics, Inc.| (default, Jul  2 2016, '
                '17:52:12) \n'
                '[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]'}
alabaster==0.7.10
anaconda-client==1.5.5
appdirs==1.4.3
appnope==0.1.0
argh==0.26.1
Babel==2.3.4
beautifulsoup4==4.5.3
blinker==1.4
bokeh==0.12.4
boto==2.43.0
brewer2mpl==1.4.1
bz2file==0.98
chest==0.2.3
cleanOCR==0.1
cloudpickle==0.2.2
clyent==1.2.2
cycler==0.10.0
dask==0.12.0
datashader==0.4.0
datashape==0.5.2
decorator==4.0.11
docutils==0.13.1
doit==0.30.3
gensim==0.12.4
geoplotlib==0.3.2
ggplot==0.11.5
Ghost.py==0.2.3
ghp-import2==1.0.1
GoH==0.1
gspread==0.4.1
HeapDict==1.0.0
httplib2==0.9.2
husl==4.0.3
ijson==2.3
imagesize==0.7.1
ipykernel==4.5.2
ipython==5.1.0
ipython-genutils==0.1.0
ipywidgets==5.2.2
Jinja2==2.8
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.4.0
jupyter-console==5.0.0
jupyter-contrib-core==0.3.0
jupyter-contrib-nbextensions==0.2.2
jupyter-core==4.2.1
jupyter-highlight-selected-word==0.0.5
jupyter-latex-envs==1.3.5.4
jupyter-nbextensions-configurator==0.2.3
llvmlite==0.14.0
locket==0.2.0
Logbook==1.0.0
lxml==3.7.3
MacFSEvents==0.7
Mako==1.0.6
Markdown==2.6.7
MarkupSafe==1.0
matplotlib==2.0.0
memory-profiler==0.43
mistune==0.7.3
multipledispatch==0.4.9
natsort==5.0.2
nb-anacondacloud==1.2.0
nb-conda==2.0.0
nb-conda-kernels==2.0.0
nb-config-manager==0.1.3
nbbrowserpdf==0.2.1
nbconvert==4.2.0
nbformat==4.2.0
nbpresent==3.0.2
networkx==1.11
Nikola==7.8.4
nltk==3.2.2
notebook==4.2.3
numba==0.29.0
numpy==1.12.1
oauth2client==4.0.0
OCRreports==0.1
odo==0.5.0
olefile==0.44
packaging==16.8
pandas==0.19.2
partd==0.3.6
path.py==0.0.0
pathtools==0.1.2
patsy==0.4.1
pdfminer3k==1.3.1
pexpect==4.0.1
pickleshare==0.7.4
piexif==1.0.12
Pillow==4.0.0
plotly==2.0.1
ply==3.10
pockets==0.3.2
prompt-toolkit==1.0.9
psutil==4.3.0
ptyprocess==0.5.1
py==1.4.32
pyasn1==0.1.9
pyasn1-modules==0.0.8
pycrypto==2.6.1
pyglet==1.2.4
Pygments==2.2.0
pyparsing==2.2.0
PyPDF2==1.25.1
PyRSS2Gen==1.1
pyshp==1.2.10
pytest==3.0.6
python-dateutil==2.6.0
pytz==2017.2
pyxDamerauLevenshtein==1.4.1
PyYAML==3.12
pyzmq==16.0.2
qtconsole==4.2.1
requests==2.13.0
rsa==3.4.2
scipy==0.18.1
seaborn==0.7.1
simplegeneric==0.8.1
six==1.10.0
smart-open==1.3.5
snowballstemmer==1.2.1
Sphinx==1.5.1
sphinx-rtd-theme==0.2.0
sphinxcontrib-napoleon==0.6.1
statsmodels==0.8.0
terminado==0.6
textblob==0.11.1
toolz==0.8.1
tornado==4.4.2
traitlets==4.3.1
Unidecode==0.4.20
verifyOCR==0.1
watchdog==0.8.3
wcwidth==0.1.7
webassets==0.11.1
wget==2.2
widgetsnbextension==1.2.6
ws4py==0.3.4
xarray==0.8.2
Yapsy==1.11.223
In [ ]: