Charts-of-corpus-statistics

In [1]:
import os
import pandas as pd
In [2]:
# Input (corpus data) and output (visualization) directories.
# Each can be overridden with an environment variable so the notebook is
# runnable on machines other than the original author's; when the variable
# is unset the original hard-coded location is used, so existing behavior
# is unchanged.
dataDir = os.environ.get(
    "DISSERTATION_DATA_DIR",
    "/Users/jeriwieringa/Dissertation/data/",
)
vizDir = os.environ.get(
    "DISSERTATION_VIZ_DIR",
    "/Users/jeriwieringa/Dissertation/site/files/interact/",
)
In [3]:
# Build the path to the statistics CSV under the data directory, then
# read it into a DataFrame.
stats_csv_path = os.path.join(dataDir, 'corpus_metadata', 'yearlyStats.csv')
stats = pd.read_csv(stats_csv_path)
In [4]:
# Preview the first ten rows of the loaded statistics table.
stats.head(10)
Out[4]:
filename title_abbrev year issue total_tokens total_unique_tokens counts
0 RH18950924-V72-39-page8.txt RH 1895 RH18950924-V72-39 1716 595 Counter({'the': 164, 'of': 70, 'and': 63, 'to'...
1 RH19100217-V87-07-page6.txt RH 1910 RH19100217-V87-07 1337 525 Counter({'the': 74, 'to': 53, 'of': 47, 'and':...
2 YI19080804-V56-31-page3.txt YI 1908 YI19080804-V56-31 867 365 Counter({'the': 114, 'of': 47, 'in': 34, 'is':...
3 YI19141124-V62-47-page14.txt YI 1914 YI19141124-V62-47 1266 515 Counter({'the': 84, 'of': 50, 'to': 40, 'and':...
4 LB19000101-V02-11-page16.txt LB 1900 LB19000101-V02-11 318 230 Counter({'a': 9, 'and': 8, 'for': 6, 'copies':...
5 LB19001201-V03-10-page4.txt LB 1900 LB19001201-V03-10 1256 472 Counter({'the': 63, 'to': 54, 'and': 43, 'a': ...
6 LibM19120701-V07-03-page31.txt LibM 1912 LibM19120701-V07-03 594 289 Counter({'the': 57, 'of': 42, 'to': 21, 'and':...
7 PHJ18890501-V04-05-page7.txt PHJ 1889 PHJ18890501-V04-05 760 407 Counter({'the': 51, 'of': 31, 'and': 22, 'a': ...
8 LB19031201-V06-12-page29.txt LB 1903 LB19031201-V06-12 642 335 Counter({'the': 37, 'in': 25, 'to': 17, 'and':...
9 PUR19150422-V14-37-page6.txt PUR 1915 PUR19150422-V14-37 1119 546 Counter({'the': 48, 'to': 35, 'and': 33, 'in':...
In [5]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import cufflinks as cf