create-list-of-US-place-names

Table of Contents

The library used for working with shapefiles is pyshp (Python Shapefile Library): https://github.com/GeospatialPython/pyshp

In [1]:
import shapefile
import datetime

Data for the place names comes from the USGS Cities and Towns dataset, downloaded from https://nationalmap.gov/small_scale/atlasftp.html?openChapters=chpref#chpref on January 3, 2017.

In [2]:
# Location of the local copy of the USGS Cities and Towns shapefile.
shapefile_path = "/Users/jeriwieringa/Dissertation/drafts/data/external-data/citiesx020_nt00007/citiesx020.shp"
# Open the shapefile with pyshp.
sf = shapefile.Reader(shapefile_path)
In [3]:
# Inspect the attribute-field definitions of the shapefile.
sf.fields
Out[3]:
[('DeletionFlag', 'C', 1, 0),
 ['CITIESX020', 'N', 11, 0],
 ['FEATURE', 'C', 27, 0],
 ['NAME', 'C', 48, 0],
 ['POP_RANGE', 'C', 21, 0],
 ['POP_2000', 'N', 8, 0],
 ['FIPS55', 'C', 5, 0],
 ['COUNTY', 'C', 55, 0],
 ['FIPS', 'C', 5, 0],
 ['STATE', 'C', 2, 0],
 ['STATE_FIPS', 'C', 2, 0],
 ['DISPLAY', 'N', 1, 0]]
In [4]:
# Load all attribute records from the shapefile into a list.
records = sf.records()
In [5]:
# Count the records that were loaded.
len(records)
Out[5]:
35432
In [6]:
# Preview the first three records to see how the values are laid out.
records[:3]
Out[6]:
[[1,
  'Populated Place',
  'Attu',
  'Undetermined',
  -99999,
  '04540',
  'Aleutians West Census Area',
  '02016',
  'AK',
  '02',
  1],
 [2,
  'Populated Place',
  'Point Hope',
  '0 - 9,999',
  757,
  '61630',
  'North Slope Borough',
  '02185',
  'AK',
  '02',
  0],
 [3,
  'Populated Place',
  'Point Lay',
  'Undetermined',
  -99999,
  '61700',
  'North Slope Borough',
  '02185',
  'AK',
  '02',
  1]]
In [7]:
# Collect the place name from every record. Index 2 of a record holds the
# NAME value (each record is laid out as: id, feature type, name, ...).
placenames = [record[2] for record in records]
In [8]:
# Sanity check: there should be one name per record.
len(placenames)
Out[8]:
35432
In [9]:
# Preview the first ten extracted names.
placenames[:10]
Out[9]:
['Attu',
 'Point Hope',
 'Point Lay',
 'Diomede',
 'Gambell',
 'Tin City',
 'Savoonga',
 'Shishmaref',
 'Noatak',
 'Port Clarence']
In [10]:
# Write the word list: one lower-cased word per line, with multi-word place
# names (e.g. "Point Hope") broken into their individual words. The output
# file name is prefixed with today's date (ISO format, YYYY-MM-DD).
output_path = "/Users/jeriwieringa/Dissertation/drafts/data/word-lists/{}-place-names.txt".format(str(datetime.date.today()))
# An explicit encoding keeps the output of any non-ASCII place names from
# depending on the platform's default locale.
with open(output_path, "w", encoding="utf-8") as outfile:
    for name in placenames:
        # str.split() with no arguments yields a one-item list for a
        # single-word name and drops surrounding whitespace, so no separate
        # single-word branch is needed and blank names write nothing.
        for word in name.split():
            outfile.write("{}\n".format(word.lower()))
In [11]:
# %load shared_elements/system_info.py
# Record the interpreter and package versions this notebook was run with.
import IPython
# Print IPython's system-information report.
print (IPython.sys_info())
# List the installed packages and their versions via a shell command.
!pip freeze
{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'UTF-8',
 'ipython_path': '/Users/jeriwieringa/miniconda3/envs/dissertation2/lib/python3.5/site-packages/IPython',
 'ipython_version': '5.1.0',
 'os_name': 'posix',
 'platform': 'Darwin-16.3.0-x86_64-i386-64bit',
 'sys_executable': '/Users/jeriwieringa/miniconda3/envs/dissertation2/bin/python',
 'sys_platform': 'darwin',
 'sys_version': '3.5.2 |Continuum Analytics, Inc.| (default, Jul  2 2016, '
                '17:52:12) \n'
                '[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]'}
anaconda-client==1.5.5
appnope==0.1.0
argh==0.26.1
blinker==1.4
bokeh==0.12.3
boto==2.43.0
bz2file==0.98
chest==0.2.3
cloudpickle==0.2.1
clyent==1.2.2
dask==0.12.0
datashader==0.4.0
datashape==0.5.2
decorator==4.0.10
docutils==0.12
doit==0.29.0
gensim==0.12.4
Ghost.py==0.2.3
ghp-import2==1.0.1
gspread==0.4.1
HeapDict==1.0.0
httplib2==0.9.2
husl==4.0.3
ipykernel==4.5.2
ipython==5.1.0
ipython-genutils==0.1.0
ipywidgets==5.2.2
Jinja2==2.8
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.4.0
jupyter-console==5.0.0
jupyter-contrib-core==0.3.0
jupyter-contrib-nbextensions==0.2.2
jupyter-core==4.2.1
jupyter-highlight-selected-word==0.0.5
jupyter-latex-envs==1.3.5.4
jupyter-nbextensions-configurator==0.2.3
llvmlite==0.14.0
locket==0.2.0
Logbook==1.0.0
lxml==3.5.0
MacFSEvents==0.7
Mako==1.0.4
Markdown==2.6.7
MarkupSafe==0.23
mistune==0.7.3
multipledispatch==0.4.9
natsort==4.0.4
nb-anacondacloud==1.2.0
nb-conda==2.0.0
nb-conda-kernels==2.0.0
nb-config-manager==0.1.3
nbbrowserpdf==0.2.1
nbconvert==4.2.0
nbformat==4.2.0
nbpresent==3.0.2
networkx==1.11
Nikola==7.7.7
nltk==3.2.1
notebook==4.2.3
numba==0.29.0
numpy==1.11.2
oauth2client==4.0.0
odo==0.5.0
pandas==0.19.1
partd==0.3.6
path.py==0.0.0
pathtools==0.1.2
pexpect==4.0.1
pickleshare==0.7.4
Pillow==3.4.2
prompt-toolkit==1.0.9
psutil==4.3.0
ptyprocess==0.5.1
pyasn1==0.1.9
pyasn1-modules==0.0.8
pycrypto==2.6.1
Pygments==2.1.3
PyPDF2==1.25.1
PyRSS2Gen==1.1
pyshp==1.2.10
python-dateutil==2.6.0
pytz==2016.10
PyYAML==3.12
pyzmq==16.0.2
qtconsole==4.2.1
requests==2.12.3
rsa==3.4.2
scipy==0.18.1
simplegeneric==0.8.1
six==1.10.0
smart-open==1.3.5
terminado==0.6
textblob==0.11.1
toolz==0.8.1
tornado==4.4.2
traitlets==4.3.1
Unidecode==0.4.19
watchdog==0.8.3
wcwidth==0.1.7
webassets==0.11.1
widgetsnbextension==1.2.6
ws4py==0.3.4
xarray==0.8.2
Yapsy==1.11.223
In [ ]: