I have a directory structure which is composed of Countries, States or other subdivision, and Cities.
Here's a way to use pathlib objects in a dictionary to containerize the hierarchy
for example:
Also changed the get_dir_contents method to return all contents of directory. Individual entries can be tested for type by adding after line 62:
added qualifier:
-----------------------------------------------------------------------
Another method added Nov 29, 2020 (more polished)
I have one module for each project that lays out all of the directories, URLs and common file locations in a relative structure, using pathlib.
Here's a sample for a geocoding project:
the module is named GeoPaths.py and is imported by just about every other module in the project.
A neat feature of using something like this is that you can run it on its own in a copy of the project to
immediately set up your directory structure (it will create missing directories, but will leave already existing
directories alone):
GeoPaths.py
here's the code that would do that:
MyModule.py
Increment by one for each sublevel, and paths will automatically be adjusted for all source code in that subdirectory.
Here's a way to use pathlib objects in a dictionary to containerize the hierarchy
from pathlib import Path
import os
import inspect
class CountryInfo:
    """Nested dictionary of pathlib paths describing a Country/State data tree.

    Instantiating the class changes the process working directory to the
    directory containing this source file, builds ``self.bpaths`` (relative
    ``Path`` objects rooted at ``../data``) and creates every directory named
    in that dictionary that does not exist yet.
    """

    def __init__(self):
        """Build the path dictionary and create any missing directories."""
        # abspath turns an empty dirname (``__file__`` given as a bare file
        # name) into the current directory instead of letting chdir('') fail.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))
        self.home = Path('.')
        self.home.mkdir(exist_ok=True)

        # Shared prefixes, so each level of the tree is spelled out only once.
        data = self.home / '..' / 'data'
        country = data / 'Country'
        usa = country / 'USA'

        self.bpaths = {
            'data': data,
            'Country': {
                'country': country,
                'USA': {
                    'usa': usa,
                    'Alaska': usa / 'Alaska',
                    'Alabama': usa / 'Alabama',
                    'Arkansas': usa / 'Arkansas',
                    'American Samoa': usa / 'AmericanSamoa',
                    'Arizona': usa / 'Arizona',
                    # ...
                },
            },
        }
        self.create_all_directories(self.bpaths)

    def get_dir_contents(self, path):
        """Return the entries of a directory.

        Args:
            path: directory to list.

        Returns:
            A list of ``Path`` objects, one per directory entry (empty for an
            empty directory), or ``None`` when ``path`` is not a ``Path`` or
            does not refer to an existing directory.
        """
        # is_dir() is False for missing paths and for regular files, so
        # iterdir() is never called on something that is not a directory.
        if isinstance(path, Path) and path.is_dir():
            return list(path.iterdir())
        return None

    def create_all_directories(self, path):
        """Recursively create every directory named in a path dictionary.

        Args:
            path: dictionary whose values are ``Path`` objects, nested
                dictionaries of the same shape, or anything else (for example
                URL strings), which is ignored.
        """
        for value in path.values():
            if isinstance(value, dict):
                self.create_all_directories(value)
            elif isinstance(value, Path) and not value.is_file():
                # Existing regular files are skipped; parents=True lets an
                # entry be created even when its parent is not in the dict.
                value.mkdir(parents=True, exist_ok=True)
def testit():
    """Print every entry of the Scottsdale directory under Arizona.

    Builds a CountryInfo (which creates the directory tree as a side effect),
    derives the Scottsdale path from the Arizona entry and lists its contents.
    """
    bp = CountryInfo()
    Arizona = bp.bpaths['Country']['USA']['Arizona']
    Scottsdale = Arizona / 'Scottsdale'
    files = bp.get_dir_contents(Scottsdale)
    if files is None:
        # None means the directory could not be listed at all, which is
        # different from a directory that exists but has no entries.
        print('Scottsdale directory does not exist')
    elif not files:
        print('Scottsdale directory is empty')
    else:
        for file in files:
            print(f'{file}')
if __name__ == '__main__':
testit()

Results of example:

Output:
../data/Country/USA/Arizona/Scottsdale/coslicense.pdf
../data/Country/USA/Arizona/Scottsdale/ct_BusinessLicences.csv

Since the elements of the dictionary are pathlib objects, they inherit all of the methods of pathlib, for example:
print(f'Scottsdale path: {Scottsdale.resolve()}')
will return the absolute path of the Scottsdale directory (I replaced root paths with ...):

Output:
Scottsdale path: .../Data-2TB/BusinessLists/data/Country/USA/Arizona/Scottsdale

Update: Added some error checking, and a new method create_all_directories which will create empty directories if they don't already exist.
Also changed the get_dir_contents method to return all contents of the directory. Individual entries can be tested for type by adding after line 62:
# Report the type of each entry in `files` (a list of pathlib.Path objects).
# is_symlink() is tested first because is_dir() and is_file() follow symbolic
# links: a link to an existing directory or file would otherwise be reported
# as that directory or file and never as a link.
for file in files:
    if file.is_symlink():
        print(f'{file.name} is a symbolic link')
    elif file.is_dir():
        print(f'{file.name} is a directory')
    elif file.is_file():
        print(f'{file.name} is a regular file')
# and so on, see the pathlib docs for all possibilities (3.7 is most complete if running that Python version)

Update Jul 25, 07:36 EST: added qualifier:
and not value.is_file()
to line 51, needed to prevent a crash if value is path + file, url, etc.
-----------------------------------------------------------------------
Another method added Nov 29, 2020 (more polished)
I have one module for each project that lays out all of the directories, URLs and common file locations in a relative structure, using pathlib.
Here's a sample for a geocoding project:
the module is named GeoPaths.py and is imported by just about every other module in the project.
A neat feature of using something like this is that you can run it on its own in a copy of the project to
immediately set up your directory structure (it will create missing directories, but will leave already existing
directories alone):
GeoPaths.py
import os
from pathlib import Path
class GeoPaths:
    """Directories, URLs and common file locations for the geocoding project.

    All paths are relative ``pathlib.Path`` objects. Instantiating the class
    changes the process working directory to the directory containing this
    source file and creates every project directory that does not exist yet;
    directories that already exist are left alone.
    """

    def __init__(self, depth=0):
        """Lay out the project tree and create any missing directories.

        Args:
            depth: how many directory levels to climb from this file's
                directory before locating the project root one level further
                up. Only the magnitude is used; the sign is ignored.
        """
        # abspath turns an empty dirname (``__file__`` given as a bare file
        # name) into the current directory, so chdir never receives ''.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        self.homepath = Path('.')
        for _ in range(int(abs(depth))):
            self.homepath = self.homepath / '..'

        rootpath = self.homepath / '..'

        self.docspath = self._ensure_dir(rootpath / 'docs')
        self.testspath = self._ensure_dir(rootpath / 'tests')
        self.datapath = self._ensure_dir(rootpath / 'data')

        self.csvpath = self._ensure_dir(self.datapath / 'csv')
        self.htmlpath = self._ensure_dir(self.datapath / 'html')
        self.jsonpath = self._ensure_dir(self.datapath / 'json')
        self.MasterAddressPath = self._ensure_dir(
            self.datapath / 'MasterAddressDatabase')
        self.prettypath = self._ensure_dir(self.datapath / 'pretty')
        self.tmppath = self._ensure_dir(self.datapath / 'tmp')

        # Osm data is arranged by state and file type.
        # A rather long directory tree, but laid out here for ease of use in software
        self.osmpath = self._ensure_dir(self.datapath / 'osm')
        self.geofabrik_datapath = self._ensure_dir(
            self.osmpath / 'GeofabrikAndCensus')

        # URLs
        # Plain assignment: written as ``name: 'value'`` this would only be an
        # annotation and the attribute would never exist on the instance.
        self.TigerLineGeoDatabase = 'https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-geodatabase-file.html'
        self.qgis_plugins = 'https://plugins.qgis.org/plugins/?page=1&&'
        self.osmfilelink = 'https://ftp.osuosl.org/pub/openstreetmap/planet/'
        self.geofabrikserver = 'https://download.geofabrik.de/north-america.html'

        # Common files:
        self.geofabrikjson = self.jsonpath / 'GeofabrikLinks.json'

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (and any missing parents) if needed and return it."""
        path.mkdir(parents=True, exist_ok=True)
        return path
if __name__ == '__main__':
GeoPaths()

Before running the script, my directory structure for a new project looks like this:

Output:
├── src
│ └── GeoPaths.py
└── venv
...

After running GeoPaths.py, the directory structure looks like this:
$ . ./venv/bin/activate
(venv)$ python src/GeoPaths.py

Output:
.
├── data
│ ├── csv
│ ├── html
│ ├── json
│ ├── MasterAddressDatabase
│ ├── osm
│ │ └── GeofabrikAndCensus
│ ├── pretty
│ └── tmp
├── docs
├── src
│ └── GeoPaths.py
├── tests
└── venv
...

Now, assume you have a module named MyModule.py in the src directory, and you want to open a json file named sillyfile.json.
Here's the code that would do that:
MyModule.py
from GeoPaths import GeoPaths
import json
class MySillyClass:
    """Demonstrate writing and reading a JSON file located through GeoPaths."""

    def __init__(self):
        """Resolve the location of sillyfile.json inside the json directory."""
        self.gpaths = GeoPaths()
        self.jsonfile = self.gpaths.jsonpath / 'sillyfile.json'

    def create_dict(self):
        """Write a small sample dictionary to the JSON file."""
        scores = dict(Cowboys='21', GreenBayPackers='7')
        self.jsonfile.write_text(json.dumps(scores))

    def read_it_back(self):
        """Load the JSON file and print each entry as ``key: value``."""
        loaded = json.loads(self.jsonfile.read_text())
        for team in loaded:
            print(f"{team}: {loaded[team]}")
def main():
    """Write the sample JSON file, then read it back and print it."""
    demo = MySillyClass()
    # Order matters: the file has to be written before it can be read.
    demo.create_dict()
    demo.read_it_back()
if __name__ == '__main__':
main()

Results of running this script:

Output:
Cowboys: 21
GreenBayPackers: 7

The depth argument of GeoPaths can be used when code is in a subdirectory of src.
Increment by one for each sublevel, and paths will automatically be adjusted for all source code in that subdirectory.
