Commit cdf8273c authored by Thomas Robert

Init data science project structure

parents
/.env
/config-private.yml
.gitkeep
/data
/results
# Rebuild .env whenever config.yml or config-private.yml changes.
# The recipe line must start with a tab character.
.env: config.yml config-private.yml
	python src/misc/yaml-to-env.py
# Data science project structure
Based on https://drivendata.github.io/cookiecutter-data-science/
```
.
├── Makefile <- tasks
├── config.yml <- config file in YAML, can be exported as env vars if needed
├── config-private.yml <- config file with private config (password, api keys, etc.)
├── data
│   ├── raw
│   ├── intermediate
│   ├── processed
│   └── temp
├── results
│   ├── outputs
│   └── models
├── documents
│   ├── docs
│   ├── images
│   └── references
├── notebooks <- notebooks for explorations / prototyping
└── src <- all source code, internal org as needed
```
#!/usr/bin/env python
import yaml
import pipes
# Merge data structures
def merge(a, b):
    """Recursively merge ``b`` on top of ``a`` and return the result.

    * Two dicts are merged key by key: keys found only in ``a`` are kept,
      keys found in ``b`` are merged recursively with the matching value
      of ``a`` (``None`` when ``a`` has no such key).
    * Two lists are merged position by position; the shorter one is padded
      with ``None`` so the surplus items of the longer one are kept.
    * For anything else ``b`` wins, unless ``b`` is ``None``, in which case
      ``a`` is returned.

    Neither argument is mutated; merged dicts and lists are new objects.
    """
    # Imported here because the file does not import itertools at the top.
    # The function was renamed between Python 2 and Python 3.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2
        from itertools import izip_longest as zip_longest

    if isinstance(a, dict) and isinstance(b, dict):
        d = dict(a)
        d.update({k: merge(a.get(k), b[k]) for k in b})
        return d
    if isinstance(a, list) and isinstance(b, list):
        return [merge(x, y) for x, y in zip_longest(a, b)]
    return a if b is None else b
# Read config file, keep env
def readFileKeepEnv(filename):
    """Return the lines of ``filename`` that contain the ``#env`` marker.

    Every kept line is followed by one extra newline, so the lines stay
    separated even when the last line of the file has no trailing newline.
    An empty string is returned when no line carries the marker.
    """
    # Open the requested file (not a hard-coded name) and close it reliably.
    with open(filename, "r") as f:
        return "".join(line + "\n" for line in f if "#env" in line)
# Load config files
# Only the lines tagged with "#env" are parsed. safe_load is used because the
# files hold plain data; yaml.load() without an explicit Loader is unsafe and
# is rejected by recent PyYAML releases.
config = yaml.safe_load(readFileKeepEnv("config.yml"))
config_priv = yaml.safe_load(readFileKeepEnv("config-private.yml"))
# Private values override public ones. Fall back to an empty mapping when
# neither file has an "#env" line (the merge result is None in that case).
config = merge(config, config_priv) or {}
# NOTE(review): this echoes the merged config, private values included, to
# stdout -- confirm that is acceptable where the script runs.
print(config)

# Export as env vars
# TODO generalise to nested dict
try:
    from shlex import quote as _quote  # Python 3
except ImportError:
    from pipes import quote as _quote  # Python 2


def _env_quote(value):
    """Return ``value`` as a shell-quoted string (``None`` becomes ``''``)."""
    if value is None:
        value = ""
    elif not isinstance(value, (str, type(u""))):
        # YAML scalars may be int/float/bool; the quoting helper needs text.
        value = str(value)
    return _quote(value)


envFile = "".join(
    "%s=%s\n" % (_env_quote(k), _env_quote(v)) for k, v in config.items()
)
with open(".env", "w") as f:
    f.write(envFile)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment