Skip to content

Commit 18cc2b6

Browse files
committed
Add missing fields, better license selector, use ordered output
1 parent 8609b81 commit 18cc2b6

File tree

1 file changed

+62
-8
lines changed

1 file changed

+62
-8
lines changed

datapaka

Lines changed: 62 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import unicodecsv as csv
77
from jsontableschema import infer
88
import glob
99
import json
10+
from collections import OrderedDict
1011

1112
'''
1213
- do we have a data/ directory?
@@ -18,7 +19,9 @@ import json
1819
from colorama import init, Fore, Back, Style
1920
init(autoreset=True)
2021

21-
dp = datapackage.DataPackage()
22+
23+
# ensure we get the output in fixed order by specifying the descriptor argument
24+
dp = datapackage.DataPackage(descriptor=OrderedDict())
2225

2326
# Splash screen :3
2427
print Fore.YELLOW + " ___ ___ "
@@ -54,7 +57,7 @@ dp.descriptor['name'] = raw_input(Style.BRIGHT + "? ") or default_slug
5457

5558
# Description
5659
print "Give me a short, human " + Style.BRIGHT + "description" + Style.RESET_ALL + " for this data package."
57-
print Style.DIM + "A description gives more information about what's inside. 'Property prices in Berlin in April 2015' or 'UN population density statistics for the 2003-2013 period' let people know more specific details. (optional field)"
60+
print Style.DIM + "A description gives more information about what's inside. 'Property prices in Berlin in April 2015' or 'UN population density statistics for the 2003-2013 period' let people know more specific details. Markdown formatting can be used. (optional field)"
5861
dp.descriptor['description'] = raw_input(Style.BRIGHT + "? ")
5962

6063
# Version
@@ -63,11 +66,34 @@ print "What's the package " + Style.BRIGHT + "version number" + Style.RESET_ALL
6366
print Style.DIM + "The version number tracks changes and improvements on the data package. If you're starting a new one, go with the default value of 0.1.0, and update it whenever there is a change in the data layout."
6467
dp.descriptor['version'] = raw_input(Style.BRIGHT + "? ") or default_version
6568

69+
# Homepage
70+
print "What's the " + Style.BRIGHT + "homepage" + Style.RESET_ALL + " for this data package?"
71+
print Style.DIM + "Please include the 'http://' prefix. (optional field)"
72+
h = raw_input(Style.BRIGHT + "? ")
73+
if h:
74+
dp.descriptor['homepage'] = h
75+
6676
# License
67-
default_license = "PDDL-1.0"
77+
default_license = "1"
78+
license_options = {
79+
"1": "ODC-PDDL-1.0",
80+
"2": "ODbL-1.0",
81+
"3": "ODC-BY-1.0",
82+
"4": "CC0-1.0",
83+
"5": "CC-BY-4.0",
84+
"6": "CC-BY-SA-4.0",
85+
}
6886
print "What's the package " + Style.BRIGHT + "license" + Style.RESET_ALL + "? [Leave blank for '%s'] " % default_license
69-
print Style.DIM + "The license field is the identifier for the license this package is to be published under. We'll have a better menu to pick licenses, but in the meantime you can find the license identifiers at http://licenses.opendefinition.org/. The default is the Public Domain Dedication License."
70-
dp.descriptor['license'] = raw_input(Style.BRIGHT + "? ") or default_license
87+
print Style.DIM + "The license field states the license under which this package is to be published. Read more about each license at http://opendefinition.org/licenses/."
88+
print
89+
print('1) Public Domain Dedication and License (public domain)')
90+
print('2) Open Database License (attribution, sharealike)')
91+
print('3) Open Data Commons Attribution License (attribution)')
92+
print('4) Creative Commons Zero (public domain)')
93+
print('5) Creative Commons Attribution')
94+
print('6) Creative Commons Attribution-ShareAlike')
95+
selected = raw_input(Style.BRIGHT + "? ") or default_license
96+
dp.descriptor['license'] = license_options[selected.strip()]
7197

7298
# Sources
7399
dp.descriptor['sources'] = []
@@ -83,6 +109,34 @@ while another:
83109
if add_another not in ("y", "Y"):
84110
another = False
85111

112+
# Contributors
113+
dp.descriptor['contributors'] = []
114+
another = True
115+
print "Now I want to know about the " + Style.BRIGHT + "contributors" + Style.RESET_ALL + " of this data package."
116+
print Style.DIM + "I will ask you the name, e-mail and website URL for each, and then you'll be asked if you want to input another contributor or move ahead."
117+
while another:
118+
contrib_name = raw_input(Style.BRIGHT + "Contributor name? ")
119+
contrib_email = raw_input(Style.BRIGHT + "Contributor e-mail (optional)? ")
120+
contrib_url = raw_input(Style.BRIGHT + "Contributor URL (optional)? ")
121+
c = {"name": contrib_name}
122+
if contrib_email:
123+
c["email"] = contrib_email
124+
if contrib_url:
125+
c["web"] = contrib_url
126+
dp.descriptor['contributors'].append(c)
127+
print
128+
add_another = raw_input(Style.BRIGHT + "Add another contributor? [n]")
129+
if add_another not in ("y", "Y"):
130+
another = False
131+
132+
# Keywords
133+
print "Tell me some " + Style.BRIGHT + "keywords" + Style.RESET_ALL + " (tags) for this package so that users can find it in catalogs. [optional]"
134+
print Style.DIM + "Keywords can have spaces; separate distinct keywords with commas."
135+
k = raw_input(Style.BRIGHT + "? ")
136+
if k:
137+
dp.descriptor['keywords'] = [w.strip() for w in k.split(",")]
138+
139+
86140
# CSV files
87141
dp.descriptor['resources'] = []
88142
csv_files = glob.glob('data/*.csv')
@@ -101,14 +155,14 @@ for filepath in csv_files:
101155
slug = raw_input(" Slug for this file? [%s]" % default_slug)
102156
resource_name = slug or default_slug
103157
with open(filepath, 'rb') as f:
104-
headers = f.readline().rstrip('\n').split(',')
158+
headers = [s.decode("utf-8") for s in f.readline().rstrip('\n').split(',')]
105159
values = csv.reader(f, encoding="utf-8")
106160
print " Inferring column types, this might take a bit..."
107161
schema = infer(headers, values)
108-
109162
fields = []
110163
for field in schema['fields']:
111164
fieldname = field['name']
165+
print type(fieldname)
112166
field['title'] = raw_input(" Human title for field '%s'? " % fieldname)
113167
field['description'] = raw_input(" Short description for field '%s'? " % fieldname)
114168
fields.append(field)
@@ -127,7 +181,7 @@ if not os.path.exists('datapackage.json'):
127181
outfilename = 'datapackage.json'
128182
else:
129183
outfilename = 'datapackage-new.json'
130-
with codecs.open('datapackage-new.json', 'w', 'utf-8') as f:
184+
with codecs.open(outfilename, 'w', 'utf-8') as f:
131185
f.write(out)
132186

133187
print "All done! File saved as " + Style.BRIGHT + outfilename + Style.RESET_ALL + ". Ta!"

0 commit comments

Comments
 (0)