Add missing fields, better license selector, use ordered output

rlafuente · rlafuente · commit 18cc2b6d3ff2 · 2016-11-12T15:40:47.000Z
diff --git a/datapaka b/datapaka
@@ -7,6 +7,7 @@ import unicodecsv as csv
 from jsontableschema import infer
 import glob
 import json
+from collections import OrderedDict
 
 '''
 - do we have a data/ directory?
@@ -18,7 +19,9 @@ import json
 from colorama import init, Fore, Back, Style
 init(autoreset=True)
 
-dp = datapackage.DataPackage()
+
+# ensure we get the output in fixed order by specifying the descriptor argument
+dp = datapackage.DataPackage(descriptor=OrderedDict())
 
 # Splash screen :3
 print Fore.YELLOW + "   ___    ___  "
@@ -54,7 +57,7 @@ dp.descriptor['name'] = raw_input(Style.BRIGHT + "? ") or default_slug
 
 # Description
 print "Give me a short, human " + Style.BRIGHT + "description" + Style.RESET_ALL + " for this data package."
-print Style.DIM + "A description gives more information about what's inside. 'Property prices in Berlin in April 2015' or 'UN population density statistics for the 2003-2013 period' let people know more specific details. (optional field)"
+print Style.DIM + "A description gives more information about what's inside. 'Property prices in Berlin in April 2015' or 'UN population density statistics for the 2003-2013 period' let people know more specific details. Markdown formatting can be used. (optional field)"
 dp.descriptor['description'] = raw_input(Style.BRIGHT + "? ")
 
 # Version
@@ -63,11 +66,34 @@ print "What's the package " + Style.BRIGHT + "version number" + Style.RESET_ALL
 print Style.DIM + "The version number tracks changes and improvements on the data package. If you're starting a new one, go with the default value of 0.1.0, and update it whenever there is a change in the data layout."
 dp.descriptor['version'] = raw_input(Style.BRIGHT + "? ") or default_version
 
+# Homepage
+print "What's the " + Style.BRIGHT + "homepage" + Style.RESET_ALL + " for this data package?"
+print Style.DIM + "Please include the 'http://' prefix. (optional field)"
+h = raw_input(Style.BRIGHT + "? ")
+if h:
+    dp.descriptor['homepage'] = h
+
 # License
-default_license = "PDDL-1.0"
+default_license = "1"
+license_options = {
+    "1": "ODC-PDDL-1.0",
+    "2": "ODbL-1.0",
+    "3": "ODC-BY-1.0",
+    "4": "CC0-1.0",
+    "5": "CC-BY-4.0",
+    "6": "CC-BY-SA-4.0",
+}
 print "What's the package " + Style.BRIGHT + "license" + Style.RESET_ALL + "? [Leave blank for '%s'] " % default_license
-print Style.DIM + "The license field is the identifier for the license this package is to be published under. We'll have a better menu to pick licenses, but in the meantime you can find the license identifiers at http://licenses.opendefinition.org/. The default is the Public Domain Dedication License."
-dp.descriptor['license'] = raw_input(Style.BRIGHT + "? ") or default_license
+print Style.DIM + "The license field states the license under which this package is to be published. Read more about each license at http://opendefinition.org/licenses/."
+print
+print('1) Public Domain Dedication and License (public domain)')
+print('2) Open Database License (attribution, sharealike)')
+print('3) Open Data Commons Attribution License (attribution)')
+print('4) Creative Commons Zero (public domain)')
+print('5) Creative Commons Attribution')
+print('6) Creative Commons Attribution-ShareAlike')
+selected = (raw_input(Style.BRIGHT + "? ") or default_license).strip()  # strip first so whitespace-only input counts as blank
+dp.descriptor['license'] = license_options.get(selected, license_options[default_license])  # unknown choice falls back to the default instead of raising KeyError
 
 # Sources
 dp.descriptor['sources'] = []
@@ -83,6 +109,34 @@ while another:
     if add_another not in ("y", "Y"):
         another = False
 
+# Contributors
+dp.descriptor['contributors'] = []
+another = True
+print "Now I want to know about the " + Style.BRIGHT + "contributors" + Style.RESET_ALL + " of this data package."
+print Style.DIM + "I will ask you the name, e-mail and website URL for each, and then you'll be asked if you want to input another contributor or move ahead."
+while another:
+    contrib_name = raw_input(Style.BRIGHT + "Contributor name? ")
+    contrib_email = raw_input(Style.BRIGHT + "Contributor e-mail (optional)? ")
+    contrib_url = raw_input(Style.BRIGHT + "Contributor URL (optional)? ")
+    c = {"name": contrib_name}
+    if contrib_email:
+        c["email"] = contrib_email
+    if contrib_url:
+        c["web"] = contrib_url
+    dp.descriptor['contributors'].append(c)
+    print
+    add_another = raw_input(Style.BRIGHT + "Add another contributor? [n]")
+    if add_another not in ("y", "Y"):
+        another = False
+
+# Keywords
+print "Tell me some " + Style.BRIGHT + "keywords" + Style.RESET_ALL + " (tags) for this package so that users can find it in catalogs. [optional]"
+print Style.DIM + "Keywords can have spaces; separate distinct keywords with commas."
+k = raw_input(Style.BRIGHT + "? ")
+if k:
+    dp.descriptor['keywords'] = [w.strip() for w in k.split(",") if w.strip()]  # skip blanks left by trailing/doubled commas
+
+
 # CSV files
 dp.descriptor['resources'] = []
 csv_files = glob.glob('data/*.csv')
@@ -101,14 +155,14 @@ for filepath in csv_files:
     slug = raw_input("  Slug for this file? [%s]" % default_slug)
     resource_name = slug or default_slug
     with open(filepath, 'rb') as f:
-        headers = f.readline().rstrip('\n').split(',')
+        headers = [s.decode("utf-8") for s in f.readline().rstrip('\r\n').split(',')]  # file is opened 'rb': also drop '\r' so CRLF files don't leave it on the last header
         values = csv.reader(f, encoding="utf-8")
         print "  Inferring column types, this might take a bit..."
         schema = infer(headers, values)
-
         fields = []
         for field in schema['fields']:
             fieldname = field['name']
+            # ask for the human-readable title and description of each inferred field
             field['title'] = raw_input("  Human title for field '%s'? " % fieldname)
             field['description'] = raw_input("  Short description for field '%s'? " % fieldname)
             fields.append(field)
@@ -127,7 +181,7 @@ if not os.path.exists('datapackage.json'):
     outfilename = 'datapackage.json'
 else:
     outfilename = 'datapackage-new.json'
-with codecs.open('datapackage-new.json', 'w', 'utf-8') as f:
+with codecs.open(outfilename, 'w', 'utf-8') as f:
     f.write(out)
 
 print "All done! File saved as " + Style.BRIGHT + outfilename + Style.RESET_ALL + ". Ta!"