-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
When data.dwc:day or data.dwc:month is missing but data.dwc:year is provided, the datecollected column is populated using the current month and/or day for the missing parts.
This error comes from the following function:
idb-backend/idb/helpers/conversions.py
Lines 544 to 606 in 3c9551c
| def dateGrabber(t, d): | |
| r = {} | |
| df = { | |
| "records": [ | |
| ["datemodified", "idigbio:dateModified"], | |
| ["datecollected", "dwc:eventDate"], | |
| ], | |
| "mediarecords": [ | |
| ["modified", "dcterms:modified"], | |
| ["datemodified", "idigbio:dateModified"], | |
| ], | |
| "publishers": [ | |
| ["datemodified", "idigbio:dateModified"], | |
| ], | |
| "recordsets": [ | |
| ["datemodified", "idigbio:dateModified"], | |
| ] | |
| } | |
| for f in df[t]: | |
| fv = getfield(f[1], d) | |
| if fv is not None: | |
| # dates are more sensitivie to lower case then upper. | |
| fv = fv.upper() | |
| try: | |
| x = dateutil.parser.parse(fv) | |
| if x.tzinfo is None: | |
| x = x.replace(tzinfo=pytz.utc) | |
| try: | |
| x < datetime.datetime.now(pytz.utc) | |
| except: | |
| x = x.replace(tzinfo=pytz.utc) | |
| r[f[0]] = x | |
| except: | |
| pass | |
| if f[0] not in r: | |
| r[f[0]] = None | |
| if "datecollected" in r and r["datecollected"] is None: | |
| year = getfield("dwc:year", d) | |
| month = getfield("dwc:month", d) | |
| day = getfield("dwc:day", d) | |
| sd_of_year = getfield("dwc:startDayOfYear", d) | |
| if year is not None: | |
| try: | |
| if month is not None: | |
| if day is not None: | |
| r["datecollected"] = dateutil.parser.parse( | |
| "{0}-{1}-{2}".format(year, month, day)).date() | |
| elif sd_of_year is not None: | |
| r["datecollected"] = (datetime.datetime( | |
| year, 1, 1) + datetime.timedelta(locale.atoi(sd_of_year) - 1)).date() | |
| else: | |
| r["datecollected"] = dateutil.parser.parse( | |
| "{0}-{1}".format(year, month)).date() | |
| else: | |
| r["datecollected"] = dateutil.parser.parse(year).date() | |
| except: | |
| pass | |
| if "datecollected" in r and r["datecollected"] is not None: | |
| r["startdayofyear"] = r["datecollected"].timetuple().tm_yday | |
| return r |
Here is the line causing this issue:
idb-backend/idb/helpers/conversions.py
Line 599 in 3c9551c
| r["datecollected"] = dateutil.parser.parse(year).date() |
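This is easy to reproduce in Python. dateutil.parser.parse fills any component missing from the input string from its `default` argument, which falls back to today's date when omitted, so a bare year picks up the current month and day (the output below reflects the date on which it was run):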
import dateutil.parser
import datetime
year = "2010"
dateutil.parser.parse(year).date()
Out[1]: datetime.date(2010, 4, 1)
Metadata
Metadata
Assignees
Labels
No labels