Skip to content

datecollected assigned as current month and day #229

@mgaynor1

Description

@mgaynor1

When the data.dwc:day or data.dwc:month is missing, but a data.dwc:year is provided, the datecollected column is assigned the current month and day.

This error comes from this:

def dateGrabber(t, d):
r = {}
df = {
"records": [
["datemodified", "idigbio:dateModified"],
["datecollected", "dwc:eventDate"],
],
"mediarecords": [
["modified", "dcterms:modified"],
["datemodified", "idigbio:dateModified"],
],
"publishers": [
["datemodified", "idigbio:dateModified"],
],
"recordsets": [
["datemodified", "idigbio:dateModified"],
]
}
for f in df[t]:
fv = getfield(f[1], d)
if fv is not None:
# dates are more sensitivie to lower case then upper.
fv = fv.upper()
try:
x = dateutil.parser.parse(fv)
if x.tzinfo is None:
x = x.replace(tzinfo=pytz.utc)
try:
x < datetime.datetime.now(pytz.utc)
except:
x = x.replace(tzinfo=pytz.utc)
r[f[0]] = x
except:
pass
if f[0] not in r:
r[f[0]] = None
if "datecollected" in r and r["datecollected"] is None:
year = getfield("dwc:year", d)
month = getfield("dwc:month", d)
day = getfield("dwc:day", d)
sd_of_year = getfield("dwc:startDayOfYear", d)
if year is not None:
try:
if month is not None:
if day is not None:
r["datecollected"] = dateutil.parser.parse(
"{0}-{1}-{2}".format(year, month, day)).date()
elif sd_of_year is not None:
r["datecollected"] = (datetime.datetime(
year, 1, 1) + datetime.timedelta(locale.atoi(sd_of_year) - 1)).date()
else:
r["datecollected"] = dateutil.parser.parse(
"{0}-{1}".format(year, month)).date()
else:
r["datecollected"] = dateutil.parser.parse(year).date()
except:
pass
if "datecollected" in r and r["datecollected"] is not None:
r["startdayofyear"] = r["datecollected"].timetuple().tm_yday
return r

Here is the line causing this issue:

r["datecollected"] = dateutil.parser.parse(year).date()

This is really easy to recreate in python as well:

import dateutil.parser     
import datetime    
 
year = "2010"   
dateutil.parser.parse(year).date()

Out[1]: datetime.date(2010, 4, 1)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions