Skip to content

Commit 3c9551c

Browse files
authored
DRAFT: Cherry-pick from py3 (#224)
* Types other than `records` no longer get invalid flags set (such as `flag_dwc_basisofrecord_invalid` on a publisher).
* Add tests.
1 parent f5eadb7 commit 3c9551c

File tree

2 files changed

+81
-62
lines changed

2 files changed

+81
-62
lines changed

idb/helpers/conversions.py

Lines changed: 37 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -844,74 +844,50 @@ def fixBOR(t, r):
844844
These comparisons using lowercased version seem to work, even though supplied values generally
845845
match the class name in the standard. e.g. To find PreservedSpecimen we look for "preserved".
846846
"""
847-
if filled("basisofrecord", r):
848-
if "preserved" in r["basisofrecord"]:
849-
r["basisofrecord"] = "preservedspecimen"
850-
elif "fossil" in r["basisofrecord"]:
851-
r["basisofrecord"] = "fossilspecimen"
852-
elif "living" in r["basisofrecord"]:
853-
r["basisofrecord"] = "livingspecimen"
854-
elif "material" in r["basisofrecord"]:
855-
r["basisofrecord"] = "materialsample"
856-
elif "specimen" in r["basisofrecord"]:
857-
r["basisofrecord"] = "preservedspecimen"
858-
elif "machine" in r["basisofrecord"] and "observation" in r["basisofrecord"]:
859-
r["basisofrecord"] = "machineobservation"
860-
elif "observation" in r["basisofrecord"]:
861-
r["basisofrecord"] = "humanobservation"
862-
elif "occurrence" in r["basisofrecord"]:
863-
r["basisofrecord"] = "occurrence"
847+
if t == "records":
848+
if filled("basisofrecord", r):
849+
if "preserved" in r["basisofrecord"]:
850+
r["basisofrecord"] = "preservedspecimen"
851+
elif "fossil" in r["basisofrecord"]:
852+
r["basisofrecord"] = "fossilspecimen"
853+
elif "living" in r["basisofrecord"]:
854+
r["basisofrecord"] = "livingspecimen"
855+
elif "material" in r["basisofrecord"]:
856+
r["basisofrecord"] = "materialsample"
857+
elif "specimen" in r["basisofrecord"]:
858+
r["basisofrecord"] = "preservedspecimen"
859+
elif "machine" in r["basisofrecord"] and "observation" in r["basisofrecord"]:
860+
r["basisofrecord"] = "machineobservation"
861+
elif "observation" in r["basisofrecord"]:
862+
r["basisofrecord"] = "humanobservation"
863+
elif "occurrence" in r["basisofrecord"]:
864+
r["basisofrecord"] = "occurrence"
865+
else:
866+
r["basisofrecord"] = None
867+
r["flag_dwc_basisofrecord_removed"] = True
868+
r["flag_dwc_basisofrecord_invalid"] = True
864869
else:
865-
r["basisofrecord"] = None
866-
r["flag_dwc_basisofrecord_removed"] = True
867870
r["flag_dwc_basisofrecord_invalid"] = True
868871

869-
# Disable based on feedback from John W. and Joanna
870-
# if r["basisofrecord"] == "preservedspecimen":
871-
# paleo_terms = [
872-
# "bed",
873-
# "group",
874-
# "member",
875-
# "formation",
876-
# "lowestbiostratigraphiczone",
877-
# "lithostratigraphicterms",
878-
# "earliestperiodorlowestsystem",
879-
# "earliesteraorlowesterathem",
880-
# "earliestepochorlowestseries",
881-
# "earliestageorloweststage",
882-
# "latesteraorhighesterathem",
883-
# "latestepochorhighestseries",
884-
# "latestageorhigheststage",
885-
# "latestperiodorhighestsystem",
886-
# ]
887-
888-
# for f in paleo_terms:
889-
# if filled(f,r):
890-
# r["flag_dwc_basisofrecord_paleo_conflict"] = True
891-
# r["flag_dwc_basisofrecord_replaced"] = True
892-
# r["basisofrecord"] = "fossilspecimen"
893-
# break
894-
else:
895-
r["flag_dwc_basisofrecord_invalid"] = True
896-
897872
def fix_taxon_rank(t, r):
898-
if filled("taxonrank", r):
899-
if r["taxonrank"] in taxon_rank.mapping:
900-
if taxon_rank.mapping[r["taxonrank"]] is None:
873+
if t == "records":
874+
if filled("taxonrank", r):
875+
if r["taxonrank"] in taxon_rank.mapping:
876+
if taxon_rank.mapping[r["taxonrank"]] is None:
877+
r["taxonrank"] = None
878+
r["flag_dwc_taxonrank_removed"] = True
879+
r["flag_dwc_taxonrank_invalid"] = True
880+
elif r["taxonrank"] != taxon_rank.mapping[r["taxonrank"]]:
881+
r["taxonrank"] = taxon_rank.mapping[r["taxonrank"]]
882+
r["flag_dwc_taxonrank_replaced"] = True
883+
else:
884+
pass # Taxon Rank is in the mapping as an identity.
885+
elif r["taxonrank"] not in taxon_rank.acceptable:
901886
r["taxonrank"] = None
902887
r["flag_dwc_taxonrank_removed"] = True
903888
r["flag_dwc_taxonrank_invalid"] = True
904-
elif r["taxonrank"] != taxon_rank.mapping[r["taxonrank"]]:
905-
r["taxonrank"] = taxon_rank.mapping[r["taxonrank"]]
906-
r["flag_dwc_taxonrank_replaced"] = True
907889
else:
908-
pass # Taxon Rank is in the mapping as an identity.
909-
elif r["taxonrank"] not in taxon_rank.acceptable:
910-
r["taxonrank"] = None
911-
r["flag_dwc_taxonrank_removed"] = True
912-
r["flag_dwc_taxonrank_invalid"] = True
913-
else:
914-
pass # Taxon Rank is Acceptable, but not mapped
890+
pass # Taxon Rank is Acceptable, but not mapped
915891

916892
# Step, count, ms, ms/count action
917893
# rc 1000 354.179 0.354179 record corrector
@@ -945,6 +921,7 @@ def grabAll(t, d):
945921
r.update(collect_genbank_sequences(t,d))
946922
# Done with non-dependant fields.
947923

924+
948925
gs_sn_crossfill(t, r)
949926
fixBOR(t, r)
950927
fix_taxon_rank(t, r)
@@ -961,7 +938,6 @@ def grabAll(t, d):
961938
if k.startswith("flag_"):
962939
r["flags"].append("_".join(k.split("_")[1:]))
963940
r["dqs"] = score(t, r)
964-
965941
return r
966942

967943

tests/idb/test_helpers_conversions.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ def test_scientific_name_filler(self):
264264

265265

266266
class TestGrabAll(unittest.TestCase):
267-
def test_grab_all(self):
267+
def test_grab_all_on_record(self):
268268
r = {
269269
"idigbio:uuid": "0000012b-9bb8-42f4-ad3b-c958cb22ae45",
270270
"idigbio:etag": "cb7d64ec3aef36fa4dec6a028b818e331a67aacc",
@@ -387,6 +387,49 @@ def test_grab_all(self):
387387
self.assertEqual(output['hasImage'], True)
388388
self.assertEqual(output['hasMedia'], True)
389389

390+
def test_grab_all_on_mediarecord(self):
391+
r = {
392+
"uuid": "256098cf-723d-4633-a827-7fbf60a103ec",
393+
"type": "mediarecords",
394+
"etag": "5da26d3ee501516557689e55e1b1dd6c195414a2",
395+
"data": {
396+
"dcterms:type": "StillImage",
397+
"ac:providerManagedID": "urn:uuid:21943c26-e2d2-49d8-8626-18f1d5eeb56d",
398+
"ac:subtype": "Photograph",
399+
"ac:metadataLanguage": "en",
400+
"xmpRights:UsageTerms": "CC BY-NC-SA (Attribution-NonCommercial-ShareAlike)",
401+
"ac:thumbnailAccessURI": "https://bryophyteportal.org/imglib/storage/srp/bryophytes/SRP-B-0000/SRP-B-0000026_tn.jpg",
402+
"dcterms:format": "image/jpeg",
403+
"ac:goodQualityAccessURI": "https://bryophyteportal.org/imglib/storage/srp/bryophytes/SRP-B-0000/SRP-B-0000026.JPG",
404+
"coreid": "2212837",
405+
"dcterms:identifier": "https://bryophyteportal.org/imglib/storage/srp/bryophytes/SRP-B-0000/SRP-B-0000026_lg.jpg",
406+
"xmpRights:Owner": "Boise State University Lichen Herbarium (SRP)",
407+
"dcterms:rights": "http://creativecommons.org/licenses/by-nc/3.0/",
408+
"ac:accessURI": "https://bryophyteportal.org/imglib/storage/srp/bryophytes/SRP-B-0000/SRP-B-0000026_lg.jpg",
409+
"xmp:MetadataDate": "2013-06-05 23:10:50",
410+
"ac:associatedSpecimenReference": "https://bryophyteportal.org/portal/collections/individual/index.php?occid=2212837"
411+
}
412+
}
413+
d = copy.deepcopy(r["data"])
414+
output = conversions.grabAll("mediarecords", d)
415+
self.assertListEqual([],output['flags'])
416+
self.assertGreaterEqual(output['dqs'], 0.0)
417+
self.assertLessEqual(output['dqs'], 1.0)
418+
419+
def test_grab_all_on_publisher(self):
420+
r = {
421+
"auto_publish": "false",
422+
"base_url": "null",
423+
"name": "FCC Tardigrades Darwin Core Archive rss feed",
424+
"publisher_type": "rss",
425+
"recordsets": {},
426+
"rss_url": "https://mywaterbears.org/portal/content/dwca/rss.xml"
427+
}
428+
429+
output = conversions.grabAll("publishers", r)
430+
self.assertListEqual([],output['flags'])
431+
self.assertGreaterEqual(output['dqs'], 0.0)
432+
self.assertLessEqual(output['dqs'], 1.0)
390433

391434
class TestGetfield(unittest.TestCase):
392435
def test_getfield(self):

0 commit comments

Comments
 (0)