@@ -844,74 +844,50 @@ def fixBOR(t, r):
844844 These comparisons using the lowercased version seem to work, even though supplied values generally
845845 match the class name in the standard. For example, to find PreservedSpecimen we look for "preserved".
846846 """
847- if filled ("basisofrecord" , r ):
848- if "preserved" in r ["basisofrecord" ]:
849- r ["basisofrecord" ] = "preservedspecimen"
850- elif "fossil" in r ["basisofrecord" ]:
851- r ["basisofrecord" ] = "fossilspecimen"
852- elif "living" in r ["basisofrecord" ]:
853- r ["basisofrecord" ] = "livingspecimen"
854- elif "material" in r ["basisofrecord" ]:
855- r ["basisofrecord" ] = "materialsample"
856- elif "specimen" in r ["basisofrecord" ]:
857- r ["basisofrecord" ] = "preservedspecimen"
858- elif "machine" in r ["basisofrecord" ] and "observation" in r ["basisofrecord" ]:
859- r ["basisofrecord" ] = "machineobservation"
860- elif "observation" in r ["basisofrecord" ]:
861- r ["basisofrecord" ] = "humanobservation"
862- elif "occurrence" in r ["basisofrecord" ]:
863- r ["basisofrecord" ] = "occurrence"
847+ if t == "records" :
848+ if filled ("basisofrecord" , r ):
849+ if "preserved" in r ["basisofrecord" ]:
850+ r ["basisofrecord" ] = "preservedspecimen"
851+ elif "fossil" in r ["basisofrecord" ]:
852+ r ["basisofrecord" ] = "fossilspecimen"
853+ elif "living" in r ["basisofrecord" ]:
854+ r ["basisofrecord" ] = "livingspecimen"
855+ elif "material" in r ["basisofrecord" ]:
856+ r ["basisofrecord" ] = "materialsample"
857+ elif "specimen" in r ["basisofrecord" ]:
858+ r ["basisofrecord" ] = "preservedspecimen"
859+ elif "machine" in r ["basisofrecord" ] and "observation" in r ["basisofrecord" ]:
860+ r ["basisofrecord" ] = "machineobservation"
861+ elif "observation" in r ["basisofrecord" ]:
862+ r ["basisofrecord" ] = "humanobservation"
863+ elif "occurrence" in r ["basisofrecord" ]:
864+ r ["basisofrecord" ] = "occurrence"
865+ else :
866+ r ["basisofrecord" ] = None
867+ r ["flag_dwc_basisofrecord_removed" ] = True
868+ r ["flag_dwc_basisofrecord_invalid" ] = True
864869 else :
865- r ["basisofrecord" ] = None
866- r ["flag_dwc_basisofrecord_removed" ] = True
867870 r ["flag_dwc_basisofrecord_invalid" ] = True
868871
869- # Disable based on feedback from John W. and Joanna
870- # if r["basisofrecord"] == "preservedspecimen":
871- # paleo_terms = [
872- # "bed",
873- # "group",
874- # "member",
875- # "formation",
876- # "lowestbiostratigraphiczone",
877- # "lithostratigraphicterms",
878- # "earliestperiodorlowestsystem",
879- # "earliesteraorlowesterathem",
880- # "earliestepochorlowestseries",
881- # "earliestageorloweststage",
882- # "latesteraorhighesterathem",
883- # "latestepochorhighestseries",
884- # "latestageorhigheststage",
885- # "latestperiodorhighestsystem",
886- # ]
887-
888- # for f in paleo_terms:
889- # if filled(f,r):
890- # r["flag_dwc_basisofrecord_paleo_conflict"] = True
891- # r["flag_dwc_basisofrecord_replaced"] = True
892- # r["basisofrecord"] = "fossilspecimen"
893- # break
894- else :
895- r ["flag_dwc_basisofrecord_invalid" ] = True
896-
897872def fix_taxon_rank (t , r ):
898- if filled ("taxonrank" , r ):
899- if r ["taxonrank" ] in taxon_rank .mapping :
900- if taxon_rank .mapping [r ["taxonrank" ]] is None :
873+ if t == "records" :
874+ if filled ("taxonrank" , r ):
875+ if r ["taxonrank" ] in taxon_rank .mapping :
876+ if taxon_rank .mapping [r ["taxonrank" ]] is None :
877+ r ["taxonrank" ] = None
878+ r ["flag_dwc_taxonrank_removed" ] = True
879+ r ["flag_dwc_taxonrank_invalid" ] = True
880+ elif r ["taxonrank" ] != taxon_rank .mapping [r ["taxonrank" ]]:
881+ r ["taxonrank" ] = taxon_rank .mapping [r ["taxonrank" ]]
882+ r ["flag_dwc_taxonrank_replaced" ] = True
883+ else :
884+ pass # Taxon Rank is in the mapping as an identity.
885+ elif r ["taxonrank" ] not in taxon_rank .acceptable :
901886 r ["taxonrank" ] = None
902887 r ["flag_dwc_taxonrank_removed" ] = True
903888 r ["flag_dwc_taxonrank_invalid" ] = True
904- elif r ["taxonrank" ] != taxon_rank .mapping [r ["taxonrank" ]]:
905- r ["taxonrank" ] = taxon_rank .mapping [r ["taxonrank" ]]
906- r ["flag_dwc_taxonrank_replaced" ] = True
907889 else :
908- pass # Taxon Rank is in the mapping as an identity.
909- elif r ["taxonrank" ] not in taxon_rank .acceptable :
910- r ["taxonrank" ] = None
911- r ["flag_dwc_taxonrank_removed" ] = True
912- r ["flag_dwc_taxonrank_invalid" ] = True
913- else :
914- pass # Taxon Rank is Acceptable, but not mapped
890+ pass # Taxon Rank is Acceptable, but not mapped
915891
916892# Step, count, ms, ms/count, action
917893# rc 1000 354.179 0.354179 record corrector
@@ -945,6 +921,7 @@ def grabAll(t, d):
945921 r .update (collect_genbank_sequences (t ,d ))
946922 # Done with non-dependent fields.
947923
924+
948925 gs_sn_crossfill (t , r )
949926 fixBOR (t , r )
950927 fix_taxon_rank (t , r )
@@ -961,7 +938,6 @@ def grabAll(t, d):
961938 if k .startswith ("flag_" ):
962939 r ["flags" ].append ("_" .join (k .split ("_" )[1 :]))
963940 r ["dqs" ] = score (t , r )
964-
965941 return r
966942
967943
0 commit comments