Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"dependencies": {
"ajv": "^6.12.6",
"antlr4": "4.8.0",
"axios": "^0.21.1",
"commander": "^6.2.0",
"csv-parse": "^4.8.8",
"fhir-crud-client": "^1.2.2",
Expand Down
35 changes: 20 additions & 15 deletions src/client/BaseClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,32 +25,37 @@ class BaseClient {
}

// Given an extractor configuration, initialize all the necessary extractors
initializeExtractors(extractorConfig, commonExtractorArgs) {
let allExtractorsValid = true;

async initializeExtractors(extractorConfig, commonExtractorArgs) {
// Loop to initialize the extractors
extractorConfig.forEach((curExtractorConfig) => {
const { label, type, constructorArgs } = curExtractorConfig;
logger.debug(`Initializing ${label} extractor with type ${type}`);
const ExtractorClass = this.extractorClasses[type];

try {
const newExtractor = new ExtractorClass({ ...commonExtractorArgs, ...constructorArgs });

if (newExtractor.validate) {
const isExtractorValid = newExtractor.validate();
allExtractorsValid = (allExtractorsValid && isExtractorValid);
if (isExtractorValid) {
logger.debug(`Extractor ${label} PASSED CSV validation`);
} else {
logger.debug(`Extractor ${label} FAILED CSV validation`);
}
}

this.extractors.push(newExtractor);
} catch (e) {
throw new Error(`Unable to initialize ${label} extractor with type ${type}: ${e.message}`);
}
});
// Validate every extractor that exposes a validate() method. validate() is async, so the
// results are gathered with Promise.all rather than forEach or an async reducer:
//  - forEach does not wait for the promises returned by async callbacks;
//  - an async reducer receives a Promise (always truthy) as its accumulator, so an earlier
//    failed validation would be masked by a later successful one.
// Promise.all also propagates a rejection from any validate() call to the caller.
const validationResults = await Promise.all(this.extractors.map(async (curExtractor) => {
  const { name } = curExtractor.constructor;

  // Extractors without a validate method do not affect the overall result
  if (!curExtractor.validate) return true;

  logger.debug(`Validating ${name}`);
  const isExtractorValid = await curExtractor.validate();
  if (isExtractorValid) {
    logger.debug(`Extractor ${name} PASSED CSV validation`);
  } else {
    logger.warn(`Extractor ${name} FAILED CSV validation`);
  }
  return isExtractorValid;
}));
// The configuration is valid only when every individual extractor validated successfully
const allExtractorsValid = validationResults.every((isValid) => Boolean(isValid));

if (allExtractorsValid) {
logger.info('Validation succeeded');
Expand Down
27 changes: 13 additions & 14 deletions src/extractors/BaseCSVExtractor.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
const path = require('path');
const { Extractor } = require('./Extractor');
const { CSVModule } = require('../modules');
const { validateCSV } = require('../helpers/csvValidator');
const logger = require('../helpers/logger');
const { CSVFileModule, CSVURLModule } = require('../modules');

class BaseCSVExtractor extends Extractor {
constructor({ filePath, csvSchema, unalterableColumns }) {
constructor({ filePath, url, csvSchema, unalterableColumns }) {
super();
this.unalterableColumns = unalterableColumns || [];
this.csvSchema = csvSchema;
this.filePath = path.resolve(filePath);
this.csvModule = new CSVModule(this.filePath, this.unalterableColumns);
if (url) {
this.url = url;
this.csvModule = new CSVURLModule(this.url, this.unalterableColumns);
} else if (filePath) {
this.filePath = filePath;
this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns);
} else {
throw new Error('Trying to instantiate a CSVExtractor without a filePath or url');
}
}

validate() {
if (this.csvSchema) {
logger.info(`Validating CSV file for ${this.filePath}`);
return validateCSV(this.filePath, this.csvSchema, this.csvModule.data);
}
logger.warn(`No CSV schema provided for ${this.filePath}`);
return true;
async validate() {
return this.csvModule.validate(this.csvSchema);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVAdverseEventExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ function formatData(adverseEventData, patientId) {
}

class CSVAdverseEventExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath });
constructor({ filePath, url }) {
super({ filePath, url });
}

async getAdverseEventData(mrn) {
Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVCancerDiseaseStatusExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ const logger = require('../helpers/logger');
const { CSVCancerDiseaseStatusSchema } = require('../helpers/schemas/csv');

class CSVCancerDiseaseStatusExtractor extends BaseCSVExtractor {
constructor({ filePath, implementation }) {
super({ filePath, csvSchema: CSVCancerDiseaseStatusSchema });
constructor({ filePath, url, implementation }) {
super({ filePath, url, csvSchema: CSVCancerDiseaseStatusSchema });
this.implementation = implementation;
}

Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVCancerRelatedMedicationExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ function formatData(medicationData, patientId) {
}

class CSVCancerRelatedMedicationExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath });
constructor({ filePath, url }) {
super({ filePath, url });
}

async getMedicationData(mrn) {
Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVClinicalTrialInformationExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ const { CSVClinicalTrialInformationSchema } = require('../helpers/schemas/csv');


class CSVClinicalTrialInformationExtractor extends BaseCSVExtractor {
constructor({ filePath, clinicalSiteID, clinicalSiteSystem }) {
super({ filePath, csvSchema: CSVClinicalTrialInformationSchema });
constructor({ filePath, url, clinicalSiteID, clinicalSiteSystem }) {
super({ filePath, url, csvSchema: CSVClinicalTrialInformationSchema });
if (!clinicalSiteID) logger.warn(`${this.constructor.name} expects a value for clinicalSiteID but got ${clinicalSiteID}`);
this.clinicalSiteID = clinicalSiteID;
this.clinicalSiteSystem = clinicalSiteSystem;
Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVConditionExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ function formatData(conditionData, patientId) {
}

class CSVConditionExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath, csvSchema: CSVConditionSchema });
constructor({ filePath, url }) {
super({ filePath, url, csvSchema: CSVConditionSchema });
}

async getConditionData(mrn) {
Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVObservationExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ function formatData(observationData, patientId) {
}

class CSVObservationExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath });
constructor({ filePath, url }) {
super({ filePath, url });
}

async getObservationData(mrn) {
Expand Down
9 changes: 7 additions & 2 deletions src/extractors/CSVPatientExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,15 @@ function joinAndReformatData(patientData) {
}

class CSVPatientExtractor extends BaseCSVExtractor {
constructor({ filePath, mask = [] }) {
constructor({ filePath, url, mask = [] }) {
// Define CSV Columns whose values should never be altered
const unalterableColumns = ['familyName', 'givenName'];
super({ filePath, csvSchema: CSVPatientSchema, unalterableColumns });
super({
filePath,
url,
csvSchema: CSVPatientSchema,
unalterableColumns,
});
this.mask = mask;
}

Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVProcedureExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ function formatData(procedureData, patientId) {
}

class CSVProcedureExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath });
constructor({ filePath, url }) {
super({ filePath, url });
}

async getProcedureData(mrn) {
Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVStagingExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ function formatStagingData(stagingData, categoryIds, patientId) {
}

class CSVStagingExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath });
constructor({ filePath, url }) {
super({ filePath, url });
}

async getStagingData(mrn) {
Expand Down
4 changes: 2 additions & 2 deletions src/extractors/CSVTreatmentPlanChangeExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ function formatData(tpcData, patientId) {
}

class CSVTreatmentPlanChangeExtractor extends BaseCSVExtractor {
constructor({ filePath }) {
super({ filePath, csvSchema: CSVTreatmentPlanChangeSchema });
constructor({ filePath, url }) {
super({ filePath, url, csvSchema: CSVTreatmentPlanChangeSchema });
}

async getTPCData(mrn, fromDate, toDate) {
Expand Down
43 changes: 43 additions & 0 deletions src/helpers/csvParsingUtils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
const logger = require('./logger');

// Standard normalizer for string comparisons: yields the lower-cased form of `str`.
// `str` must be a string; any other value will throw when toLowerCase is invoked.
function stringNormalizer(str) {
  const lowered = str.toLowerCase();
  return lowered;
}

// Translates null/nil-like cell values into empty strings.
//
// data               - array of row objects (column name -> cell value); never mutated
// unalterableColumns - column names whose values must be left untouched; matched
//                      against the row's columns after normalizing both sides
//
// Returns a new array of shallow-copied rows in which every cell whose normalized value
// is 'null' or 'nil' has been replaced with ''. Each replacement is logged at debug level
// and a single warning is logged if at least one replacement occurred.
function normalizeEmptyValues(data, unalterableColumns = []) {
  const emptyValues = new Set(['null', 'nil'].map(stringNormalizer));
  const protectedColumns = new Set(unalterableColumns.map(stringNormalizer));
  // Flag tracking if empty values were normalized or not.
  let wasEmptyNormalized = false;

  const newData = data.map((row, i) => {
    const newRow = { ...row };
    Object.keys(row)
      // Filter out unalterable columns
      .filter((col) => !protectedColumns.has(stringNormalizer(col)))
      .forEach((col) => {
        const value = newRow[col];
        // Only string cells can spell out null/nil; skipping other types (undefined, null,
        // numbers, ...) avoids a TypeError from calling the string normalizer on them
        if (typeof value !== 'string') return;
        if (!emptyValues.has(stringNormalizer(value))) return;

        logger.debug(
          `NULL/NIL values '${value}' found in row-${i}, col-${col}`,
        );
        wasEmptyNormalized = true;
        newRow[col] = '';
      });
    return newRow;
  });

  if (wasEmptyNormalized) {
    logger.warn('NULL/NIL values found and replaced with empty-strings');
  }
  return newData;
}

// Expose both CSV parsing helpers defined above to other modules
module.exports = {
stringNormalizer,
normalizeEmptyValues,
};
12 changes: 7 additions & 5 deletions src/helpers/csvValidator.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
const _ = require('lodash');
const logger = require('./logger');

function validateCSV(pathToCSVFile, csvSchema, csvData) {
// Validates csvData against the csvSchema
// Uses the csvFileIdentifier in logs for readability
function validateCSV(csvFileIdentifier, csvSchema, csvData) {
let isValid = true;

// Check headers
Expand All @@ -10,17 +12,17 @@ function validateCSV(pathToCSVFile, csvSchema, csvData) {
const fileDiff = _.difference(headers, csvSchema.headers.map((h) => h.name.toLowerCase()));

if (fileDiff.length > 0) {
logger.warn(`Found extra column(s) in CSV ${pathToCSVFile}: "${fileDiff.join(',')}"`);
logger.warn(`Found extra column(s) in CSV ${csvFileIdentifier}: "${fileDiff.join(',')}"`);
}

if (schemaDiff.length > 0) {
schemaDiff.forEach((sd) => {
const headerSchema = csvSchema.headers.find((h) => h.name.toLowerCase() === sd);
if (headerSchema.required) {
logger.error(`Column ${sd} is marked as required but is missing in CSV ${pathToCSVFile}`);
logger.error(`Column ${sd} is marked as required but is missing in CSV ${csvFileIdentifier}`);
isValid = false;
} else {
logger.warn(`Column ${sd} is missing in CSV ${pathToCSVFile}`);
logger.warn(`Column ${sd} is missing in CSV ${csvFileIdentifier}`);
}
});
}
Expand All @@ -31,7 +33,7 @@ function validateCSV(pathToCSVFile, csvSchema, csvData) {
const schema = csvSchema.headers.find((h) => h.name === key);

if (schema && schema.required && !value) {
logger.error(`Column ${key} marked as required but missing value in row ${i + 1} column ${j + 1} in CSV ${pathToCSVFile}`);
logger.error(`Column ${key} marked as required but missing value in row ${i + 1} column ${j + 1} in CSV ${csvFileIdentifier}`);
isValid = false;
}
});
Expand Down
4 changes: 4 additions & 0 deletions src/helpers/schemas/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
"filePath": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri"
},
"clinicalSiteID": {
"type": "string"
},
Expand Down
5 changes: 3 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ const {
MCODERadiationProcedureExtractor,
MCODESurgicalProcedureExtractor,
} = require('./extractors');
const { BaseFHIRModule, CSVModule } = require('./modules');
const { BaseFHIRModule, CSVFileModule, CSVURLModule } = require('./modules');
const { getEthnicityDisplay,
getPatientName,
getRaceCodesystem,
Expand Down Expand Up @@ -85,7 +85,8 @@ module.exports = {
CSVCancerRelatedMedicationExtractor,
CSVClinicalTrialInformationExtractor,
CSVConditionExtractor,
CSVModule,
CSVFileModule,
CSVURLModule,
CSVPatientExtractor,
CSVObservationExtractor,
CSVProcedureExtractor,
Expand Down
Loading