Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ A Node.js framework for extracting mCODE FHIR resources. All resources are profi
- [Extraction Date Range](#extraction-date-range)
- [CLI From-Date and To-Date (NOT recommended use)](#cli-from-date-and-to-date-not-recommended-use)
- [Troubleshooting](#troubleshooting)
- [NULL/NIL values found and replaced with empty-strings](#nullnil-values-found-and-replaced-with-empty-strings)
- [Byte Order Markers in CSV Files](#byte-order-markers-in-csv-files)
- [Terminology and Architecture](#terminology-and-architecture)
- [Glossary](#glossary)
Expand Down Expand Up @@ -165,6 +166,10 @@ npm start -- --entries-filter --from-date <YYYY-MM-DD> --to-date <YYYY-MM-DD> --

### Troubleshooting

#### NULL/NIL values found and replaced with empty-strings

When a provided CSV file contains NULL/NIL values (matched case-insensitively, e.g. `null`, `NULL`, `nil`), those values are treated as empty and are replaced with empty strings (`''`). Each Extractor, however, can define a set of `unalterableColumns` whose values are immune to this NULL/NIL correction. Whenever at least one value is corrected, a single `warn`-level message is logged; in addition, each corrected value produces a `debug`-level message, which can be seen by running the extractor with the debug flag set.

#### Byte Order Markers in CSV Files

The extraction client has built-in handling of byte order markers for CSV files in UTF-8 and UTF-16LE encodings. When using CSV files in other encodings, if you experience unexpected errors, be sure to check for a byte order marker at the beginning of the file. One way to check is to run the following command from the command line:
Expand Down
5 changes: 3 additions & 2 deletions src/extractors/BaseCSVExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ const { validateCSV } = require('../helpers/csvValidator');
const logger = require('../helpers/logger');

class BaseCSVExtractor extends Extractor {
constructor({ filePath, csvSchema }) {
constructor({ filePath, csvSchema, unalterableColumns }) {
super();
this.unalterableColumns = unalterableColumns || [];
this.csvSchema = csvSchema;
this.filePath = path.resolve(filePath);
this.csvModule = new CSVModule(this.filePath);
this.csvModule = new CSVModule(this.filePath, this.unalterableColumns);
}

validate() {
Expand Down
4 changes: 3 additions & 1 deletion src/extractors/CSVPatientExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ function joinAndReformatData(patientData) {

class CSVPatientExtractor extends BaseCSVExtractor {
constructor({ filePath, mask = [] }) {
super({ filePath, csvSchema: CSVPatientSchema });
// Define CSV Columns whose values should never be altered
const unalterableColumns = ['familyName', 'givenName'];
super({ filePath, csvSchema: CSVPatientSchema, unalterableColumns });
this.mask = mask;
}

Expand Down
44 changes: 41 additions & 3 deletions src/modules/CSVModule.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,54 @@ const moment = require('moment');
const parse = require('csv-parse/lib/sync');
const logger = require('../helpers/logger');

/**
 * The standard string normalizer used for case-insensitive comparisons
 * in this module.
 *
 * @param {string} str - The string to normalize.
 * @returns {string} The lower-cased form of `str`.
 */
function stringNormalizer(str) {
  const lowered = str.toLowerCase();
  return lowered;
}

/**
 * Translates null/nil-like cell values into empty strings.
 *
 * A cell is replaced with '' when it is a string equal to 'null' or 'nil',
 * compared via `stringNormalizer` (i.e. case-insensitively). Columns listed in
 * `unalterableColumns` (also compared via `stringNormalizer`) are never
 * altered. Non-string cell values are left untouched. The input rows are not
 * mutated: a new array of shallow-copied rows is returned.
 *
 * Emits one debug-level log per replaced value and, if anything was replaced,
 * a single warn-level summary.
 *
 * @param {Object[]} data - Rows keyed by column name.
 * @param {string[]} [unalterableColumns=[]] - Names of columns whose values
 *   must never be altered.
 * @returns {Object[]} New rows with null/nil-like values replaced by ''.
 */
function normalizeEmptyValues(data, unalterableColumns = []) {
  // Sets give constant-time membership checks inside the row/column loops
  const EMPTY_VALUES = new Set(['null', 'nil'].map((v) => stringNormalizer(v)));
  const protectedColumns = new Set((unalterableColumns || []).map((c) => stringNormalizer(c)));
  // Flag tracking if empty values were normalized or not.
  let wasEmptyNormalized = false;

  const newData = data.map((row, i) => {
    const newRow = { ...row };
    Object.keys(row)
      // Filter out unalterable columns
      .filter((col) => !protectedColumns.has(stringNormalizer(col)))
      .forEach((col) => {
        const value = newRow[col];
        // Only strings can spell NULL/NIL; skipping other types avoids a
        // TypeError from normalizing undefined, null or numeric cells.
        if (typeof value !== 'string') return;
        // If the value for this row-col combo is a value that should be empty, replace it
        if (EMPTY_VALUES.has(stringNormalizer(value))) {
          logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`);
          wasEmptyNormalized = true;
          newRow[col] = '';
        }
      });
    return newRow;
  });

  if (wasEmptyNormalized) {
    logger.warn('NULL/NIL values found and replaced with empty-strings');
  }
  return newData;
}

class CSVModule {
constructor(csvFilePath) {
this.data = parse(fs.readFileSync(csvFilePath), { columns: (header) => header.map((column) => column.toLowerCase()), bom: true });
constructor(csvFilePath, unalterableColumns) {
// Parse then normalize the data
const parsedData = parse(fs.readFileSync(csvFilePath), {
columns: (header) => header.map((column) => stringNormalizer(column)),
bom: true,
});
this.data = normalizeEmptyValues(parsedData, unalterableColumns);
}

async get(key, value, fromDate, toDate) {
logger.debug(`Get csvModule info by key '${key}'`);
// return all rows if key and value aren't provided
if (!key && !value) return this.data;
let result = this.data.filter((d) => d[key.toLowerCase()] === value);
let result = this.data.filter((d) => d[stringNormalizer(key)] === value);
if (result.length === 0) {
logger.warn(`CSV Record with provided key '${key}' and value was not found`);
return result;
Expand Down
107 changes: 75 additions & 32 deletions test/modules/CSVModule.test.js
Original file line number Diff line number Diff line change
@@ -1,48 +1,91 @@
const path = require('path');
const rewire = require('rewire');
const { CSVModule } = require('../../src/modules');
const exampleResponse = require('./fixtures/csv-response.json');

const CSVModuleRewired = rewire('../../src/modules/CSVModule.js');
const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues');

const INVALID_MRN = 'INVALID MRN';
const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv'));
const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));

test('Reads data from CSV', async () => {
const data = await csvModule.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});

test('Reads data from CSV with a Byte Order Mark', async () => {
const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});
describe('CSVModule', () => {
describe('get', () => {
test('Reads data from CSV', async () => {
const data = await csvModule.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});

test('Returns multiple rows', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2');
expect(data).toHaveLength(2);
});
test('Reads data from CSV with a Byte Order Mark', async () => {
const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});

test('Returns all rows when both key and value are undefined', async () => {
const data = await csvModule.get();
expect(data).toHaveLength(csvModule.data.length);
expect(data).toEqual(csvModule.data);
});
test('Returns multiple rows', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2');
expect(data).toHaveLength(2);
});

test('Returns data with recordedDate after specified from date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
expect(data).toHaveLength(1);
});
test('Returns all rows when both key and value are undefined', async () => {
const data = await csvModule.get();
expect(data).toHaveLength(csvModule.data.length);
expect(data).toEqual(csvModule.data);
});

test('Returns data with recordedDate before specified to date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
expect(data).toHaveLength(1);
});
test('Returns data with recordedDate after specified from date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
expect(data).toHaveLength(1);
});

test('Should return an empty array when key-value pair does not exist', async () => {
const data = await csvModule.get('mrn', INVALID_MRN);
expect(data).toEqual([]);
});
test('Returns data with recordedDate before specified to date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
expect(data).toHaveLength(1);
});

test('Should return an empty array when key-value pair does not exist', async () => {
const data = await csvModule.get('mrn', INVALID_MRN);
expect(data).toEqual([]);
});

test('Should return proper value regardless of key casing', async () => {
const data = await csvModule.get('mRN', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});
});

describe('normalizeEmptyValues', () => {
it('Should turn "null" values into empty strings, regardless of case', () => {
const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }];
const normalizedData = normalizeEmptyValues(data);
normalizedData.forEach((d) => {
expect(d.key).toBe('');
});
});

it('Should turn "nil" values into empty strings, regardless of case', () => {
const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
const normalizedData = normalizeEmptyValues(data);
normalizedData.forEach((d) => {
expect(d.key).toBe('');
});
});

it('Should not modify unalterableColumns, regardless of their value', () => {
const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
const normalizedData = normalizeEmptyValues(data, ['key']);
normalizedData.forEach((d) => {
expect(d.key).not.toBe('');
});
});

test('Should return proper value regardless of key casing', async () => {
const data = await csvModule.get('mRN', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
it('Should leave all other values uneffected, regardless of case', () => {
const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }];
const normalizedData = normalizeEmptyValues(data);
normalizedData.forEach((d) => {
expect(d.key).not.toBe('');
});
});
});
});