Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ This class is the main interface for reading data from dBase files. It extends `
The supported options are:

* encoding `String` The character encoding to use (default = `utf-8`)
* encoder `Function` Called as `encoder(buffer, encoding)` to turn each field's raw buffer into a string (default = `buffer.toString(encoding).trim()`)

Creates a new Parser and attaches it to the specified filename.

Expand Down Expand Up @@ -117,6 +118,23 @@ The following code example illustrates a very simple usage for this module:
});

parser.parse();

How to use encodings that are not supported by the Node.js `Buffer`:

var Parser = require('node-dbf');
var iconv = require('iconv-lite'); // npm install iconv-lite

// Custom encoder: the parser calls it with a field's raw buffer and the
// parser's configured encoding, and uses the returned string as the field value.
// This example ignores the `encoding` argument and always decodes with a fixed
// code page that Buffer#toString cannot handle.
var encodingFunction = function (buffer, encoding) {
    return iconv.decode(buffer, 'CP866').trim(); // or 'CP1252', ...
};

var parser = new Parser('/path/to/my/dbase/file.dbf', {encoder: encodingFunction});

parser.on('record', function(record) {
    console.log('Name: ' + record.firstName + ' ' + record.lastName); // Name: Jü Smith
});

parser.parse();

# Command-Line Interface (CLI)

Expand Down
32 changes: 21 additions & 11 deletions src/parser.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ class Parser extends EventEmitter

# Stores the file name and reads the optional settings off `options`:
#   encoding          - handed to the encoder as its second argument (falls back to 'utf-8')
#   encoder           - function (buffer, encoding) producing a field's string value
#                       (falls back to @getValueString)
#   readStreamOptions - forwarded as the second argument to fs.createReadStream
constructor: (@filename, @options = {}) ->
@encoding = @options?.encoding || 'utf-8'
@encoder = @options?.encoder || @getValueString
@readStreamOptions = @options?.readStreamOptions

parse: =>
@emit 'start', @
Expand All @@ -16,13 +18,12 @@ class Parser extends EventEmitter
@emit 'header', @header

sequenceNumber = 0

loc = @header.start
loc = 0
bufLoc = @header.start
overflow = null
@paused = false

stream = fs.createReadStream @filename
stream = fs.createReadStream @filename, @readStreamOptions

@readBuf = =>

Expand All @@ -31,18 +32,24 @@ class Parser extends EventEmitter
return

while buffer = stream.read()
if bufLoc isnt @header.start then bufLoc = 0
if overflow isnt null then buffer = overflow + buffer

if overflow isnt null then buffer = Buffer.concat [overflow, buffer]

while loc < (@header.start + @header.numberOfRecords * @header.recordLength) && (bufLoc + @header.recordLength) <= buffer.length
@emit 'record', @parseRecord ++sequenceNumber, buffer.slice bufLoc, bufLoc += @header.recordLength

loc += bufLoc
if bufLoc < buffer.length then overflow = buffer.slice bufLoc, buffer.length else overflow = null

if bufLoc < buffer.length
overflow = buffer.slice bufLoc, buffer.length
loc += bufLoc
bufLoc = 0
else
overflow = null
bufLoc -= buffer.length
loc += buffer.length


return @

stream.on 'readable',@readBuf
stream.on 'readable',@readBuf
stream.on 'end', () =>
@emit 'end'

Expand All @@ -68,9 +75,12 @@ class Parser extends EventEmitter
record[field.name] = @parseField field, buffer.slice loc, loc += field.length

return record

# Default field encoder: decodes `buffer` using `encoding` and trims
# leading/trailing whitespace from the resulting string.
getValueString: (buffer, encoding) =>
return (buffer.toString encoding).trim()

parseField: (field, buffer) =>
value = (buffer.toString @encoding).trim()
value = @encoder buffer, @encoding

if field.type is 'N'
value = parseInt value, 10
Expand Down
67 changes: 66 additions & 1 deletion test/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,72 @@ describe('Parser', function() {
expect(events.end).to.be.above(events.record);
});
});


// Exercises two parser options together: a caller-supplied `encoder` callback
// and a small read-stream `highWaterMark`, so the file arrives in many chunks.
describe('Parsing the SF zip codes with small chunksize and own encoder', function() {
    // Flipped by the custom encoder so the suite can prove the parser used it.
    var encoderFuncCalled = false;
    var ownEncoder = function(buffer, encoding) {
        encoderFuncCalled = true;
        return buffer.toString(encoding).trim();
    };

    // read by 50 bytes chunk (below header size and below recordsize)
    var parser = new Parser(__dirname + '/fixtures/bayarea_zipcodes.dbf', {
        encoder: ownEncoder,
        readStreamOptions: { highWaterMark: 50 }
    });
    var header;
    var records = [];
    var events;

    // Run the parse once, collecting the header and every record; `done` is
    // signalled from the parser's 'end' event.
    before(function(done) {
        events = {start: undefined, header: undefined, record: undefined, end: undefined};

        parser.on('start', function() {
            events.start = process.hrtime()[1];
        });

        parser.on('header', function(h) {
            header = h;
            events.header = process.hrtime()[1];
        });

        parser.on('record', function(record) {
            records.push(record);
            events.record = process.hrtime()[1];
        });

        parser.on('end', function() {
            events.end = process.hrtime()[1];
            done();
        });

        parser.parse();
    });

    describe('the records', function() {
        it('there are 187', function() {
            expect(records).to.have.lengthOf(187);
        });

        it('the header says there are 187', function() {
            expect(header.numberOfRecords).to.equal(187);
        });

        it('contain the 94111 zip code', function() {
            var area = records.filter(function(v) { return '94111' === v.ZIP; });

            expect(area).to.be.an('Array');
            expect(area).to.have.lengthOf(1);
            area = area[0];

            expect(area['@sequenceNumber']).to.be.a('Number');
            expect(area['@deleted']).to.equal(false);

            expect(area.ZIP).to.equal('94111');
            expect(area.PO_NAME).to.equal('SAN FRANCISCO');
            expect(area.STATE).to.equal('CA');
            expect(area.Area__).to.be.a('Number').within(0, Number.MAX_VALUE);
            expect(area.Length__).to.be.a('Number').within(0, Number.MAX_VALUE);
        });

        it('call encoder func', function() {
            expect(encoderFuncCalled).to.be.true;
        });
    });
});

// check a select number of them
// check floats
// check character encoding ???
Expand Down