From e3801c2046d8253ec91b51170b9f26b78f62f6a1 Mon Sep 17 00:00:00 2001 From: Keyang Date: Fri, 30 Dec 2016 19:21:46 +0000 Subject: [PATCH] fix #128 ; add done event& hooks; removed parserMgr and parser; updated readme; use lodash.set now --- libs/core/Converter.js | 55 ++++- libs/core/defParam.js | 8 +- libs/core/fileLineToCSVLine.js | 15 ++ libs/core/index.js | 4 +- libs/core/linesToJson.js | 246 +++++++++++++++----- libs/core/parser.js | 7 +- package.json | 11 +- readme.md | 157 ++++++++++--- test/data/complexJSONCSV | 2 +- test/data/dataIgnoreEmpty | 2 +- test/data/dataWithMismatchedColumn | 2 +- test/data/dataWithType | 2 +- test/data/invalidHeader | 2 +- test/data/testData | 2 +- test/errorHandle.js | 6 +- test/testCSVConverter.js | 10 +- test/testCSVConverter2.js | 350 +++++++++++++++-------------- test/testParserMgr.js | 22 +- 18 files changed, 602 insertions(+), 301 deletions(-) create mode 100644 libs/core/fileLineToCSVLine.js diff --git a/libs/core/Converter.js b/libs/core/Converter.js index 9b1440d..87e9651 100644 --- a/libs/core/Converter.js +++ b/libs/core/Converter.js @@ -7,6 +7,7 @@ var defParam=require("./defParam"); var csvline=require("./csvline"); var fileline=require("./fileline"); var dataToCSVLine=require("./dataToCSVLine"); +var fileLineToCSVLine=require("./fileLineToCSVLine"); var linesToJson=require("./linesToJson"); var CSVError=require("./CSVError"); var workerMgr=require("./workerMgr"); @@ -41,7 +42,8 @@ function Converter(params,options) { this._csvTransf=null; this.finalResult=[]; // this.on("data", function() {}); - this.on("error", function() {}); + this.on("error", emitDone(this)); + this.on("end", emitDone(this)); this.initWorker(); process.nextTick(function(){ if (this._needEmitFinalResult === null){ @@ -69,6 +71,13 @@ function Converter(params,options) { return this; } util.inherits(Converter, Transform); +function emitDone(conv){ + return function(err){ + process.nextTick(function(){ + conv.emit('done',err) + }) + } +} Converter.prototype._transform = function(data, encoding, cb) { if (this.param.toArrayString && this.started === false) { this.started = true; @@ -94,11 +103,15 @@ Converter.prototype.setPartialData=function(d){ } Converter.prototype.processData=function(data,cb){ var params=this.param; + var fileLines=fileline(data,this.param) + if (this.preProcessLine && typeof this.preProcessLine === "function"){ + fileLines.lines=this._preProcessLines(fileLines.lines,this.lastIndex) + } if (!params._headers){ //header is not inited. init header - this.processHead(data,cb); + this.processHead(fileLines,cb); }else{ if (params.workerNum<=1){ - var lines=dataToCSVLine(data,params); + var lines=fileLineToCSVLine(fileLines,params); this.setPartialData(lines.partial); var jsonArr=linesToJson(lines.lines,params,this.recordNum); this.processResult(jsonArr) @@ -106,9 +119,22 @@ Converter.prototype.processData=function(data,cb){ this.recordNum+=jsonArr.length; cb(); }else{ - this.workerProcess(data,cb); + this.workerProcess(fileLines,cb); + } + } +} +Converter.prototype._preProcessLines=function(lines,startIdx){ + var rtn=[] + for (var i=0;i -1) { + return param._headerFlag[i] = 'omit' + } else if (head.indexOf('*flat*') > -1) { + return param._headerFlag[i] = 'flat' + } else { + return param._headerFlag[i] = '' + } + } +} +function getTitle(head, i, param) { + if (param._headerTitle[i]) { + return param._headerTitle[i] + } else { + var flag = getFlag(head, i, param) + var str = head.replace(flag, '') + str = str.replace('string#!', '').replace('number#!', '') + return param._headerTitle[i] = str + } +} + +function checkType(item, head, headIdx, param) { + if (param._headerType[headIdx]) { + return param._headerType[headIdx] + } else { + if (head.indexOf('number#!') > -1) { + return param._headerType[headIdx] = numberType + } else if (head.indexOf('string#!') > -1) { + return param._headerType[headIdx] = stringType + } else if (param.checkType) { + return param._headerType[headIdx] = dynamicType(item) + } else { + return param._headerType[headIdx] = stringType + } + } +} + +function numberType(item) { + var rtn = parseFloat(item) + if (isNaN(rtn)) { + return 0; + } + return rtn; +} + +function stringType(item) { + return item.toString(); +} + +function dynamicType(item) { + var trimed = item.trim(); + if (trimed === "") { + return stringType; + } + if (!isNaN(trimed)) { + return numberType + } else if (trimed.length === 5 && trimed.toLowerCase() === "false" || trimed.length === 4 && trimed.toLowerCase() === "true") { + return booleanType; + } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { + return jsonType; + + } else { + return stringType; + } +} + +function booleanType(item) { + var trimed = item.trim(); + if (trimed.length === 5 && trimed.toLowerCase() === "false") { + return false; + } else { + return true; + } +} + +function jsonType(item) { + try { + return JSON.parse(item); + } catch (e) { + return item; + } +} +// function dynamicType(item) { +// var trimed = item.trim(); +// if (trimed === "") { +// return trimed; +// } +// if (!isNaN(trimed)) { +// return parseFloat(trimed); +// } else if (trimed.length === 5 && trimed.toLowerCase() === "false") { +// return false; +// } else if (trimed.length === 4 && trimed.toLowerCase() === "true") { +// return true; +// } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { +// try { +// return JSON.parse(trimed); +// } catch (e) { +// return item; +// } +// } else { +// return item; + +// } +// } \ No newline at end of file diff --git a/libs/core/parser.js b/libs/core/parser.js index f38684f..c2012ae 100644 --- a/libs/core/parser.js +++ b/libs/core/parser.js @@ -16,7 +16,7 @@ function Parser(name, regExp, parser, processSafe) { this.parse = parser; } } -var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; +// var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; Parser.prototype.convertType = function(item) { var type=this.type; if (type === 'number') { @@ -28,7 +28,10 @@ Parser.prototype.convertType = function(item) { } } else if (this.param && this.param.checkType && type === '') { var trimed = item.trim(); - if (numReg.test(trimed)) { + if (trimed === ""){ + return trimed; + } + if (!isNaN(trimed)) { return parseFloat(trimed); } else if (trimed.length === 5 && trimed.toLowerCase() === "false") { return false; diff --git a/package.json b/package.json index 9e3a675..491a745 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "email": "t3dodson@gmail.com" } ], - "version": "1.1.0", + "version": "1.1.1", "keywords": [ "csv", "csvtojson", @@ -48,14 +48,15 @@ "grunt-newer": "^1.1.0", "imgur": "^0.1.5", "load-grunt-tasks": "^3.4.0", - "mocha": "^2.4.5" + "mocha": "^2.4.5", + "minimist": "^1.2.0" }, "dependencies": { - "minimist": "^1.2.0" + "lodash": "^4.17.3" }, "scripts": { "test": "mocha ./test -R spec", - "test-debug":"mocha debug ./test -R spec", - "test-all":"mocha debug ./test -R spec && CSV_WORKER=3 mocha ./test -R spec " + "test-debug": "mocha debug ./test -R spec", + "test-all": "mocha ./test -R spec && CSV_WORKER=3 mocha ./test -R spec " } } diff --git a/readme.md b/readme.md index e9b5cf1..982aa00 100644 --- a/readme.md +++ b/readme.md @@ -53,39 +53,38 @@ npm i --save csvtojson ```js /** csvStr: -a,b,c 1,2,3 4,5,6 +7,8,9 */ const csv=require('csvtojson') -csv() +csv({noheader:true}) .fromString(csvStr) -.on('json',(jsonObj)=>{ - console.log(jsonObj.a) // 1 and 4 -}) -.on('csv',(csvRow)=>{ - console.log(csvRow) // [1,2,3] and [4,5,6] . - //use noheader param to access [a,b,c]. see params below +.on('csv',(csvRow)=>{ // this func will be called 3 times + console.log(csvRow) // => [1,2,3] , [4,5,6] , [7,8,9] }) -.on('end_parsed',(jsonArr)=>{ - assert.equal(jsonArr.length ,2) - assert.equal(jsonArr[0].a,1) +.on('done',()=>{ + //parsing finished }) ``` ### From CSV File ```js - +/** csv file +a,b,c +1,2,3 +4,5,6 +*/ const csvFilePath='' const csv=require('csvtojson') csv() .fromFile(csvFilePath) -.on('data',(data)=>{ - const jsonString=data.toString("utf8") - // json String is stringified json object +.on('json',(jsonObj)=>{ + // combine csv header row and csv line to a json object + // jsonObj.a ==> 1 or 4 }) -.on('end',()=>{ +.on('done',(error)=>{ console.log('end') }) @@ -100,36 +99,54 @@ const csv=require('csvtojson') csv() .fromStream(csvReadStream) .on('csv',(csvRow)=>{ - + // csvRow is an array }) -.on('end',()=>{ +.on('done',(error)=>{ }) -.on('error',(err)=>{ - console.log(err) - csvReadStream.unpipe() -}) - ``` -### Convert to CSV row arrays +### Convert to CSV row arrays with csv header row ```js /** csvStr: a,b,c -d,e,f 1,2,3 +4,5,6 +*/ + +const csv=require('csvtojson') +csv() +.fromString(csvStr) +.on('csv',(csvRow)=>{ //this func will be called twice. Header row will not be populated + // csvRow => [1,2,3] and [4,5,6] +}) +.on('done',()=>{ + console.log('end') +}) +``` + +### Convert to JSON without csv header row + +```js +/** +csvStr: +1,2,3 +4,5,6 +7,8,9 */ const csv=require('csvtojson') csv({noheader:true}) .fromString(csvStr) -.on('csv',(csvRow)=>{ - //[a,b,c] [d,e,f] [1,2,3] +.on('json',(json)=>{ //this func will be called 3 times + // json.field1 => 1,4,7 + // json.field2 => 2,5,8 + // json.field3 => 3,6,9 }) -.on('end',()=>{ +.on('done',()=>{ console.log('end') }) ``` @@ -187,7 +204,7 @@ In above, `converter` is an instance of Converter which is a subclass of node.js * [Parameters](#parameters) * [Events](#events) -* [Transform](#transform) +* [Hook / Transform](#hook-&-transform) * [Nested JSON Structure](#nested-json-structure) * [Header Row](#header-row) * [Multi CPU Core Support](#multi-cpu-core-support) @@ -218,7 +235,6 @@ const converter=csv({ ``` Following parameters are supported: -* **constructResult**: true/false. Whether to construct final json object in memory which will be populated in "end_parsed" event. Set to false if deal with huge csv data. default: true. * **delimiter**: delimiter used for seperating columns. Use "auto" if delimiter is unknown in advance, in this case, delimiter will be auto-detected (by best attempt). Use an array to give a list of potential delimiters e.g. [",","|","$"]. default: "," * **quote**: If a column contains delimiter, it is able to use quote character to surround the column content. e.g. "hello, world" wont be split into two columns while parsing. Set to "off" will ignore all quotes. default: " (double quote) * **trim**: Indicate if parser trim off spaces surrounding column content. e.g. " content " will be trimmed to "content". Default: true @@ -256,7 +272,7 @@ csv() ### csv -`csv` event is emitted for each parsed CSV line. It passes an array object which contains cells content of one csv row. +`csv` event is emitted for each CSV line. It passes an array object which contains cells content of one csv row. ```js const csv=require('csvtojson') @@ -267,6 +283,10 @@ csv() }) ``` +`csvRow` is always an array of strings no matter `checkType` value. + +`csv` event is the fastest parse event while `json` and `data` event is about 2 times slower. Thus if `csv` is enough, for best performance, just use it without `json` and `data` event. + ### data `data` event is emitted for each parsed CSV line. It passes buffer of strigified JSON unless `objectMode` is set true in stream option. @@ -291,6 +311,10 @@ csv() }) ``` +Note that if `error` being emitted, the process will stop as node.js will automatically `unpipe()` upper-stream and chained down-stream1. This will cause `end` / `end_parsed` event never being emitted because `end` event is only emitted when all data being consumed 2. + +1. [Node.JS Readable Stream](https://github.com/nodejs/node/blob/master/lib/_stream_readable.js#L572-L583) +2. [Writable end Event](https://nodejs.org/api/stream.html#stream_event_end) ### record_parsed @@ -317,7 +341,58 @@ csv() }) ``` -## Transform +### done + +`done` event is emitted either after `end` or `error`. This indicates the processor has stopped. + +```js +const csv=require('csvtojson') +csv() +.on('done',(error)=>{ + //do some stuff +}) +``` + +if any error during parsing, it will be passed in callback. + +## Hook & Transform + +### Raw CSV Data Hook + +```js +const csv=require('csvtojson') +csv() +.preRawData((csvRawData,cb)=>{ + var newData=csvRawData.replace('some value','another value') + cb(newData); +}) +.on('json',(jsonObj)=>{ + +}); +``` + +the function in `preRawData` will be called directly with the string from upper stream. + +### CSV File Line Hook + +```js +const csv=require('csvtojson') +csv() +.preFileLine((fileLineString, lineIdx)=>{ + if (lineIdx === 2){ + return fileLineString.replace('some value','another value') + } + return fileLineString +}) +.on('json',(jsonObj)=>{ + +}); +``` + +the function is called each time a file line being found in csv stream. the `lineIdx` is the file line number in the file. The function should return a string to processor. + + +### Result transform ```js const csv=require('csvtojson') @@ -335,6 +410,8 @@ csv() `Transform` will cause some performance panelties because it voids optimisation mechanism. Try to use Node.js `Transform` class as downstream for transformation instead. + + ## Nested JSON Structure One of the powerful feature of `csvtojson` is the ability to convert csv line to a nested JSON by correctly defining its csv header row. This is default out-of-box feature. @@ -342,7 +419,7 @@ One of the powerful feature of `csvtojson` is the ability to convert csv line to Here is an example. Original CSV: ```csv -fieldA.title, fieldA.children[0].name, fieldA.children[0].id,fieldA.children[1].name, fieldA.children[1].employee[].name,fieldA.children[1].employee[].name, fieldA.address[],fieldA.address[], description +fieldA.title, fieldA.children.0.name, fieldA.children.0.id,fieldA.children.1.name, fieldA.children.1.employee.0.name,fieldA.children.1.employee.1.name, fieldA.address.0,fieldA.address.1, description Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle @@ -464,9 +541,23 @@ This will create 3 extra workers. Main process will only be used for delegating See [here](https://github.com/Keyang/node-csvtojson/blob/develop/docs/performance.md#cpu-usage-leverage) for how `csvtojson` leverages CPU usage when using multi-cores. +### Limitations + +There are some limitations when using multi-core feature: + +* Does not support if a column contains line break. #Change Log +## 1.1.1 + +* Fix bugs: preProcessLine is not emitted +* Changed array definition in nested json structure to follow [lodash set] (https://lodash.com/docs/4.17.2#set) +* Only use first line of csv body for type inference +* added `done` event +* added `hooks` section +* removed `parserMgr` + ## 1.1.0 * Remove support of `new Converter(true)` diff --git a/test/data/complexJSONCSV b/test/data/complexJSONCSV index c416d9d..8feeb1a 100644 --- a/test/data/complexJSONCSV +++ b/test/data/complexJSONCSV @@ -1,3 +1,3 @@ -fieldA.title, fieldA.children[0].name, fieldA.children[0].id,fieldA.children[1].name, fieldA.children[1].employee[].name,fieldA.children[1].employee[].name, fieldA.address[],fieldA.address[], description +fieldA.title, fieldA.children.0.name, fieldA.children.0.id,fieldA.children.1.name, fieldA.children.1.employee.0.name,fieldA.children.1.employee.1.name, fieldA.address.0,fieldA.address.1, description Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle diff --git a/test/data/dataIgnoreEmpty b/test/data/dataIgnoreEmpty index 7697de9..92fb227 100644 --- a/test/data/dataIgnoreEmpty +++ b/test/data/dataIgnoreEmpty @@ -1,4 +1,4 @@ -col1, col2[], col2[],col4.col3,col4.col5 +col1, col2.0, col2.1,col4.col3,col4.col5 d1,,d3,,world d2,d,d,d,d diff --git a/test/data/dataWithMismatchedColumn b/test/data/dataWithMismatchedColumn index 4966a8c..4d2ebb2 100644 --- a/test/data/dataWithMismatchedColumn +++ b/test/data/dataWithMismatchedColumn @@ -2,4 +2,4 @@ fieldA.title, fieldA.children[0].name, fieldA.children[0].id,fieldA.children[1]. Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle Kindom Garden, Ceil, -Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle +Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle \ No newline at end of file diff --git a/test/data/dataWithType b/test/data/dataWithType index 24efb61..8e8398f 100644 --- a/test/data/dataWithType +++ b/test/data/dataWithType @@ -1,2 +1,2 @@ -column1,*flat*string#!user.name,column2,date#!column3, date#!colume4, column5, string#!column6 , string#!column7, number#!column8, column9,column10[0],column10[1],name#!,column11 +column1,*flat*string#!user.name,column2,date#!column3, date#!colume4, column5, string#!column6 , string#!column7, number#!column8, column9,column10.0,column10.1,name#!,column11 1234,hello world,a1234,2012-01-01,someinvaliddate, {"hello":"world"}, {"hello":"world"}, 1234,abcd, true,23,31,sss,[{"hello":"world"}] diff --git a/test/data/invalidHeader b/test/data/invalidHeader index ce9aeb7..a353646 100644 --- a/test/data/invalidHeader +++ b/test/data/invalidHeader @@ -1,4 +1,4 @@ -header1,header1.filed1,header1.file2,header2,header2.field1[],header2.field1[],header2.filed2 +header1,header1.filed1,header1.file2,header2,header2.field1.0,header2.field1.1,header2.filed2 5OlFPc,q7,ejpJdw,DIgNVqB7h9jI,f8ayrzv,undefinedzvTY3Qd3pSkKOk,S7cVvW7m50t9U 0TaUGQVPqPkOr,lT,GA,UPUuORnuaDjXdl,V6G4QFddmPH8b,65NxWPl,Lclhl0fy ,ex4,1gjT4YPJ,QtJ8S5TQ,M4zO4OppCAR4,Pg7VipESqZmHwY5,mPzz diff --git a/test/data/testData b/test/data/testData index 85635d5..4f57323 100644 --- a/test/data/testData +++ b/test/data/testData @@ -1,3 +1,3 @@ -date,*json*employee.name,*json*employee.age,*json*employee.number,*array*address,*array*address,*jsonarray*employee.key,*jsonarray*employee.key,*omit*id +date,employee.name,employee.age,employee.number,address.0,address.1,employee.key.0,employee.key.1,*omit*id 2012-02-12,Eric,31,51234,Dunno Street,Kilkeny Road,key1,key2,2 2012-03-06,Ted,28,51289,O FUTEBOL.¿,Tormore,key3,key4,4 \ No newline at end of file diff --git a/test/errorHandle.js b/test/errorHandle.js index e5b3c1f..96bf2bc 100644 --- a/test/errorHandle.js +++ b/test/errorHandle.js @@ -67,10 +67,14 @@ describe("Converter error handling", function() { if (tested === false) { assert(err.err === "column_mismatched"); tested = true; - done(); + // done(); } }); conv.on("json",function(){}) + conv.on('done',function(){ + assert(tested) + done(); + }) rs.pipe(conv); }); }); diff --git a/test/testCSVConverter.js b/test/testCSVConverter.js index 8f819dc..9c9a9a0 100644 --- a/test/testCSVConverter.js +++ b/test/testCSVConverter.js @@ -13,7 +13,7 @@ describe("CSV Converter", function () { var obj = new Converter(); var stream = fs.createReadStream(file); obj.on("end_parsed", function (obj) { - assert(obj.length === 2); + assert.equal(obj.length, 2); done(); }); stream.pipe(obj); @@ -191,7 +191,7 @@ describe("CSV Converter", function () { csvConverter.on("record_parsed",function (d){ assert(typeof d.column1 === "number"); assert(typeof d.column2 === "string"); - assert(d["date#!colume4"] === "someinvaliddate"); + assert.equal(d["date#!colume4"] , "someinvaliddate"); assert(d.column5.hello === "world"); assert(d.column6 === '{"hello":"world"}'); assert(d.column7 === "1234"); @@ -219,7 +219,7 @@ describe("CSV Converter", function () { assert( d["date#!column3"] === "2012-01-01"); assert(d["date#!colume4"] === "someinvaliddate"); assert(d.column5 === '{"hello":"world"}'); - assert(d["column6"] === '{"hello":"world"}'); + assert.equal(d["column6"] , '{"hello":"world"}'); assert(d["column7"] === "1234"); assert(d["column8"] === "abcd"); assert(d.column9 === "true"); @@ -285,8 +285,8 @@ describe("CSV Converter", function () { st.on("end_parsed",function (res){ var j = res[0]; assert(res.length === 3); - assert (j.col2.length === 1); - assert(j.col2[0] === "d3"); + assert (j.col2.length === 2); + assert(j.col2[1] === "d3"); assert(j.col4.col3 === undefined); assert(j.col4.col5 === "world"); assert(res[1].col1==="d2"); diff --git a/test/testCSVConverter2.js b/test/testCSVConverter2.js index 33d10d2..95828a2 100644 --- a/test/testCSVConverter2.js +++ b/test/testCSVConverter2.js @@ -1,230 +1,232 @@ var Converter = require("../libs/core/Converter.js"); -var csv=require("../"); +var csv = require("../"); var assert = require("assert"); var fs = require("fs"); -describe("CSV Converter", function() { - it("should convert from large csv string", function(done) { +describe("CSV Converter", function () { + it("should convert from large csv string", function (done) { var csvStr = fs.readFileSync(__dirname + "/data/large-csv-sample.csv", "utf8"); var conv = new Converter({ workerNum: 1 }); - conv.fromString(csvStr, function(err, res) { + conv.fromString(csvStr, function (err, res) { assert(!err); assert(res.length === 5290); done(); }); }); - it("should set eol ", function(done) { + it("should set eol ", function (done) { var rs = fs.createReadStream(__dirname + "/data/large-csv-sample.csv"); var conv = new Converter({ workerNum: 1, constructResult: false, - eol:"\n" + eol: "\n" }); - var count=0; - conv.on("record_parsed",function(rec){ - count++; + var count = 0; + conv.on("record_parsed", function (rec) { + count++; }); - conv.on("error",function(){ + conv.on("error", function () { console.log(arguments); }); - conv.on("end_parsed",function(){ + conv.on("end_parsed", function () { assert(count === 5290); done(); }); rs.pipe(conv); }); - it ("should convert tsv String",function(done){ - var tsv=__dirname+"/data/dataTsv"; - var csvStr=fs.readFileSync(tsv,"utf8"); - var conv=new Converter({workerNum:1,delimiter:"\t","checkType":false}); - conv.fromString(csvStr,function(err,res){ + it("should convert tsv String", function (done) { + var tsv = __dirname + "/data/dataTsv"; + var csvStr = fs.readFileSync(tsv, "utf8"); + var conv = new Converter({ workerNum: 1, delimiter: "\t", "checkType": false }); + conv.fromString(csvStr, function (err, res) { assert(!err); done(); }); }); - it ("should allow customised header with nohead csv string.",function(done){ + it("should allow customised header with nohead csv string.", function (done) { var testData = __dirname + "/data/noheadercsv"; - var rs = fs.readFileSync(testData,"utf8"); - var conv=new Converter({ - noheader:true, - headers:["a","b","c","e","f","g"] - }); - conv.fromString(rs,function(err,json){ - assert.equal(json[0].field7,40); - assert.equal(json[0].a,"CC102-PDMI-001"); + var rs = fs.readFileSync(testData, "utf8"); + var conv = new Converter({ + noheader: true, + headers: ["a", "b", "c", "e", "f", "g"] + }); + conv.fromString(rs, function (err, json) { + assert.equal(json[0].field7, 40); + assert.equal(json[0].a, "CC102-PDMI-001"); done(); }); }); - it ("should parse fromFile",function(done){ + it("should parse fromFile", function (done) { var csvFile = __dirname + "/data/large-csv-sample.csv"; var conv = new Converter({ workerNum: 3 }); - conv.fromFile(csvFile, function(err, res) { + conv.fromFile(csvFile, function (err, res) { assert(!err); - assert.equal(res.length , 5290); + assert.equal(res.length, 5290); done(); }); }); - it ("should fromFile should emit error",function(done){ + it("should fromFile should emit error", function (done) { var csvFile = __dirname + "/data/dataWithUnclosedQuotes"; var conv = new Converter({ workerNum: 1 }); - conv.fromFile(csvFile, function(err, res) { + conv.fromFile(csvFile, function (err, res) { assert(err); done(); }); }); - it ("should parse no header with dynamic column number",function(done){ + it("should parse no header with dynamic column number", function (done) { var testData = __dirname + "/data/noheaderWithVaryColumnNum"; - var rs = fs.readFileSync(testData,"utf8"); - var conv=new Converter({ - noheader:true + var rs = fs.readFileSync(testData, "utf8"); + var conv = new Converter({ + noheader: true }); - conv.fromString(rs,function(err,json){ - assert.equal(json.length,2); - assert.equal(json[1].field4,7); + conv.fromString(rs, function (err, json) { + assert.equal(json.length, 2); + assert.equal(json[1].field4, 7); done(); }); }); - it ("should parse tabsv data with dynamic columns",function(done){ + it("should parse tabsv data with dynamic columns", function (done) { var testData = __dirname + "/data/tabsv"; - var rs = fs.readFileSync(testData,"utf8"); - var conv=new Converter({ - delimiter:"\t" + var rs = fs.readFileSync(testData, "utf8"); + var conv = new Converter({ + delimiter: "\t" }); - conv.fromString(rs,function(err,json){ - assert.equal(json[0].Idevise,""); + conv.fromString(rs, function (err, json) { + assert.equal(json[0].Idevise, ""); done(); }); }); - it ("should use first line break as eol",function(done){ + it("should use first line break as eol", function (done) { var testData = __dirname + "/data/testEol"; - var conv=new Converter({ - noheader:true + var conv = new Converter({ + noheader: true }); - conv.fromFile(testData,function(err,json){ + conv.fromFile(testData, function (err, json) { assert(!err); done(); }); }) - it ("should use sync transform",function(done){ + it("should use sync transform", function (done) { var testData = __dirname + "/data/complexJSONCSV"; var rs = fs.createReadStream(testData); - var conv=new Converter({}); - conv.transform=function(json,row,index){ - json.rowNum=index; + var conv = new Converter({}); + conv.transform = function (json, row, index) { + json.rowNum = index; } - conv.on("record_parsed",function(j){ - assert(j.rowNum>=0); + conv.on("record_parsed", function (j) { + assert(j.rowNum >= 0); }); - conv.on("end_parsed",function(res){ - assert(res[0].rowNum===0); - assert(res[1].rowNum===1); + conv.on("end_parsed", function (res) { + assert(res[0].rowNum === 0); + assert(res[1].rowNum === 1); done(); }); rs.pipe(conv); }); - it ("should detect delimiter ",function(done){ + it("should detect delimiter ", function (done) { var testData = __dirname + "/data/dataWithAutoDelimiter"; var rs = fs.createReadStream(testData); - var conv=new Converter({delimiter:"auto"}); - conv.on("end_parsed",function(res){ - assert.equal(res[0].col1,"Mini. Sectt:hisar S.O"); - assert.equal(res[1].col1,"#Mini. Sectt"); + var conv = new Converter({ delimiter: "auto" }); + conv.on("end_parsed", function (res) { + assert.equal(res[0].col1, "Mini. Sectt:hisar S.O"); + assert.equal(res[1].col1, "#Mini. Sectt"); done(); }); rs.pipe(conv); }); - it ("should stripe out whitespaces if trim is true",function(done){ + it("should stripe out whitespaces if trim is true", function (done) { var testData = __dirname + "/data/dataWithWhiteSpace"; var rs = fs.createReadStream(testData); - var conv=new Converter({trim:true}); - conv.on("end_parsed",function(res){ + var conv = new Converter({ trim: true }); + conv.on("end_parsed", function (res) { // console.log(res); - assert.equal(res[0]["Column 1"],"Column1Row1"); - assert.equal(res[0]["Column 2"],"Column2Row1"); + assert.equal(res[0]["Column 1"], "Column1Row1"); + assert.equal(res[0]["Column 2"], "Column2Row1"); done(); }); rs.pipe(conv); }); - it ("should convert triple quotes correctly",function(done){ + it("should convert triple quotes correctly", function (done) { var testData = __dirname + "/data/dataWithTripleQoutes"; var rs = fs.createReadStream(testData); - var conv=new Converter({trim:true}); - conv.on("end_parsed",function(res){ - assert.equal(res[0].Description,"ac, abs, moon"); - assert.equal(res[1].Model,"Venture \"Extended Edition\""); - assert.equal(res[2].Model,"Venture \"Extended Edition, Very Large\""); + var conv = new Converter({ trim: true }); + conv.on("end_parsed", function (res) { + assert.equal(res[0].Description, "ac, abs, moon"); + assert.equal(res[1].Model, "Venture \"Extended Edition\""); + assert.equal(res[2].Model, "Venture \"Extended Edition, Very Large\""); done(); }); rs.pipe(conv); }); - it ("should auto flat header if header is not valid nested json keys",function(done){ - var testData = __dirname + "/data/invalidHeader"; - var rs = fs.createReadStream(testData); - var conv=new Converter(); - conv.on("end_parsed",function(res){ - assert.equal(res[0]["header1.filed1"],"q7"); - assert.equal(res[0]["header2.field1[]"],"undefinedzvTY3Qd3pSkKOk"); - done(); - }); - rs.pipe(conv); - }) - it ("should pre process raw data in the line",function(done){ + // it ("should auto flat header if header is not valid nested json keys",function(done){ + // var testData = __dirname + "/data/invalidHeader"; + // var rs = fs.createReadStream(testData); + // var conv=new Converter(); + // conv.on("end_parsed",function(res){ + // console.log(res[0]) + // assert.equal(res[0]["header1.filed1"],"q7"); + // assert.equal(res[0]["header2.field1[]"],"undefinedzvTY3Qd3pSkKOk"); + // done(); + // }); + // rs.pipe(conv); + // }) + it("should pre process raw data in the line", function (done) { var testData = __dirname + "/data/quoteTolerant"; var rs = fs.createReadStream(testData); - var conv=new Converter(); - conv.preProcessRaw=function(d,cb){ - d=d.replace('32"','32""'); + var conv = new Converter(); + conv.preRawData(function (d, cb) { + d = d.replace('THICK', 'THIN'); cb(d); - } - conv.on("end_parsed",function(res){ - assert(res[0].Description.indexOf('32"')>-1); + }) + conv.on("end_parsed", function (res) { + assert(res[0].Description.indexOf('THIN') > -1); done(); }); rs.pipe(conv); }) - it ("should pre process by line in the line",function(done){ + it("should pre process by line in the line", function (done) { var testData = __dirname + "/data/quoteTolerant"; var rs = fs.createReadStream(testData); - var conv=new Converter(); - conv.preProcessLine=function(line,lineNumber){ - if (lineNumber === 2){ - line=line.replace('32"','32""'); + var conv = new Converter(); + conv.preFileLine(function (line, lineNumber) { + if (lineNumber === 2) { + line = line.replace('THICK', 'THIN'); } - return line; - } - conv.on("end_parsed",function(res){ - assert(res[0].Description.indexOf('32"')>-1); + return line + }) + + conv.on("end_parsed", function (res) { + assert(res[0].Description.indexOf('THIN') > -1); done(); }); rs.pipe(conv); }) - it ("should support object mode",function(done){ + it("should support object mode", function (done) { var testData = __dirname + "/data/complexJSONCSV"; var rs = fs.createReadStream(testData); - var conv=new Converter({},{ - objectMode:true + var conv = new Converter({}, { + objectMode: true }); - conv.on("data",function(d){ - assert(typeof d === "object"); + conv.on("data", function (d) { + assert(typeof d === "object"); }); - conv.on("end_parsed",function(res){ + conv.on("end_parsed", function (res) { assert(res); - assert(res.length>0); + assert(res.length > 0); done(); }) rs.pipe(conv); }) - it ("should get delimiter automatically if there is no header",function(done){ + it("should get delimiter automatically if there is no header", function (done) { var test_converter = new Converter({ delimiter: 'auto', headers: ['col1', 'col2'], @@ -233,89 +235,111 @@ describe("CSV Converter", function() { }); var my_data = 'first_val\tsecond_val'; - test_converter.fromString(my_data, function(err, result) { + test_converter.fromString(my_data, function (err, result) { assert(!err); - assert.equal(result.length,1); - assert.equal(result[0].col1,"first_val"); - assert.equal(result[0].col2,"second_val"); + assert.equal(result.length, 1); + assert.equal(result[0].col1, "first_val"); + assert.equal(result[0].col2, "second_val"); done(); }); }); - it ("should process escape chars",function(done){ + it("should process escape chars", function (done) { var test_converter = new Converter({ - escape:"\\" + escape: "\\" }); var testData = __dirname + "/data/dataWithSlashEscape"; var rs = fs.createReadStream(testData); - test_converter.on("end_parsed",function(res){ - assert.equal(res[0].raw.hello,"world"); - assert.equal(res[0].raw.test,true); + test_converter.on("end_parsed", function (res) { + assert.equal(res[0].raw.hello, "world"); + assert.equal(res[0].raw.test, true); done(); }); rs.pipe(test_converter); }); - it ("should output ndjson format",function(done){ - var conv=new Converter(); - conv.fromString("a,b,c\n1,2,3\n4,5,6").on("data",function(d){ - d=d.toString(); - assert.equal(d[d.length-1],"\n") - }) - .on("end",done) + it("should output ndjson format", function (done) { + var conv = new Converter(); + conv.fromString("a,b,c\n1,2,3\n4,5,6").on("data", function (d) { + d = d.toString(); + assert.equal(d[d.length - 1], "\n") + }) + .on("end", done) }) - it ("should parse from stream",function(done){ + it("should parse from stream", function (done) { var testData = __dirname + "/data/complexJSONCSV"; var rs = fs.createReadStream(testData); csv() - .fromStream(rs) - .on("end_parsed",function(res){ - assert(res); - done(); - }) + .fromStream(rs) + .on("end_parsed", function (res) { + assert(res); + done(); + }) }) - it ("should emit json and csv event",function(done){ + it("should emit json and csv and finish event", function (done) { var testData = __dirname + "/data/complexJSONCSV"; var rs = fs.createReadStream(testData); - var numofrow=0; - var numofjson=0; + var numofrow = 0; + var numofjson = 0; csv() - .fromStream(rs) - .on('csv',function(row){ + .fromStream(rs) + .on('csv', function (row) { numofrow++; - }) - .on("json",function(res){ - numofjson++; - assert.equal(typeof res,"object") - }) - .on("end",function(){ - assert.equal(numofjson,numofrow) - assert(numofrow!=0) - done(); - }) + }) + .on("json", function (res) { + numofjson++; + assert.equal(typeof res, "object") + }) + .on("done", function (error) { + assert(!error) + assert.equal(numofjson, numofrow) + assert(numofrow != 0) + done(); + }) }) - it ("should transform with transf function",function(done){ + it("should transform with transf function", function (done) { var testData = __dirname + "/data/complexJSONCSV"; var rs = fs.createReadStream(testData); - var numofrow=0; - var numofjson=0; + var numofrow = 0; + var numofjson = 0; csv() - .fromStream(rs) - .transf(function(json,row,idx){ - json.a="test"; - }) - .on('csv',function(row){ + .fromStream(rs) + .transf(function (json, row, idx) { + json.a = "test"; + }) + .on('csv', function (row) { numofrow++; - }) - .on("json",function(res){ - numofjson++; - assert.equal(typeof res,"object") - assert.equal(res.a,"test") - }) - .on("end",function(){ - assert.equal(numofjson,numofrow) - assert(numofrow!=0) + }) + .on("json", function (res) { + numofjson++; + assert.equal(typeof res, "object") + assert.equal(res.a, "test") + }) + .on("end", function () { + assert.equal(numofjson, numofrow) + assert(numofrow != 0) + done(); + }) + }) + it("should parse a complex JSON", function (done) { + var converter = new Converter(); + var r = fs.createReadStream(__dirname + "/data/complexJSONCSV"); + converter.on("end_parsed", function (res) { + assert(res); + assert(res.length === 2); + assert(res[0].fieldA.title === "Food Factory"); + assert(res[0].fieldA.children.length === 2); + assert(res[0].fieldA.children[0].name === "Oscar"); + assert(res[0].fieldA.children[0].id === 23); + assert(res[0].fieldA.children[1].name === "Tikka"); + assert.equal(res[0].fieldA.children[1].employee.length, 2); + assert(res[0].fieldA.children[1].employee[0].name === "Tim", JSON.stringify(res[0].fieldA.children[1].employee[0])); + assert(res[0].fieldA.address.length === 2); + assert(res[0].fieldA.address[0] === "3 Lame Road"); + assert(res[0].fieldA.address[1] === "Grantstown"); + assert(res[0].description === "A fresh new food factory", res[0].description); done(); - }) + }); + r.pipe(converter); }) - + }); diff --git a/test/testParserMgr.js b/test/testParserMgr.js index 4f51a24..0cf7c3c 100644 --- a/test/testParserMgr.js +++ b/test/testParserMgr.js @@ -132,27 +132,7 @@ describe("ParserMgr", function() { assert(resultRow.myJSON.item1.arr.length === 3); assert(resultRow.myJSON.item1.arr[2].title === "item3"); }); - it("should parse a complex JSON's original CSV file", function (done) { - var converter = new Converter(); - var r = fs.createReadStream(__dirname + "/data/complexJSONCSV"); - converter.on("end_parsed", function (res) { - assert(res); - assert(res.length === 2); - assert(res[0].fieldA.title === "Food Factory"); - assert(res[0].fieldA.children.length === 2); - assert(res[0].fieldA.children[0].name === "Oscar"); - assert(res[0].fieldA.children[0].id === 23); - assert(res[0].fieldA.children[1].name === "Tikka"); - assert(res[0].fieldA.children[1].employee.length === 2); - assert(res[0].fieldA.children[1].employee[0].name === "Tim", JSON.stringify(res[0].fieldA.children[1].employee[0])); - assert(res[0].fieldA.address.length === 2); - assert(res[0].fieldA.address[0] === "3 Lame Road"); - assert(res[0].fieldA.address[1] === "Grantstown"); - assert(res[0].description === "A fresh new food factory",res[0].description); - done(); - }); - r.pipe(converter); - }); + it("should parse as flat json keys containing dots and square brackets in 'flatKeys' mode", function() { var parser1 = parserMgr.getParser("*json*myJSON.item[0].foo"); var parser2 = parserMgr.getParser("*json*myJSON.item[1].foo");