From c08d22cdf202a4679f2a77394d97513cd9f36fb6 Mon Sep 17 00:00:00 2001 From: Keyang Date: Wed, 3 May 2017 17:34:24 +0100 Subject: [PATCH] fix #158 --- libs/core/Converter.js | 65 ++++++++++++++++++++++++++++++++++++--- libs/core/defParam.js | 15 +++++++-- libs/core/filterRow.js | 19 ++++++++++++ libs/core/rowSplit.js | 22 ++----------- test/testCSVConverter3.js | 32 +++++++++++++++++++ 5 files changed, 128 insertions(+), 25 deletions(-) create mode 100644 libs/core/filterRow.js create mode 100644 test/testCSVConverter3.js diff --git a/libs/core/Converter.js b/libs/core/Converter.js index 58d947c..5dacb66 100644 --- a/libs/core/Converter.js +++ b/libs/core/Converter.js @@ -9,6 +9,8 @@ var fileLineToCSVLine = require("./fileLineToCSVLine"); var linesToJson = require("./linesToJson"); var CSVError = require("./CSVError"); var workerMgr = null; +var _ = require('lodash'); +var rowSplit = require("./rowSplit"); function Converter(params, options) { Transform.call(this, options); this._options = options || {}; @@ -212,10 +214,30 @@ Converter.prototype.processHead = function (fileLine, cb) { if (params._headers) { return cb(); } - + //dirty hack + params._needFilterRow = false; // if header is not inited. init header - var lines = fileLineToCSVLine(fileLine, params); - this.setPartialData(lines.partial); + var lines = fileLine.lines; + var left = ""; + var headerRow = []; + if (!params.noheader) { + while (lines.length) { + var line = left + lines.shift(); + var row = rowSplit(line, params); + if (row.closed) { + headerRow = row.cols; + left = ""; + break; + } else { + left = line + this.getEol(); + } + } + } + params._needFilterRow = true; + if (!params.noheader && headerRow.length === 0) { //if one chunk of data does not complete header row. + this.setPartialData(left); + return cb(); + } if (params.noheader) { if (params.headers) { params._headers = params.headers; @@ -223,13 +245,16 @@ Converter.prototype.processHead = function (fileLine, cb) { params._headers = []; } } else { - var headerRow = lines.lines.shift(); if (params.headers) { params._headers = params.headers; } else { params._headers = headerRow; } } + configIgnoreIncludeColumns(params); + params._headers = require("./filterRow")(params._headers, params); + var lines = fileLineToCSVLine(fileLine, params); + this.setPartialData(lines.partial); if (this.param.workerNum > 1) { this.workerMgr.setParams(params); } @@ -237,8 +262,40 @@ Converter.prototype.processHead = function (fileLine, cb) { this.processResult(res); this.lastIndex += res.length; this.recordNum += res.length; + cb(); }; +function configIgnoreIncludeColumns(params) { + if (params._postIgnoreColumns) { + for (var i = 0; i < params.ignoreColumns.length; i++) { + var ignoreCol = params.ignoreColumns[i]; + if (typeof ignoreCol === "string") { + var idx = params._headers.indexOf(ignoreCol); + if (idx > -1) { + params.ignoreColumns[i] = idx; + } else { + params.ignoreColumns[i] = -1; + } + } + } + params.ignoreColumns.sort(function (a, b) { return b - a; }); + } + if (params._postIncludeColumns) { + for (var i = 0; i < params.includeColumns.length; i++) { + var includeCol = params.includeColumns[i]; + if (typeof includeCol === "string") { + var idx = params._headers.indexOf(includeCol); + if (idx > -1) { + params.includeColumns[i] = idx; + } else { + params.includeColumns[i] = -1; + } + } + } + } + params.ignoreColumns = _.uniq(params.ignoreColumns); + params.includeColumns = _.uniq(params.includeColumns); +} Converter.prototype.processResult = function (result) { for (var i = 0, len = result.length; i < len; i++) { diff --git a/libs/core/defParam.js b/libs/core/defParam.js index e310681..5edee0b 100644 --- a/libs/core/defParam.js +++ b/libs/core/defParam.js @@ -1,3 +1,4 @@ +var numExp = /^[0-9]+$/; module.exports = function (params) { var _param = { constructResult: true, //set to false to not construct result in memory. suitable for big csv data @@ -23,7 +24,7 @@ module.exports = function (params) { _headerTitle: [], _headerFlag: [], _headers: null, - _needFilterRow:false + _needFilterRow: false }; if (!params) { params = {}; @@ -33,11 +34,21 @@ module.exports = function (params) { _param[key] = params[key]; } } + if (_param.ignoreColumns.length > 0 && !numExp.test(_param.ignoreColumns.join(""))) { + _param._postIgnoreColumns = true; + } + if (_param.includeColumns.length > 0 && !numExp.test(_param.includeColumns.join(""))) { + _param._postIncludeColumns = true; + } + if (_param.ignoreColumns.length || _param.includeColumns.length) { _param._needFilterRow = true; - _param.ignoreColumns.sort(function (a, b) { return b - a; }); + if (!_param._postIgnoreColumns){ + _param.ignoreColumns.sort(function (a, b) { return b-a;}); + } } + return _param; }; diff --git a/libs/core/filterRow.js b/libs/core/filterRow.js new file mode 100644 index 0000000..516edbb --- /dev/null +++ b/libs/core/filterRow.js @@ -0,0 +1,19 @@ +module.exports=function filterRow(row, param) { + if (param.ignoreColumns instanceof Array && param.ignoreColumns.length > 0) { + for (var igRow = 0, igColLen = param.ignoreColumns.length; igRow < igColLen; igRow++) { + if (param.ignoreColumns[igRow] >= 0) { + row.splice(param.ignoreColumns[igRow], 1); + } + } + } + if (param.includeColumns instanceof Array && param.includeColumns.length > 0) { + var cleanRowArr = []; + for (var inRow = 0, inColLen = param.includeColumns.length; inRow < inColLen; inRow++) { + if (param.includeColumns[inRow] >= 0) { + cleanRowArr.push(row[param.includeColumns[inRow]]); + } + } + row = cleanRowArr; + } + return row; +} \ No newline at end of file diff --git a/libs/core/rowSplit.js b/libs/core/rowSplit.js index 2f58dba..8219738 100644 --- a/libs/core/rowSplit.js +++ b/libs/core/rowSplit.js @@ -1,6 +1,8 @@ var getDelimiter = require("./getDelimiter"); +var filterRow=require("./filterRow"); /** * Convert a line of string to csv columns according to its delimiter + * the param._header may not be ready when this is called. * @param {[type]} rowStr [description] * @param {[type]} param [Converter param] * @return {[type]} {cols:["a","b","c"],closed:boolean} the closed field indicate if the row is a complete row @@ -82,25 +84,7 @@ module.exports = function rowSplit(rowStr, param) { }; -function filterRow(row, param) { - if (param.ignoreColumns instanceof Array && param.ignoreColumns.length > 0) { - for (var igRow = 0, igColLen = param.ignoreColumns.length; igRow < igColLen; igRow++) { - if (param.ignoreColumns[igRow] >= 0) { - row.splice(param.ignoreColumns[igRow], 1); - } - } - } - if (param.includeColumns instanceof Array && param.includeColumns.length > 0) { - var cleanRowArr = []; - for (var inRow = 0, inColLen = param.includeColumns.length; inRow < inColLen; inRow++) { - if (param.includeColumns[inRow] >= 0) { - cleanRowArr.push(row[param.includeColumns[inRow]]); - } - } - row = cleanRowArr; - } - return row; -} + function isQuoteOpen(str, param) { var quote = param.quote; diff --git a/test/testCSVConverter3.js b/test/testCSVConverter3.js new file mode 100644 index 0000000..92df88b --- /dev/null +++ b/test/testCSVConverter3.js @@ -0,0 +1,32 @@ +var csv = require("../"); +var assert = require("assert"); +var fs = require("fs"); +describe("CSV Converter", function () { + it ("should ignore column only once",function(done){ + csv({ + ignoreColumns:[0,0] + }) + .fromFile(__dirname+"/data/complexJSONCSV") + .on('json',function(json){ + assert(!json.fieldA.title); + assert(json.fieldA.children[0].name); + }) + .on('done',function(){ + done() + }); + }) + it ("should ignore column by header name",function(done){ + csv({ + ignoreColumns:[0,"fieldA.title",2] + }) + .fromFile(__dirname+"/data/complexJSONCSV") + .on('json',function(json){ + assert(!json.fieldA.title); + assert(json.fieldA.children[0].name); + assert(!json.fieldA.children[0].id); + }) + .on('done',function(){ + done() + }); + }) +});