diff --git a/code/shared/data.ts b/code/shared/data.ts
new file mode 100644
index 0000000..3e61f50
--- /dev/null
+++ b/code/shared/data.ts
@@ -0,0 +1,411 @@
+// import { Csv2md } from 'csv2md'
+import { parse, Options } from 'csv-parse/browser/esm/sync'
+import typeOf from 'type-detect'
+import escapeSqlString from 'sql-escape-string'
+import {
+  unflattenObject,
+  flattenObject,
+  flattenObjectSafe,
+} from './object'
+import _ from 'lodash'
+import sortKeys from 'json-keys-sort'
+
+// let csv2md = new Csv2md({
+//   pretty: true,
+// })
+
+// export async function convertCSVToMarkdownTable(text: string) {
+//   let markdown = await csv2md.convert(text)
+//   return markdown
+// }
+
+/**
+ * Parse CSV text into records and expand delimited column names (for
+ * example `user.name`) into nested objects.
+ *
+ * @param text - Raw CSV text.
+ * @param options - `csv-parse` options plus `nested_delimiter`, the
+ *   separator used to unflatten column names (default `.`). `trim`,
+ *   `columns` and `bom` default to `true`; all other options are handed
+ *   to `csv-parse` unchanged.
+ * @returns The parsed records after unflattening.
+ */
+export async function convertCSVToJSON(
+  text: string,
+  {
+    nested_delimiter = '.',
+    trim = true,
+    columns = true,
+    bom = true,
+    ...rest
+  }: Options & { nested_delimiter?: string } = {},
+) {
+  // Forward the remaining options as well, so settings such as
+  // `delimiter` or `from_line` are not silently dropped.
+  const records = parse(text, { ...rest, trim, columns, bom }).map(
+    (record: Record<string, unknown>) =>
+      unflattenObject(record, nested_delimiter),
+  )
+
+  return records
+}
+
+// export function convertCsvToSql() {
+//   const records = convertCSVToJSON(text)
+//   return convertJsonToSql(records)
+// }
+
+/** Per-column statistics gathered while records are scanned. */
+export type TableColumnBuilder = {
+  /** Kinds of value seen in the column (`json`, `string`, ...). */
+  types: Record<string, boolean>
+  /** Zero-based position of the column inside every row. */
+  index: number
+  min?: number
+  max?: number
+  size?: number
+  precision?: number
+}
+
+/** Final description of one column. */
+export type TableColumn = {
+  /**
+   * `json`, `string`, `float`, `integer`, `natural_number` or `boolean`
+   * when the table comes from `convertJsonToTable`.
+   */
+  type: string
+  /** Smallest number seen in the column. */
+  min?: number
+  /** Largest number seen in the column. */
+  max?: number
+  /** Longest string length, rounded up to a multiple of 8. */
+  size?: number
+  /** Largest count of significant digits seen in a float column. */
+  precision?: number
+}
+
+/** Column metadata (by name and by row position) plus the row data. */
+export type Table = {
+  columns: Record<string, TableColumn>
+  columnByIndex: Record<number, TableColumn>
+  rows: Array<Array<unknown>>
+}
+
+/** Mutable working state used while a `Table` is being assembled. */
+export type TableBuilder = {
+  /** Index that the next newly discovered column will receive. */
+  columnIndex: number
+  columns: Record<string, TableColumnBuilder>
+  columnByIndex: Record<number, TableColumnBuilder>
+  rows: Array<Array<unknown>>
+}
+
+/** Identifier quoting styles for `convertTableToPostgresqlOrMySql`. */
+export type SqlDialect = 'mysql' | 'postgresql'
+
+/**
+ * Convert JSON records into a `CREATE TABLE` plus `INSERT` script for
+ * a table named `table`.
+ *
+ * @param records - Plain objects; nested objects become dotted columns.
+ * @returns The SQL script as a single string.
+ */
+export function convertJsonToSql(records: ReadonlyArray<object>) {
+  const table = convertJsonToTable(
+    records as Array<Record<string, unknown>>,
+  )
+  return convertTableToPostgresqlOrMySql('table', table)
+}
+
+/** Quote an identifier, doubling any embedded quote character. */
+function quoteIdentifier(name: string, dialect: SqlDialect) {
+  return dialect === 'postgresql'
+    ? `"${name.replace(/"/g, '""')}"`
+    : `\`${name.replace(/`/g, '``')}\``
+}
+
+/** Pick the narrowest integer type that holds the whole range. */
+function resolveIntegerType(min?: number, max?: number) {
+  if (min == null || max == null) {
+    // Unknown range: choose the widest native integer type.
+    return 'BIGINT'
+  }
+  if (min >= -32768 && max <= 32767) {
+    return 'SMALLINT'
+  }
+  if (min >= -2147483648 && max <= 2147483647) {
+    return 'INTEGER'
+  }
+  if (min >= -9223372036854775808n && max <= 9223372036854775807n) {
+    return 'BIGINT'
+  }
+  // Outside the 64-bit range: fall back to an exact decimal.
+  return 'DECIMAL(65, 0)'
+}
+
+/** Map a column description to its SQL type; unknown kinds get TEXT. */
+function resolveSqlType(column: TableColumn) {
+  switch (column.type) {
+    case 'json':
+      return 'JSON'
+    case 'boolean':
+      return 'BOOLEAN'
+    case 'string':
+      // 255 is only a fallback for hand-built tables without a size.
+      return `VARCHAR(${column.size ?? 255})`
+    case 'integer':
+    case 'natural_number':
+      return resolveIntegerType(column.min, column.max)
+    case 'float':
+      // Every JavaScript number is a double, so DOUBLE PRECISION holds
+      // it exactly; REAL is enough for at most 6 significant digits.
+      return column.precision != null && column.precision <= 6
+        ? 'REAL'
+        : 'DOUBLE PRECISION'
+    default:
+      return 'TEXT'
+  }
+}
+
+/** Render one cell as a SQL literal for the given column. */
+function toSqlLiteral(value: unknown, column: TableColumn) {
+  if (value == null) {
+    return 'NULL'
+  }
+  if (column.type === 'json') {
+    return escapeSqlString(JSON.stringify(value)) as string
+  }
+  if (column.type !== 'string') {
+    if (typeof value === 'number') {
+      // SQL has no literal for NaN or Infinity.
+      return Number.isFinite(value) ? String(value) : 'NULL'
+    }
+    if (typeof value === 'boolean' || typeof value === 'bigint') {
+      return String(value)
+    }
+  }
+  // Strings, and anything stored in a string column, are quoted.
+  const raw =
+    typeof value === 'object' ? JSON.stringify(value) : String(value)
+  return escapeSqlString(raw) as string
+}
+
+/**
+ * List the columns in row order. `table.columns` cannot be trusted for
+ * this because JavaScript enumerates integer-like keys (a column named
+ * `2023`) before all other keys, whatever the insertion order was.
+ */
+function orderColumns(table: Table) {
+  const positions = new Map<TableColumn, number>()
+  for (const [index, column] of Object.entries(table.columnByIndex)) {
+    positions.set(column, Number(index))
+  }
+  return Object.entries(table.columns)
+    .map(([name, column], fallback) => ({
+      name,
+      column,
+      index: positions.get(column) ?? fallback,
+    }))
+    .sort((a, b) => a.index - b.index)
+}
+
+/**
+ * Render a table as a `CREATE TABLE` statement followed, when there are
+ * rows, by a single multi-row `INSERT`.
+ *
+ * @param name - Table name; it is quoted for the chosen dialect.
+ * @param table - Column metadata and rows, e.g. from `convertJsonToTable`.
+ * @param dialect - `mysql` (default) quotes identifiers with backticks,
+ *   `postgresql` with double quotes.
+ * @returns The SQL script as a single string.
+ */
+export function convertTableToPostgresqlOrMySql(
+  name: string,
+  table: Table,
+  dialect: SqlDialect = 'mysql',
+) {
+  const tableName = quoteIdentifier(name, dialect)
+  const columns = orderColumns(table)
+  // Joining avoids the trailing comma that made the statement invalid.
+  const definitions = columns
+    .map(
+      ({ name: columnName, column }) =>
+        `  ${quoteIdentifier(columnName, dialect)} ${resolveSqlType(column)}`,
+    )
+    .join(',\n')
+  const text: Array<string> = [
+    `CREATE TABLE ${tableName} (`,
+    definitions,
+    `);`,
+  ]
+
+  if (table.rows.length > 0) {
+    // Emit one value per declared column so sparse or short rows still
+    // line up with the column list; missing cells become NULL.
+    const tuples = table.rows.map(row => {
+      const line = columns.map(({ column, index }) =>
+        toSqlLiteral(row[index], column),
+      )
+      return `(${line.join(', ')})`
+    })
+    text.push(``, `INSERT INTO ${tableName} VALUES`)
+    text.push(`${tuples.join(',\n')};`)
+  }
+
+  return text.join('\n')
+}
+
+/** Count the significant decimal digits of a number. */
+function countSignificantDigits(value: number) {
+  // `toExponential()` prints only the digits needed, e.g. `1.25e+3`.
+  const [mantissa = ''] = value.toExponential().split('e')
+  return Math.max(mantissa.replace(/\D/g, '').length, 1)
+}
+
+/**
+ * Record one value in the column statistics and return its kind:
+ * `uint`, `int`, `float`, `json`, `boolean` or `string`.
+ *
+ * @throws Error for kinds without a SQL mapping (dates, functions, ...).
+ */
+function classifyValue(value: unknown, column: TableColumnBuilder) {
+  const type = typeOf(value)
+  if (type === 'number') {
+    const n = value as number
+    // Seed the range with the first value, not 0, so `min` and `max`
+    // describe the data actually seen.
+    column.min = Math.min(column.min ?? n, n)
+    column.max = Math.max(column.max ?? n, n)
+    if (_.isInteger(n)) {
+      return n >= 0 ? 'uint' : 'int'
+    }
+    column.precision = Math.max(
+      countSignificantDigits(n),
+      column.precision ?? 1,
+    )
+    return 'float'
+  }
+  if (type === 'Array' || type === 'Object') {
+    return 'json'
+  }
+  if (type === 'boolean') {
+    return 'boolean'
+  }
+  if (type === 'string') {
+    column.size = Math.max(column.size || 4, (value as string).length)
+    return 'string'
+  }
+  throw new Error(`${type} not handled yet.`)
+}
+
+/** Collapse the kinds seen in a column into its final description. */
+function finalizeColumn(column: TableColumnBuilder): TableColumn {
+  const { types, min, max } = column
+  if (types.json) {
+    return { type: 'json' }
+  }
+  if (types.string) {
+    return {
+      type: 'string',
+      size: roundNearestMultiple(column.size ?? 8, 8),
+    }
+  }
+  if (types.float) {
+    return { type: 'float', precision: column.precision }
+  }
+  if (types.int) {
+    return { type: 'integer', min, max }
+  }
+  if (types.uint) {
+    return { type: 'natural_number', min, max }
+  }
+  if (types.boolean) {
+    return { type: 'boolean' }
+  }
+  // Only null values were seen: keep the column as a nullable string.
+  return { type: 'string', size: 8 }
+}
+
+// https://www.npmjs.com/package/sqlstring
+/**
+ * Infer a table from JSON records. Each record is flattened into dotted
+ * column names and every column gets the most general kind seen in it
+ * (json, then string, float, integer, natural number, boolean).
+ *
+ * @param records - Plain objects; `null` and `undefined` values are
+ *   stored as `null` cells and do not influence the column kind.
+ * @returns The column metadata plus one row array per record.
+ * @throws Error when a value has a kind that is not handled.
+ */
+export function convertJsonToTable(
+  records: Array<Record<string, unknown>>,
+) {
+  const table: TableBuilder = {
+    columnIndex: 0,
+    columns: {},
+    columnByIndex: {},
+    rows: [],
+  }
+  const hasColumn = (name: string) =>
+    Object.prototype.hasOwnProperty.call(table.columns, name)
+
+  records.forEach(record => {
+    const flattened = sortKeys.sort(
+      flattenObjectSafe(record) as Record<string, unknown>,
+    )
+    const row: Array<unknown> = []
+    table.rows.push(row)
+    for (const name in flattened) {
+      // An own-property test keeps names such as `constructor` from
+      // matching members inherited from `Object.prototype`.
+      if (!hasColumn(name)) {
+        table.columnByIndex[table.columnIndex] = table.columns[name] = {
+          types: {},
+          index: table.columnIndex,
+        }
+        table.columnIndex++
+      }
+
+      const column = table.columns[name] as TableColumnBuilder
+      const value = flattened[name]
+      // No type can be inferred from null; the cell is filled below.
+      if (value == null) {
+        continue
+      }
+
+      column.types[classifyValue(value, column)] = true
+      row[column.index] = value
+    }
+  })
+
+  const newTable: Table = {
+    columns: {},
+    columnByIndex: {},
+    rows: table.rows,
+  }
+  for (const name in table.columns) {
+    const column = table.columns[name] as TableColumnBuilder
+    newTable.columnByIndex[column.index] = newTable.columns[name] =
+      finalizeColumn(column)
+  }
+
+  // Fill the gaps left by absent or null values so that every row has
+  // one cell per column.
+  for (const row of table.rows) {
+    for (let i = 0; i < table.columnIndex; i++) {
+      if (row[i] === undefined) {
+        row[i] = null
+      }
+    }
+  }
+
+  return newTable
+}
+
+/**
+ * Round `x` up to a multiple of `n`. Despite the name this never rounds
+ * down: `roundNearestMultiple(9, 8)` is 16.
+ */
+export function roundNearestMultiple(x: number, n: number) {
+  return Math.ceil(x / n) * n
+}
diff --git a/code/shared/object.ts b/code/shared/object.ts
new file mode 100644
index 0000000..7f84d2e
--- /dev/null
+++ b/code/shared/object.ts
@@ -0,0 +1,22 @@
+import { flatten, unflatten } from 'flat'
+
+/**
+ * Flatten nested objects into a single level keyed by delimited paths,
+ * leaving arrays and their contents untouched (`safe: true`).
+ */
+export function flattenObjectSafe(
+  obj: object,
+  delimiter: string = '.',
+) {
+  return flatten(obj, { delimiter, safe: true })
+}
+
+/** Flatten nested objects and arrays into delimited path keys. */
+export function flattenObject(obj: object, delimiter: string = '.') {
+  return flatten(obj, { delimiter })
+}
+
+/** Rebuild a nested object from delimited path keys. */
+export function unflattenObject(obj: object, delimiter: string = '.') {
+  return unflatten(obj, { delimiter })
+}
diff --git a/package.json b/package.json
index 5df064c..8e081ca 100644
--- a/package.json
+++ b/package.json
@@ -35,6 +35,7 @@
     "@termsurf/form": "^0.4.72",
     "@types/archiver": "^6.0.2",
     "@types/bytes": "^3.1.4",
+    "@types/flat": "^5.0.5",
     "@types/memorystream": "^0.3.4",
     "@types/node-forge": "^1.3.11",
     "@types/replace-ext": "^2.0.2",
@@ -42,9 +43,12 @@
     "bytes": "^3.1.2",
     "chalk": "^5.3.0",
     "concurrently": "^8.2.2",
+    "csv-parse": "^5.5.3",
     "dayjs": "^1.11.10",
     "fast-glob": "^3.3.2",
+    "flat": "^6.0.1",
     "gifsicle-wasm-browser": "^1.5.16",
+    "json-keys-sort": "^2.1.0",
     "libreoffice-convert": "^1.5.1",
     "memory-streams": "^0.1.3",
     "memorystream": "^0.3.1",
@@ -55,11 +59,13 @@
     "puppeteer-extra": "^3.3.6",
     "puppeteer-extra-plugin-stealth": "^2.11.2",
     "replace-ext": "^2.0.0",
+    "sql-escape-string": "^1.1.0",
     "svgexport": "^0.4.2",
     "tmp-promise": "^3.0.3",
     "tree-sitter": "^0.20.6",
     "ts-custom-error": "^3.3.1",
     "tsc-alias": "^1.8.8",
+    "type-detect": "^4.0.8",
     "typescript": "^5.3.3",
     "unrar": "^0.2.0",
     "web-tree-sitter": "^0.20.8",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 856feae..60f4780 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -29,6 +29,9 @@ dependencies:
   '@types/bytes':
     specifier: ^3.1.4
     version: 3.1.4
+  '@types/flat':
+    specifier: ^5.0.5
+    version: 5.0.5
   '@types/memorystream':
     specifier: ^0.3.4
     version: 0.3.4
@@ -50,15 +53,24 @@ dependencies:
   concurrently:
     specifier: ^8.2.2
     version: 8.2.2
+  csv-parse:
+    specifier: ^5.5.3
+    version: 5.5.3
   dayjs:
     specifier: ^1.11.10
     version: 1.11.10
   fast-glob:
     specifier: ^3.3.2
     version: 3.3.2
+  flat:
+    specifier: ^6.0.1
+    version: 6.0.1
   gifsicle-wasm-browser:
     specifier: ^1.5.16
     version: 1.5.16
+  json-keys-sort:
+    specifier: ^2.1.0
+    version: 2.1.0
   libreoffice-convert:
     specifier: ^1.5.1
     version: 1.5.1
@@ -89,6 +101,9 @@ dependencies:
replace-ext: specifier: ^2.0.0 version: 2.0.0 + sql-escape-string: + specifier: ^1.1.0 + version: 1.1.0 svgexport: specifier: ^0.4.2 version: 0.4.2 @@ -104,6 +119,9 @@ dependencies: tsc-alias: specifier: ^1.8.8 version: 1.8.8 + type-detect: + specifier: ^4.0.8 + version: 4.0.8 typescript: specifier: ^5.3.3 version: 5.3.3 @@ -1167,6 +1185,10 @@ packages: '@types/ms': 0.7.34 dev: false + /@types/flat@5.0.5: + resolution: {integrity: sha512-nPLljZQKSnac53KDUDzuzdRfGI0TDb5qPrb+SrQyN3MtdQrOnGsKniHN1iYZsJEBIVQve94Y6gNz22sgISZq+Q==} + dev: false + /@types/json-schema@7.0.15: resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} dev: true @@ -1918,6 +1940,10 @@ packages: resolution: {integrity: sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==} dev: true + /csv-parse@5.5.3: + resolution: {integrity: sha512-v0KW6C0qlZzoGjk6u5tLmVfyZxNgPGXZsWTXshpAgKVGmGXzaVWGdlCFxNx5iuzcXT/oJN1HHM9DZKwtAtYa+A==} + dev: false + /damerau-levenshtein@1.0.8: resolution: {integrity: sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA==} dev: true @@ -2768,6 +2794,12 @@ packages: rimraf: 3.0.2 dev: true + /flat@6.0.1: + resolution: {integrity: sha512-/3FfIa8mbrg3xE7+wAhWeV+bd7L2Mof+xtZb5dRDKZ+wDvYJK4WDYeIOuOhre5Yv5aQObZrlbRmk3RTSiuQBtw==} + engines: {node: '>=18'} + hasBin: true + dev: false + /flatted@3.2.9: resolution: {integrity: sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ==} dev: true @@ -3345,6 +3377,10 @@ packages: resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==} dev: true + /json-keys-sort@2.1.0: + resolution: {integrity: sha512-Xo8Ep77w1K+GZ2DZ7S74Pyjlgqr3RP2tgFPDinwl5Z6YOculeDUJKosPScNzEZkhCvDD4W81r3qY7cWN+n98RQ==} + dev: false + /json-parse-even-better-errors@2.3.1: resolution: {integrity: 
sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} dev: false @@ -4733,6 +4769,10 @@ packages: resolution: {integrity: sha512-zC8zGoGkmc8J9ndvml8Xksr1Amk9qBujgbF0JAIWO7kXr43w0h/0GJNM/Vustixu+YE8N/MTrQ7N31FvHUACxQ==} dev: false + /sql-escape-string@1.1.0: + resolution: {integrity: sha512-/kqO4pLZSLfV0KsBM2xkVh2S3GbjJJone37d7gYwLyP0c+REh3vnmkhQ7VwNrX76igC0OhJWpTg0ukkdef9vvA==} + dev: false + /streamx@2.15.6: resolution: {integrity: sha512-q+vQL4AAz+FdfT137VF69Cc/APqUbxy+MDOImRrMvchJpigHj9GksgDU2LYbO9rx7RX6osWgxJB2WxhYv4SZAw==} dependencies: @@ -5014,6 +5054,11 @@ packages: prelude-ls: 1.2.1 dev: true + /type-detect@4.0.8: + resolution: {integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==} + engines: {node: '>=4'} + dev: false + /type-fest@0.20.2: resolution: {integrity: sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==} engines: {node: '>=10'} diff --git a/tsconfig.json b/tsconfig.json index cc65ca9..efc4ea7 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "module": "ESNext", - "target": "ES6", + "target": "ES2020", "lib": ["es2020", "dom"], "outDir": "host", "rootDir": ".",