Skip to content

Commit

Permalink
Add support for lists
Browse files Browse the repository at this point in the history
  • Loading branch information
ZeeshanZulfiqarAli committed Nov 4, 2024
1 parent 543ecbd commit c608b96
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 73 deletions.
76 changes: 3 additions & 73 deletions node-zerox/src/openAI.ts
Original file line number Diff line number Diff line change
@@ -1,76 +1,6 @@
import { CompletionArgs, CompletionResponse } from "./types";
import { convertKeysToSnakeCase, encodeImageToBase64 } from "./utils";
import { convertKeysToSnakeCase, encodeImageToBase64, markdownToJson } from "./utils";
import axios from "axios";
import { nanoid } from "nanoid";

/**
 * Parses a markdown string into a flat array of nodes, linking each node to
 * its nearest enclosing heading via `parentId`.
 *
 * @param markdownString - Raw markdown to parse.
 * @returns Array of processed nodes (see `processNode` for the shape).
 */
const markdownToJson = async (markdownString: string) => {
  /**
   * Bypassing typescript transpiler using eval to use dynamic imports
   *
   * Source: https://stackoverflow.com/a/70546326
   */
  const { unified } = await eval(`import('unified')`);
  const { default: remarkParse } = await eval(`import('remark-parse')`);
  // remark-gfm ships a default export; destructuring `{ remarkGfm }` would
  // yield undefined and unified() would reject it as a plugin.
  const { default: remarkGfm } = await eval(`import('remark-gfm')`);

  const parsedMd = unified()
    .use(remarkParse) // Parse Markdown to AST
    .use(remarkGfm)
    .parse(markdownString);

  // Stack of the ids of currently-open heading ancestors.
  const parentIdManager: string[] = [];

  // Parallel stack of heading depths; 0 is a sentinel so the loop below
  // terminates without emptying the stack past its base.
  let depths = [0];

  const jsonObj = parsedMd.children.map((node: any) => {
    const isHeading = node.type === "heading";
    // A heading of depth d closes every open heading whose depth >= d, not
    // just the innermost one (resolves the old TODO's single-pop bug).
    while (isHeading && node.depth <= (depths.at(-1) || 0)) {
      parentIdManager.pop();
      depths.pop();
    }
    const processedNode = processNode(node, parentIdManager.at(-1));

    if (isHeading) {
      parentIdManager.push(processedNode.id);
      if (depths.at(-1) !== node.depth) depths.push(node.depth);
    }

    return processedNode;
  });

  return jsonObj;
};

/** Maps mdast node types onto output node types; unmapped types fall back to "text". */
const type: Record<string, string> = { heading: "heading", text: "text" };

/**
 * Converts one mdast node into a flat output node.
 *
 * @param node - mdast node (expected shape: `type`, optional `children`).
 * @param parentId - Id of the nearest enclosing heading, if any.
 * @returns Object with a fresh id, the parent link, a mapped type, and a
 *   text value (undefined for node types other than heading/paragraph).
 */
const processNode = (node: any, parentId?: string) => {
  let value: any;

  // Headings and paragraphs flatten identically: join the text of their
  // immediate children with spaces (previously two duplicated branches).
  if (node.type === "heading" || node.type === "paragraph") {
    value = node.children
      .map((childNode: any) => processText(childNode))
      .join(" ");
  }

  return {
    id: nanoid(),
    parentId,
    type: type[node.type as string] || type.text,
    value,
  };
};

// Extracts the literal text carried by a leaf mdast node (e.g. a `text` node).
const processText = (node: any) => node.value;

export const getCompletion = async ({
apiKey,
Expand Down Expand Up @@ -128,8 +58,8 @@ export const getCompletion = async ({

const data = response.data;

// const jsonOutput = await markdownToJson(data.choices[0].message.content);
// console.log("====>>>>", JSON.stringify(jsonOutput, null, 2));
const jsonOutput = await markdownToJson(data.choices[0].message.content);
console.log("====>>>>", JSON.stringify(jsonOutput));

return {
content: data.choices[0].message.content,
Expand Down
131 changes: 131 additions & 0 deletions node-zerox/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import fs from "fs-extra";
import mime from "mime-types";
import path from "path";
import sharp from "sharp";
import { nanoid } from "nanoid";

const convertAsync = promisify(convert);

Expand Down Expand Up @@ -311,3 +312,133 @@ export const convertKeysToSnakeCase = (
Object.entries(obj).map(([key, value]) => [camelToSnakeCase(key), value])
);
};

// One flattened node in the JSON representation of parsed markdown.
interface ProcessedNode {
  id: string; // Unique id generated with nanoid().
  parentId: string | undefined; // Id of the nearest enclosing heading, if any.
  type: string; // One of the keys of the `type` map below ("heading" | "text" | "list").
  value: any; // Joined text for headings/paragraphs; array of item nodes for lists.
}
// Stack entry tracking an open heading ancestor and its markdown depth (1-6).
// NOTE(review): name breaks the PascalCase convention for interfaces
// (`ParentId`); renaming would also touch markdownToJson, so flagged only.
interface parentId {
  id: string;
  depth: number;
}

/**
 * Parses a markdown string into a flat array of ProcessedNode objects,
 * using heading depth to assign parent/child relationships.
 *
 * @param markdownString - Raw markdown to parse.
 * @returns Flat list of processed nodes, lists included as values/siblings.
 */
export const markdownToJson = async (markdownString: string) => {
  /**
   * Bypassing typescript transpiler using eval to use dynamic imports
   *
   * Source: https://stackoverflow.com/a/70546326
   */
  const { unified } = await eval(`import('unified')`);
  const { default: remarkParse } = await eval(`import('remark-parse')`);
  // remark-gfm ships a default export; destructuring `{ remarkGfm }` would
  // yield undefined and unified() would reject it as a plugin.
  const { default: remarkGfm } = await eval(`import('remark-gfm')`);

  const parsedMd = unified()
    .use(remarkParse) // Parse Markdown to AST
    .use(remarkGfm)
    .parse(markdownString);

  // Stack of currently-open heading ancestors (id + markdown depth).
  const parentIdManager: parentId[] = [];

  const jsonObj: ProcessedNode[] = [];
  parsedMd.children.forEach((node: any) => {
    const isHeading = node.type === "heading";

    // A heading of depth d closes every open heading whose depth >= d.
    if (isHeading && node.depth <= (parentIdManager.at(-1)?.depth || 0)) {
      for (let i = parentIdManager.length; i > 0; i--) {
        parentIdManager.pop();
        if (node.depth > (parentIdManager.at(-1)?.depth || 0)) {
          break;
        }
      }
    }
    const processedNode = processNode(node, parentIdManager.at(-1)?.id);

    if (isHeading) {
      // processNode always returns at least one element, so [0] is safe.
      parentIdManager.push({ id: processedNode[0].id, depth: node.depth });
    }

    jsonObj.push(...processedNode);
  });

  return jsonObj;
};

/** Maps mdast node types onto output node types; unmapped types fall back to "text". */
const type: Record<string, string> = { heading: "heading", text: "text", list: "list" };

/**
 * Converts one mdast node into one or more flat output nodes.
 *
 * @param node - mdast node (expected shape: `type`, optional `children`).
 * @param parentId - Id of the nearest enclosing heading, if any.
 * @returns The node itself first, followed by any sibling nodes produced by
 *   nested lists inside list items.
 */
const processNode = (node: any, parentId?: string): ProcessedNode[] => {
  let value: any;
  const siblingNodes: ProcessedNode[] = [];

  // Headings and paragraphs flatten identically: join the text of their
  // immediate children with spaces (previously two duplicated branches).
  if (node.type === "heading" || node.type === "paragraph") {
    value = node.children
      .map((childNode: any) => processText(childNode))
      .join(" ");
  } else if (node.type === "list") {
    // A list's value collects its items' nodes; nested lists inside items
    // surface as siblings so the output stays flat.
    value = [];
    for (const childNode of node.children) {
      const { node: itemNodes, siblings } = processListItem(childNode);
      value.push(...itemNodes);
      siblingNodes.push(...siblings);
    }
  }

  return [
    {
      id: nanoid(),
      parentId,
      type: type[node.type as string] || type.text,
      value,
    },
    // siblingNodes is always an array here, so no `|| []` fallback is needed.
    ...siblingNodes,
  ];
};

// Returns the raw text value stored on a leaf mdast node.
const processText = (node: any) => node.value;

/**
 * Flattens one mdast listItem into a primary node plus sibling nodes.
 *
 * The first non-list child becomes the item's node; later non-list children
 * are merged into its value, and nested lists are returned as siblings
 * parented to the item's node.
 *
 * @param node - mdast listItem node.
 * @returns `{ node, siblings }` — `node` holds at most one ProcessedNode.
 */
const processListItem = (node: any) => {
  let newNode: ProcessedNode[] = [];
  let siblings: ProcessedNode[] = [];

  node.children.forEach((childNode: any) => {
    if (childNode.type !== "list") {
      const processedNode = processNode(childNode);
      if (newNode.length > 0) {
        // NOTE(review): concatenates with no separator between the existing
        // value and the first appended value — confirm ", " is not intended
        // there as well.
        newNode[0].value += processedNode.map(({ value }) => value).join(", ");
      } else {
        newNode[0] = processedNode[0];
      }
      // Any siblings produced by the child (nested structures) bubble up.
      siblings.push(...processedNode.slice(1));
    } else {
      if (newNode.length === 0) {
        // A nested list with no preceding text: synthesize an empty text
        // node so the nested list has a parent id to attach to.
        newNode = [
          {
            id: nanoid(),
            type: "text",
            value: "",
            parentId: undefined,
          },
        ];
      }
      const processedNode = processNode(childNode, newNode[0].id);
      siblings.push(...processedNode);
    }
  });

  return { node: newNode, siblings };
};
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c608b96

Please sign in to comment.