Skip to content

Commit

Permalink
feat: add import documents from urls and build index for single/multi…
Browse files Browse the repository at this point in the history
…ple documents API (#129)

* feat: add upload multiple webpage API

* feat: add build knowledge graph index API
  • Loading branch information
Mini256 authored May 11, 2024
1 parent 5843b1b commit 477b63e
Show file tree
Hide file tree
Showing 25 changed files with 566 additions and 124 deletions.
2 changes: 1 addition & 1 deletion ddl/0-initial-schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ CREATE TABLE document_import_task
error_message TEXT NULL,
created_at DATETIME NOT NULL,
finished_at DATETIME NULL,
source_id INT NOT NULL,
source_id INT NULL,
document_id INT NULL,
document_operation ENUM('CREATE', 'UPDATE') NULL,
parent_task_id INT NULL,
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-dropdown-menu": "^2.0.6",
"@radix-ui/react-hover-card": "^1.0.7",
"@radix-ui/react-icons": "^1.3.0",
"@radix-ui/react-label": "^2.0.2",
"@radix-ui/react-menubar": "^1.0.4",
"@radix-ui/react-navigation-menu": "^1.1.4",
Expand Down
11 changes: 11 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import {BasicFormDialog} from "@/components/dialogs/basic-form-dialog";

import { FormField, FormItem, FormLabel } from '@/components/ui/form';
import { Textarea } from '@/components/ui/textarea';
import {buildDocumentIndex, BuildDocumentIndexOptions} from '@/client/operations/documents';
import type {ReactElement} from "react";

/**
 * Dialog containing a one-field form whose submission is handed to
 * `buildDocumentIndex`.
 *
 * NOTE(review): the single parameter is typed as a `ReactElement`, not a props
 * object. If this is rendered as a JSX component React will pass a props object
 * here instead — confirm how callers invoke it.
 * NOTE(review): `fromId` is forwarded as written; confirm it is the prop name
 * `BasicFormDialog` actually expects.
 *
 * @param trigger - Element forwarded to `BasicFormDialog` as its `trigger` prop.
 */
export function BuildDocumentIndexDialog (trigger: ReactElement) {
  // The only form control: a textarea bound to the `uriList` field.
  const uriListField = (
    <FormItem>
      <FormLabel>URL List</FormLabel>
      <FormField
        name="uriList"
        render={({ field }) => <Textarea {...field} />}
      />
    </FormItem>
  );

  return (
    <BasicFormDialog<BuildDocumentIndexOptions>
      fromId="build-document-index-form"
      trigger={trigger}
      title="Build document index"
      onSubmit={buildDocumentIndex}
      submitButtonTitle="Confirm"
    >
      {uriListField}
    </BasicFormDialog>
  );
}
73 changes: 73 additions & 0 deletions src/app/(main)/(admin)/explore/components/columns.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import {DataTableRowActions} from "@/app/(main)/(admin)/explore/components/data-table-row-actions";
import {Tooltip, TooltipContent, TooltipTrigger} from "@/components/ui/tooltip";
import type {Document} from "@/core/repositories/document";
import type {CellContext, ColumnDef} from "@tanstack/react-table";
import {createColumnHelper} from "@tanstack/table-core";
import {format} from "date-fns";
import {GithubIcon} from "lucide-react";

/** Column helper bound to the `Document` row type. */
const helper = createColumnHelper<Document>();

/** Cell renderer that shows the cell value inside a monospace `<span>`. */
const mono = (cell: CellContext<any, any>) => {
  return <span className="font-mono">{cell.getValue()}</span>;
};

/**
 * Cell renderer that formats the cell value with date-fns `format` using the
 * pattern `yyyy-MM-dd HH:mm`.
 *
 * NOTE(review): assumes the value is always a valid date — confirm nullable
 * columns cannot reach this renderer.
 */
const datetime = (cell: CellContext<any, any>) => {
  const formatted = format(cell.getValue(), 'yyyy-MM-dd HH:mm');
  return <time>{formatted}</time>;
};

/**
 * Matches `http(s)://github.com/<owner>/<repo>` with an optional
 * `/blob/<segment>` suffix; the three parts are captured in that order.
 */
const GITHUB_PAGE_URL_REGEXP = /^https?:\/\/github\.com\/([^\/]+)\/([^\/]+)(?:\/blob\/([^/]+))?/;

/**
 * Column definitions for the documents table.
 *
 * - `id`, `mime`: monospace text.
 * - `name`: truncated text.
 * - `source_uri`: GitHub URLs render as an `owner/repo[/branch]` link with the
 *   full URL in a tooltip; anything else renders as truncated text.
 * - `hash`: first six characters, or `N/A` when the value is falsy.
 * - `created_at`, `last_modified_at`: formatted timestamps.
 * - `actions`: per-row dropdown menu.
 */
export const columns = [
  helper.accessor('id', {cell: mono}),
  helper.accessor('name', {
    cell: (cell: CellContext<any, any>) => (
      <div className="flex space-x-2">
        <span className="max-w-[500px] truncate font-medium">{cell.getValue()}</span>
      </div>
    ),
  }),
  helper.accessor('mime', {cell: mono}),
  helper.accessor('source_uri', {
    cell: (cell: CellContext<any, any>) => {
      // Read the value once and reuse it for matching, the link and the fallback.
      const value = cell.getValue();

      const matched = GITHUB_PAGE_URL_REGEXP.exec(value);
      if (matched) {
        const [, owner, repo, branch] = matched;
        // The URL comes from document data and opens in a new tab, so
        // `rel="noopener noreferrer"` keeps the opened page from reaching
        // `window.opener` and from receiving a referrer.
        return (
          <Tooltip>
            <TooltipTrigger asChild>
              <a href={value} target="_blank" rel="noopener noreferrer" className="flex items-center">
                <GithubIcon size="1em" className="mr-1"/>
                <span>{owner}</span>
                /
                <span>{repo}</span>
                {branch && <>/<span>{branch}</span></>}
              </a>
            </TooltipTrigger>
            <TooltipContent className="text-xs">
              {value}
            </TooltipContent>
          </Tooltip>
        );
      }

      return (
        <div className="flex space-x-2">
          <span className="max-w-[300px] truncate font-medium">{value}</span>
        </div>
      );
    },
  }),
  helper.accessor('hash', {
    cell: (cell: CellContext<any, any>) => {
      // Show a short prefix; documents without a hash show a placeholder.
      const hash = cell.getValue();
      return (
        <span className="font-mono">
          {hash ? hash.substring(0, 6) : 'N/A'}
        </span>
      );
    },
  }),
  helper.accessor('created_at', {
    cell: datetime,
    enableSorting: true,
  }),
  helper.accessor('last_modified_at', {cell: datetime}),
  {
    id: "actions",
    cell: ({ row }) => <DataTableRowActions row={row} />,
  },
] as ColumnDef<Document>[];
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"use client"

import {BuildDocumentIndexDialog} from "@/app/(main)/(admin)/explore/components/build-document-index-dialog";
import { DotsHorizontalIcon } from "@radix-ui/react-icons"
import { Row } from "@tanstack/react-table"

import { Button } from "@/components/ui/button"
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuSeparator,
DropdownMenuShortcut,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu"


/** Props accepted by `DataTableRowActions`. */
interface DataTableRowActionsProps<TData> {
  /** The table row the actions menu belongs to. */
  row: Row<TData>;
}

/**
 * Per-row actions dropdown. Both menu entries are currently rendered disabled,
 * and `props` is not read by this component.
 */
export function DataTableRowActions<TData>(props: DataTableRowActionsProps<TData>) {
  // Icon-only button; the visible label is replaced by screen-reader text.
  const menuButton = (
    <Button
      variant="ghost"
      className="flex h-8 w-8 p-0 data-[state=open]:bg-muted"
    >
      <DotsHorizontalIcon className="h-4 w-4" />
      <span className="sr-only">Open menu</span>
    </Button>
  );

  return (
    <DropdownMenu>
      <DropdownMenuTrigger asChild>{menuButton}</DropdownMenuTrigger>
      <DropdownMenuContent align="end" className="w-[160px]">
        <DropdownMenuItem disabled>
          Build index (Coming soon)
        </DropdownMenuItem>
        <DropdownMenuSeparator />
        <DropdownMenuItem disabled>
          Delete
          <DropdownMenuShortcut>⌘⌫</DropdownMenuShortcut>
        </DropdownMenuItem>
      </DropdownMenuContent>
    </DropdownMenu>
  );
}
72 changes: 12 additions & 60 deletions src/app/(main)/(admin)/explore/page.tsx
Original file line number Diff line number Diff line change
@@ -1,72 +1,24 @@
'use client';

import { AdminPageHeading } from '@/components/admin-page-heading';
import { DocumentIndexStatusFilter } from '@/components/data-filters/document-index-status-filter';
import { SearchFilter } from '@/components/data-filters/search-filter';
import { DataTableHeading } from '@/components/data-table-heading';
import { DataTableRemote } from '@/components/data-table-remote';
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip';
import type { Document } from '@/core/repositories/document';
import type { CellContext, ColumnDef } from '@tanstack/react-table';
import { createColumnHelper } from '@tanstack/table-core';
import { format } from 'date-fns';
import { GithubIcon } from 'lucide-react';

/** Column helper bound to the `Document` row type. */
const helper = createColumnHelper<Document>();

/** Cell renderer that shows the cell value inside a monospace `<span>`. */
const mono = (cell: CellContext<any, any>) => {
  return <span className="font-mono">{cell.getValue()}</span>;
};

/**
 * Matches `http(s)://github.com/<owner>/<repo>` with an optional
 * `/blob/<segment>` suffix. Hoisted so it is not rebuilt for every rendered cell;
 * it has no `g` flag, so sharing it between calls carries no `lastIndex` state.
 */
const SOURCE_URI_GITHUB_REGEXP = /^https?:\/\/github\.com\/([^\/]+)\/([^\/]+)(?:\/blob\/([^/]+))?/;

/**
 * Cell renderer for a document's source URI.
 *
 * GitHub URLs render as a compact `owner/repo[/branch]` link with the full URL
 * in a tooltip; any other value renders as a plain link. Both links open in a
 * new tab with `rel="noopener noreferrer"`, because the URL comes from document
 * data and the opened page should get neither `window.opener` nor a referrer.
 */
const sourceUri = (cell: CellContext<any, any>) => {
  const value = cell.getValue();

  const matched = SOURCE_URI_GITHUB_REGEXP.exec(value);
  if (matched) {
    const [, owner, repo, branch] = matched;

    return (
      <Tooltip>
        <TooltipTrigger asChild>
          <a href={value} target="_blank" rel="noopener noreferrer" className="flex items-center">
            <GithubIcon size="1em" className="mr-1" />
            <span>{owner}</span>
            /
            <span>{repo}</span>
            {branch && <>/<span>{branch}</span></>}
          </a>
        </TooltipTrigger>
        <TooltipContent className="text-xs">
          {value}
        </TooltipContent>
      </Tooltip>
    );
  }
  return <a href={value} target="_blank" rel="noopener noreferrer">{value}</a>;
};

/**
 * Cell renderer that formats the cell value with date-fns `format` using the
 * pattern `yyyy-MM-dd HH:mm`.
 */
const datetime = (cell: CellContext<any, any>) => {
  const formatted = format(cell.getValue(), 'yyyy-MM-dd HH:mm');
  return <time>{formatted}</time>;
};

/**
 * Table columns for the documents list: four monospace text columns, the
 * source-URI link column, and two formatted timestamp columns.
 */
const columns = [
  helper.accessor('id', {
    cell: mono,
  }),
  helper.accessor('name', {
    cell: mono,
  }),
  helper.accessor('mime', {
    cell: mono,
  }),
  helper.accessor('source_uri', {
    cell: sourceUri,
  }),
  helper.accessor('hash', {
    cell: mono,
  }),
  helper.accessor('created_at', {
    cell: datetime,
  }),
  helper.accessor('last_modified_at', {
    cell: datetime,
  }),
] as ColumnDef<Document>[];

export default function Page () {
import {columns} from "@/app/(main)/(admin)/explore/components/columns";
import {AdminPageHeading} from '@/components/admin-page-heading';
import {DocumentIndexStatusFilter} from '@/components/data-filters/document-index-status-filter';
import {SearchFilter} from '@/components/data-filters/search-filter';
import {DataTableHeading} from '@/components/data-table-heading';
import {DataTableRemote} from '@/components/data-table-remote';

export default function Page() {
return (
<>
<AdminPageHeading title="Explore all documents" />
<AdminPageHeading title="Documents" description="Uploading, managing, and indexing various documents" />
<DataTableRemote
before={(
<DataTableHeading>
<SearchFilter />
<DocumentIndexStatusFilter />
<SearchFilter placeholder="Filter documents..."/>
<DocumentIndexStatusFilter/>
</DataTableHeading>
)}
selectable={true}
columns={columns}
api="/api/v1/documents"
idColumn="id"
Expand Down
43 changes: 43 additions & 0 deletions src/app/api/v1/documents/import/from/urls/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import {DefaultDocumentImportService, DocumentImportService} from "@/core/services/importing";
import { defineHandler } from '@/lib/next/handler';
import {baseRegistry} from "@/rag-spec/base";
import {getFlow} from "@/rag-spec/createFlow";
import { NextResponse } from "next/server";
import {z} from "zod";

/** Request body schema: `urls` must be a non-empty array of URL-formatted strings. */
const ImportDocumentsFromUrlsOptionsSchema = z.object({
  urls: z
    .array(z.string().url('The format of URL is incorrect.'))
    .min(1, 'Must provide at least one URL for importing.'),
});

/**
 * Admin-only endpoint that creates import tasks for the submitted URLs, runs
 * them, and streams progress back to the caller.
 *
 * Each progress value reported by `runTasks`, plus its final resolved value, is
 * JSON-serialized and written to the response stream as its own chunk.
 *
 * NOTE(review): chunks are written back-to-back with no delimiter while the
 * response is labelled `application/json` — confirm the client can split them.
 * NOTE(review): the first argument to `runTasks` (10) is presumably a
 * concurrency or batch size — confirm against the service.
 */
export const POST = defineHandler({
  auth: 'admin',
  body: ImportDocumentsFromUrlsOptionsSchema,
}, async ({ body: { urls } }) => {
  const textEncoder = new TextEncoder();
  // Serialize a value to JSON and encode it as bytes for the stream.
  const toChunk = (payload: unknown) => textEncoder.encode(JSON.stringify(payload));

  const stream = new ReadableStream({
    async pull(controller) {
      const flow = await getFlow(baseRegistry);
      const importService = new DefaultDocumentImportService({ flow });
      const taskIds = await DocumentImportService.createTasksByURLs(urls);
      console.log('Create document import tasks: ', taskIds);

      // Forward every intermediate progress report as soon as it arrives.
      const finalProgress = await importService.runTasks(10, taskIds, (progress) => {
        controller.enqueue(toChunk(progress));
      });

      // Emit the final state, then end the stream.
      controller.enqueue(toChunk(finalProgress));
      controller.close();
    },
  });

  return new NextResponse(stream, {
    headers: {
      'Content-Type': 'application/json; charset=utf-8',
    },
  });
});

export const dynamic = 'force-dynamic';
56 changes: 56 additions & 0 deletions src/app/api/v1/documents/index/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import {getIndexByNameOrThrow} from "@/core/repositories/index_";
import {DocumentIndexService} from "@/core/services/indexing";
import { defineHandler } from '@/lib/next/handler';
import {z} from "zod";

/**
 * Request body schema: `documentIds` must be a non-empty array of integers and
 * `indexName` a string.
 */
const IndexDocumentsOptionsSchema = z.object({
  documentIds: z
    .array(z.number().int())
    .min(1, 'Must provide at least one document'),
  indexName: z.string(),
});

/**
 * Admin-only endpoint that builds the named index for the given documents.
 *
 * Looks up the index by name, creates one index task per document, runs all
 * tasks concurrently, and reports which task ids succeeded and which failed.
 *
 * @returns `{ succeed, failed }`, where `succeed` lists the ids of fulfilled
 *   tasks and `failed` lists `{ taskId, reason }` for each rejected task.
 */
export const POST = defineHandler({
  auth: 'admin',
  body: IndexDocumentsOptionsSchema
}, async ({ body}) => {
  const { documentIds, indexName } = body;

  const index = await getIndexByNameOrThrow(indexName);
  const documentIdStr = documentIds.map((id) => `#${id}`).join(', ');
  console.log(`Creating index for documents ${documentIdStr} with index <${indexName}> (provider: ${index.config.provider})`);

  const service = new DocumentIndexService();
  await service.prepareProviders();

  // Create document index tasks.
  const taskIds = await DocumentIndexService.createDocumentIndexTasksByDocumentIds(documentIds, index.id);
  const taskIdStr = taskIds.map((id) => `#${id}`).join(', ');
  console.log(`Create document index tasks ${taskIdStr}.`);

  // Execute document index tasks. `allSettled` lets one failing task be
  // reported without hiding the outcome of the others.
  const results = await Promise.allSettled(
    taskIds.map(taskId => service.runDocumentIndexTask(taskId))
  );
  const succeed: number[] = [];
  const failed: { taskId: number, reason: string; }[] = [];

  results.forEach((result, i) => {
    if (result.status === 'fulfilled') {
      succeed.push(taskIds[i]);
    } else {
      // A rejection reason can be any value, not only an Error. Reading
      // `.message` unconditionally would yield `undefined` for thrown strings
      // and throw for `null`/`undefined`, failing the request after the tasks
      // have already run.
      const { reason } = result;
      failed.push({
        taskId: taskIds[i],
        reason: reason instanceof Error ? reason.message : String(reason)
      });
    }
  });

  return {
    succeed,
    failed
  };
});

export const dynamic = 'force-dynamic';
Loading

0 comments on commit 477b63e

Please sign in to comment.