Skip to content

Commit

Permalink
update and export citation processor, allow null url in source node (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
leehuwuj authored Feb 18, 2025
1 parent 1c2cea9 commit d7e63e7
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 10 deletions.
5 changes: 5 additions & 0 deletions .changeset/lemon-deers-cheer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@llamaindex/chat-ui': minor
---

Update citation processor format
7 changes: 4 additions & 3 deletions packages/chat-ui/src/chat/annotation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export type SourceNode = {
metadata: Record<string, unknown>
score?: number
text: string
url: string
url?: string
}

export type SourceData = {
Expand Down Expand Up @@ -145,11 +145,12 @@ function preprocessSourceNodes(nodes: SourceNode[]): SourceNode[] {
// Filter out source nodes whose score is not above the threshold
const processedNodes = nodes
.filter(node => (node.score ?? 1) > NODE_SCORE_THRESHOLD)
.filter(node => node.url && node.url.trim() !== '')
.sort((a, b) => (b.score ?? 1) - (a.score ?? 1))
.map(node => {
// remove trailing slash for node url if exists
node.url = node.url.replace(/\/$/, '')
if (node.url) {
node.url = node.url.replace(/\/$/, '')
}
return node
})
return processedNodes
Expand Down
12 changes: 7 additions & 5 deletions packages/chat-ui/src/widgets/chat-sources.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ export function ChatSources({ data }: { data: SourceData }) {
const documents: Document[] = useMemo(() => {
// group nodes by document (a document must have a URL)
const nodesByUrl: Record<string, SourceNode[]> = {}
data.nodes.forEach(node => {
const key = node.url
nodesByUrl[key] ??= []
nodesByUrl[key].push(node)
})
data.nodes
.filter(node => node.url)
.forEach(node => {
const key = node.url ?? ''
nodesByUrl[key] ??= []
nodesByUrl[key].push(node)
})

// convert to array of documents
return Object.entries(nodesByUrl).map(([url, sources]) => ({
Expand Down
10 changes: 8 additions & 2 deletions packages/chat-ui/src/widgets/markdown.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,19 @@ const preprocessLaTeX = (content: string) => {
return inlineProcessedContent
}

/**
 * Strip every `sandbox:`, `attachment:` and `snt:` occurrence from the content.
 *
 * Works around OpenAI models prepending `sandbox:` to relative URLs. Matches
 * are removed wherever they appear in the string (single pass, no re-scan of
 * the result), not only at the start of a URL.
 */
const preprocessMedia = (content: string) => {
  const prefixPattern = /(sandbox|attachment|snt):/g
  const cleaned = content.replace(prefixPattern, '')
  return cleaned
}

/**
* Update the citation flag [citation:id]() to the new format [citation:index](url)
*/
const preprocessCitations = (input: string, sources?: SourceData) => {
let content = input
if (sources) {
const citationRegex = /\[citation:(.+?)\]\(\)/g
const citationRegex = /\[citation:(.+?)\]/g
let match
// Find all the citation references in the content
while ((match = citationRegex.exec(content)) !== null) {
Expand All @@ -62,7 +68,7 @@ const preprocessCitations = (input: string, sources?: SourceData) => {
}

const preprocessContent = (content: string, sources?: SourceData) => {
return preprocessCitations(preprocessLaTeX(content), sources)
return preprocessCitations(preprocessLaTeX(preprocessMedia(content)), sources)
}

export function Markdown({
Expand Down

0 comments on commit d7e63e7

Please sign in to comment.