Skip to content

Commit

Permalink
Feat: Add tag_kwd parameter to chunk configuration modal #4368 (#4414)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Feat: Add tag_kwd parameter to chunk configuration modal  #4368

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
  • Loading branch information
cike8899 authored Jan 8, 2025
1 parent 3d66d78 commit af43cb0
Show file tree
Hide file tree
Showing 13 changed files with 204 additions and 13 deletions.
6 changes: 6 additions & 0 deletions web/src/components/parse-configuration/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
};

/** Chunk methods (parser ids) for which the tag configuration items are not shown. */
export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];

/**
 * Decides whether the tag configuration items should be displayed for a chunk method.
 *
 * @param parserId - Identifier of the currently selected chunk method.
 * @returns `false` when `parserId` is listed in `excludedTagParseMethods`, otherwise `true`.
 */
export const showTagItems = (parserId: string) => {
  const isAllowed = excludedTagParseMethods.every(
    (excluded) => excluded !== parserId,
  );
  return isAllowed;
};

// The three types "table", "resume" and "one" do not display this configuration.
const ParseConfiguration = () => {
const form = Form.useFormInstance();
Expand Down
13 changes: 13 additions & 0 deletions web/src/locales/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,19 @@ The above is the content you need to summarize.`,
searchTags: 'Search tags',
tagCloud: 'Cloud',
tagTable: 'Table',
tagSet: 'Tag set',
tagSetTip: `
<p> Selecting 'Tag' knowledge bases helps to tag every chunk. </p>
<p>Queries to those chunks will also carry tags.</p>
This procedure will improve precision of retrieval by adding more information to the dataset, especially when there's a large set of chunks.
<p>Difference between tags and keywords:</p>
<ul>
<li>A tag set is a closed set which is defined and manipulated by the user, while keywords form an open set.</li>
<li>You need to upload tag sets with samples prior to use.</li>
<li>Keywords are generated by LLM which is expensive and time consuming.</li>
</ul>
`,
topnTags: 'Top-N Tags',
},
chunk: {
chunk: 'Chunk',
Expand Down
13 changes: 13 additions & 0 deletions web/src/locales/zh-traditional.ts
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,19 @@ export default {
searchTags: '搜尋標籤',
tagCloud: '雲端',
tagTable: '表',
tagSet: '標籤庫',
topnTags: 'Top-N 標籤',
tagSetTip: `
<p> 選擇「標籤」知識庫有助於標記每個區塊。 </p>
<p>對這些區塊的查詢也將帶有標籤。</p>
此過程將透過向資料集添加更多資訊來提高檢索精度,特別是當存在大量區塊時。
<p>標籤和關鍵字的差異:</p>
<ul>
<li>標籤是一個閉集,由使用者定義和操作,而關鍵字是一個開集。</li>
<li>您需要在使用前上傳包含範例的標籤集。</li>
<li>關鍵字由 LLM 生成,既昂貴又耗時。</li>
</ul>
`,
},
chunk: {
chunk: '解析塊',
Expand Down
13 changes: 13 additions & 0 deletions web/src/locales/zh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,19 @@ export default {
searchTags: '搜索标签',
tagCloud: '云',
tagTable: '表',
tagSet: '标签库',
topnTags: 'Top-N 标签',
tagSetTip: `
<p> 选择“标签”知识库有助于标记每个块。 </p>
<p>对这些块的查询也将带有标签。 </p>
此过程将通过向数据集添加更多信息来提高检索的准确性,尤其是在存在大量块的情况下。
<p>标签和关键字之间的区别:</p>
<ul>
<li>标签是一个由用户定义和操作的封闭集,而关键字是一个开放集。 </li>
<li>您需要在使用前上传带有样本的标签集。 </li>
<li>关键字由 LLM 生成,这既昂贵又耗时。 </li>
</ul>
`,
},
chunk: {
chunk: '解析块',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type FieldType = {
/** Extra props accepted by the chunk creating/editing modal, on top of the generic modal props. */
interface kFProps {
  /** Document identifier. NOTE(review): presumably the document that owns the chunk — confirm against the caller. */
  doc_id: string;

  /** Id of the chunk to load through `useFetchChunk`; when it is falsy the modal clears its fields. */
  chunkId: string | undefined;

  /** Chunk method of the document; the tag editor section is rendered only when this equals `'tag'`. */
  parserId: string;
}

const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
Expand All @@ -21,32 +22,39 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
hideModal,
onOk,
loading,
parserId,
}) => {
const [form] = Form.useForm();
const [checked, setChecked] = useState(false);
const [keywords, setKeywords] = useState<string[]>([]);
const [question, setQuestion] = useState<string[]>([]);
const [tagKeyWords, setTagKeyWords] = useState<string[]>([]);
const { removeChunk } = useDeleteChunkByIds();
const { data } = useFetchChunk(chunkId);
const { t } = useTranslation();

const isTagParser = parserId === 'tag';

useEffect(() => {
if (data?.code === 0) {
const {
content_with_weight,
important_kwd = [],
available_int,
question_kwd = [],
tag_kwd = [],
} = data.data;
form.setFieldsValue({ content: content_with_weight });
setKeywords(important_kwd);
setQuestion(question_kwd);
setTagKeyWords(tag_kwd);
setChecked(available_int !== 0);
}

if (!chunkId) {
setKeywords([]);
setQuestion([]);
setTagKeyWords([]);
form.setFieldsValue({ content: undefined });
}
}, [data, form, chunkId]);
Expand All @@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
content: values.content,
keywords, // keywords
question_kwd: question,
tag_kwd: tagKeyWords,
available_int: checked ? 1 : 0, // available_int
});
} catch (errorInfo) {
Expand Down Expand Up @@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
</div>
<EditTag tags={question} setTags={setQuestion} />
</section>
{isTagParser && (
<section className="mt-4">
<p className="mb-2">{t('knowledgeConfiguration.tagName')} </p>
<EditTag tags={tagKeyWords} setTags={setTagKeyWords} />
</section>
)}
{chunkId && (
<section>
<Divider></Divider>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ export const useUpdateChunk = () => {
keywords,
available_int,
question_kwd,
tag_kwd,
}: {
content: string;
keywords: string;
available_int: number;
question_kwd: string;
tag_kwd: string;
}) => {
const code = await createChunk({
content_with_weight: content,
Expand All @@ -113,6 +115,7 @@ export const useUpdateChunk = () => {
important_kwd: keywords, // keywords
available_int,
question_kwd,
tag_kwd,
});

if (code === 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ const Chunk = () => {
visible={chunkUpdatingVisible}
loading={chunkUpdatingLoading}
onOk={onChunkUpdatingOk}
parserId={documentInfo.parser_id}
/>
)}
<KnowledgeGraphModal></KnowledgeGraphModal>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ const ParsingActionCell = ({
<Dropdown
menu={{ items: chunkItems }}
trigger={['click']}
disabled={isRunning}
disabled={isRunning || record.parser_id === 'tag'}
>
<Button type="text" className={styles.iconButton}>
<ToolOutlined size={20} />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import styles from './index.less';
import { TagTabs } from './tag-tabs';
import { ImageMap } from './utils';

const { Title, Text } = Typography;
const { Text } = Typography;

const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
const parserList = useSelectParserList();
Expand Down Expand Up @@ -37,15 +37,15 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
<section className={styles.categoryPanelWrapper}>
{imageList.length > 0 ? (
<>
<Title level={5} className={styles.topTitle}>
<h5 className="font-semibold text-base mt-0 mb-1">
{`"${item.title}" ${t('methodTitle')}`}
</Title>
</h5>
<p
dangerouslySetInnerHTML={{
__html: DOMPurify.sanitize(item.description),
}}
></p>
<Title level={5}>{`"${item.title}" ${t('methodExamples')}`}</Title>
<h5 className="font-semibold text-base mt-4 mb-1">{`"${item.title}" ${t('methodExamples')}`}</h5>
<Text>{t('methodExamplesDescription')}</Text>
<Row gutter={[10, 10]} className={styles.imageRow}>
{imageList.map((x) => (
Expand All @@ -58,9 +58,9 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
</Col>
))}
</Row>
<Title level={5}>
<h5 className="font-semibold text-base mt-4 mb-1">
{item.title} {t('dialogueExamplesTitle')}
</Title>
</h5>
<Divider></Divider>
</>
) : (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import MaxTokenNumber from '@/components/max-token-number';
import PageRank from '@/components/page-rank';
import ParseConfiguration, {
showRaptorParseConfiguration,
showTagItems,
} from '@/components/parse-configuration';
import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
Expand All @@ -23,6 +24,7 @@ import {
useSubmitKnowledgeConfiguration,
} from './hooks';
import styles from './index.less';
import { TagItems } from './tag-item';

const { Option } = Select;

Expand Down Expand Up @@ -146,6 +148,8 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
{showRaptorParseConfiguration(parserId) && (
<ParseConfiguration></ParseConfiguration>
)}

{showTagItems(parserId) && <TagItems></TagItems>}
</>
);
}}
Expand Down
101 changes: 101 additions & 0 deletions web/src/pages/add-knowledge/components/knowledge-setting/tag-item.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
import { UserOutlined } from '@ant-design/icons';
import {
Avatar,
Divider,
Flex,
Form,
InputNumber,
Select,
Slider,
Space,
} from 'antd';
import DOMPurify from 'dompurify';
import { useTranslation } from 'react-i18next';

/**
 * Form item that lets the user pick one or more tag sets.
 *
 * The selectable options are the knowledge bases returned by
 * `useFetchKnowledgeList(true)` whose `parser_id` is `'tag'`. The selection is
 * stored in the enclosing form under `parser_config.tag_kb_ids`.
 */
export const TagSetItem = () => {
  const { t } = useTranslation();
  const { list: knowledgeList } = useFetchKnowledgeList(true);

  // Only knowledge bases built with the "tag" chunk method can serve as tag sets.
  const tagKnowledgeBases = knowledgeList.filter(
    (kb) => kb.parser_id === 'tag',
  );

  const knowledgeOptions = tagKnowledgeBases.map((kb) => {
    const label = (
      <Space>
        <Avatar size={20} icon={<UserOutlined />} src={kb.avatar} />
        {kb.name}
      </Space>
    );
    return { label, value: kb.id };
  });

  // The tip is an HTML snippet coming from the locale file; sanitize it before injecting.
  const tooltipContent = (
    <div
      dangerouslySetInnerHTML={{
        __html: DOMPurify.sanitize(t('knowledgeConfiguration.tagSetTip')),
      }}
    />
  );

  const selectionMessage = t('chat.knowledgeBasesMessage');

  return (
    <Form.Item
      label={t('knowledgeConfiguration.tagSet')}
      name={['parser_config', 'tag_kb_ids']}
      tooltip={tooltipContent}
      rules={[{ message: selectionMessage, type: 'array' }]}
    >
      <Select
        mode="multiple"
        options={knowledgeOptions}
        placeholder={selectionMessage}
      />
    </Form.Item>
  );
};

/**
 * Form item for the "Top-N Tags" setting.
 *
 * A slider and a numeric input are both bound to the same form field,
 * `parser_config.topn_tags`, so editing either one updates the other.
 * Both controls accept values from 1 to 10; the field starts at 3.
 */
export const TopNTagsItem = () => {
  const { t } = useTranslation();

  // Shared by both controls so they read and write the same form value.
  const fieldName = ['parser_config', 'topn_tags'];
  const lowerBound = 1;
  const upperBound = 10;

  return (
    <Form.Item label={t('knowledgeConfiguration.topnTags')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          {/* The initial value is declared once, on the slider's item. */}
          <Form.Item name={fieldName} noStyle initialValue={3}>
            <Slider
              max={upperBound}
              min={lowerBound}
              style={{ width: '100%' }}
            />
          </Form.Item>
        </Flex>
        <Form.Item name={fieldName} noStyle>
          <InputNumber max={upperBound} min={lowerBound} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};

/**
 * Tag-related section of the knowledge configuration form.
 *
 * Renders the tag-set selector between two dividers. The "Top-N Tags" control
 * is rendered only while `parser_config.tag_kb_ids` holds a non-empty array;
 * the wrapping `Form.Item` declares that field as a dependency.
 */
export function TagItems() {
  const tagSetField = ['parser_config', 'tag_kb_ids'];

  return (
    <>
      <Divider />
      <TagSetItem />
      <Form.Item noStyle dependencies={[tagSetField]}>
        {({ getFieldValue }) => {
          // The field may be unset until the user picks at least one tag set.
          const selectedIds: string[] | undefined = getFieldValue(tagSetField);
          const hasSelection =
            Array.isArray(selectedIds) && selectedIds.length > 0;

          return hasSelection && <TopNTagsItem />;
        }}
      </Form.Item>
      <Divider />
    </>
  );
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ export function TagTable() {
variant="ghost"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
>
{t('knowledgeConfiguration.tag')}
{t('knowledgeConfiguration.tagName')}
<ArrowUpDown />
</Button>
);
Expand Down
Loading

0 comments on commit af43cb0

Please sign in to comment.