diff --git a/packages/react-pdf/src/components/Pages.tsx b/packages/react-pdf/src/components/Pages.tsx index 686b616..90f4df5 100644 --- a/packages/react-pdf/src/components/Pages.tsx +++ b/packages/react-pdf/src/components/Pages.tsx @@ -12,9 +12,14 @@ import {PageSvg} from './page/Svg' export interface PagesProps { renderMode?: 'canvas' | 'svg' lazyLoading?: boolean + tokenize?: boolean } -export const Page = memo(function Page({renderMode, pageNumber}: PagesProps & {pageNumber: number}) { +export const Page = memo(function Page({ + renderMode, + tokenize, + pageNumber, +}: Omit & {pageNumber: number}) { const {pdf} = usePdfContext() const [page, setPage] = useState() @@ -34,13 +39,13 @@ export const Page = memo(function Page({renderMode, pageNumber}: PagesProps & {p
{renderMode === 'canvas' && } {renderMode === 'svg' && } - +
) }) -export const Pages = memo(function Pages({renderMode, lazyLoading, children}: PropsWithChildren) { +export const Pages = memo(function Pages({renderMode, lazyLoading, tokenize, children}: PropsWithChildren) { const {pdf} = usePdfContext() const pageNumbers = useMemo(() => Array.from({length: pdf.numPages}, (_, index) => index + 1), [pdf.numPages]) const [renderPages, setRenderPages] = useState(pdf.numPages > 0 ? [1] : []) @@ -61,7 +66,7 @@ export const Pages = memo(function Pages({renderMode, lazyLoading, children}: Pr {(lazyLoading ? renderPages : pageNumbers).map((pageNumber) => { return (
- +
) })} diff --git a/packages/react-pdf/src/components/PdfViewer.tsx b/packages/react-pdf/src/components/PdfViewer.tsx index 6484bd2..45fbfc5 100644 --- a/packages/react-pdf/src/components/PdfViewer.tsx +++ b/packages/react-pdf/src/components/PdfViewer.tsx @@ -1,4 +1,4 @@ -import {ReactNode, useState} from 'react' +import {MouseEventHandler, ReactNode, useCallback, useState} from 'react' import {PDFProvider} from '../contexts/pdf' import {useIsomorphicLayoutEffect} from '../hooks/useIsomorphicLayoutEffect' @@ -23,7 +23,15 @@ export type PDFViewerProps = PagesProps & { } } -export function PDFViewer({pdfUrl, renderMode = 'canvas', header, footer, options}: PDFViewerProps) { +export function PDFViewer({ + pdfUrl, + renderMode = 'canvas', + tokenize, + onClickWords, + header, + footer, + options, +}: PDFViewerProps) { const [pdf, setPdf] = useState() useIsomorphicLayoutEffect(() => { @@ -47,15 +55,49 @@ export function PDFViewer({pdfUrl, renderMode = 'canvas', header, footer, option init() }, [options?.cMapCompressed, options?.cMapUrl, options?.withCredentials, pdf, pdfUrl]) + const handleClickWords: MouseEventHandler = useCallback( + async (e) => { + if (!onClickWords) { + return + } + const element = e.target as HTMLElement + const clickedText = (element?.innerText || '').trim() + const isSpanTag = element.tagName === 'SPAN' + if (!clickedText || !isSpanTag) { + return + } + for await (const {target, callback} of onClickWords) { + let result = false + if (typeof target === 'string') { + result = target === clickedText + } + if (target instanceof RegExp) { + result = target.test(clickedText) + } + if (result) { + await callback() + return + } + } + }, + [onClickWords], + ) + if (!pdf) { return null } return ( - - {header} - - {footer} - +
+ + {header} + 0} + /> + {footer} + +
) } diff --git a/packages/react-pdf/src/components/layer/Text.tsx b/packages/react-pdf/src/components/layer/Text.tsx index d162cfb..cd913f1 100644 --- a/packages/react-pdf/src/components/layer/Text.tsx +++ b/packages/react-pdf/src/components/layer/Text.tsx @@ -3,6 +3,7 @@ import {memo, useCallback, useMemo, useState} from 'react' import classNames from 'classnames/bind' import {useIsomorphicLayoutEffect} from '../../hooks/useIsomorphicLayoutEffect' +import {mergeTextItems} from '../../utils/text' import styles from './Text.module.scss' import type {TextContent, PDFPageProxy, TextContentItem} from '../../pdfjs-dist/types/pdfjs' @@ -68,20 +69,20 @@ export const TextLayerItem = memo(function TextLayerItem({ interface TextLayerProps { page: PDFPageProxy + tokenize?: boolean } -export const TextLayer = memo(function TextLayer({page}: TextLayerProps) { +export const TextLayer = memo(function TextLayer({page, tokenize}: TextLayerProps) { const [texts, setTexts] = useState() const viewport = page.getViewport({scale: 1}) useIsomorphicLayoutEffect(() => { async function init() { - const textContent = await page.getTextContent() - // TODO: chunking - setTexts(textContent) + const {items, styles: textStyles} = await page.getTextContent() + setTexts({items: mergeTextItems(items, {tokenize}), styles: textStyles}) } init() - }, [page]) + }, [page, tokenize]) if (!texts) { return null diff --git a/packages/react-pdf/src/utils/text.ts b/packages/react-pdf/src/utils/text.ts new file mode 100644 index 0000000..bcc03de --- /dev/null +++ b/packages/react-pdf/src/utils/text.ts @@ -0,0 +1,40 @@ +import type {TextContentItem} from '../pdfjs-dist/types/pdfjs' + +function tokenizeTextItems(texts: TextContentItem[]) { + return texts.reduce((result, textItem) => { + const {str, width, transform, ...rest} = textItem + const splittedStr = str.split(' ') + const strLength = str.length + const tokenizedStr = splittedStr.reduce((calculatedStr, s) => { + const currentStrWidth = s.trim().length === 0 ? 4.5 : Math.ceil((width / strLength) * s.length) + 5 + const reducedStrsLength = calculatedStr.length + const {width: lastWidth, transform: lastTransform} = + reducedStrsLength === 0 ? {width: 0, transform: [...transform]} : calculatedStr[reducedStrsLength - 1] + const newTransform = [...lastTransform] + newTransform[4] += lastWidth + (reducedStrsLength === 0 ? 0 : 3.5) + calculatedStr.push({str: s, width: currentStrWidth, transform: newTransform, ...rest}) + return calculatedStr + }, [] as TextContentItem[]) + return [...result, ...tokenizedStr] + }, [] as TextContentItem[]) +} + +export function mergeTextItems(texts: TextContentItem[], options?: {tokenize?: boolean}) { + const mergedTextItems = texts.reduce((result, token, index) => { + if (index === 0) { + result.push(token) + return result + } + const prev = result[result.length - 1] + // y 값을 비교하여, 같은 줄인지 확인 + if (prev.transform[5] === token.transform[5]) { + prev.str = prev.str + token.str + prev.width = prev.width + token.width + } else { + result.push(token) + } + + return result + }, [] as TextContentItem[]) + return options?.tokenize ? tokenizeTextItems(mergedTextItems) : mergedTextItems +}