Skip to content

Commit

Permalink
v8.0.9
Browse files: browse the repository at this point in the history
- Stop using purified HTML to extract content (#388)
  • Loading branch information
ndaidong committed May 7, 2024
1 parent 32a17a2 commit b68c8db
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion examples/pupperteer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ Start server:
npm start
```

- Open `http://localhost:3100/?url=https://www.techradar.com/televisions/samsungs-new-cheaper-oled-tvs-are-now-available-to-buy` to see the result.
+ Open `http://localhost:3100/?url=https://client-side-rendering.pages.dev/lorem-ipsum` to see the result.

---
8 changes: 4 additions & 4 deletions src/utils/parseFromHtml.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ const summarize = (desc, txt, threshold, maxlen) => { // eslint-disable-line
}

export default async (inputHtml, inputUrl = '', parserOptions = {}) => {
const html = purify(inputHtml)
const meta = extractMetaData(html)
const pureHtml = purify(inputHtml)
const meta = extractMetaData(pureHtml)

let title = meta.title

Expand All @@ -57,7 +57,7 @@ export default async (inputHtml, inputUrl = '', parserOptions = {}) => {

// gather title
if (!title) {
title = extractTitleWithReadability(html, inputUrl)
title = extractTitleWithReadability(pureHtml, inputUrl)
}
if (!title) {
return null
Expand Down Expand Up @@ -95,7 +95,7 @@ export default async (inputHtml, inputUrl = '', parserOptions = {}) => {
}
)

const content = fns(html)
const content = fns(inputHtml)

if (!content) {
return null
Expand Down

0 comments on commit b68c8db

Please sign in to comment.