Skip to content

Commit

Permalink
Merge pull request #340 from extractus/7.2.14
Browse files Browse the repository at this point in the history
v7.2.14
  • Loading branch information
ndaidong authored Apr 18, 2023
2 parents 2715cd9 + 8b3d594 commit 87a9708
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 7 deletions.
4 changes: 2 additions & 2 deletions dist/article-extractor.esm.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions dist/cjs/article-extractor.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/cjs/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "@extractus/article-extractor",
- "version": "7.2.13",
+ "version": "7.2.14",
"main": "./article-extractor.js"
}
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
- "version": "7.2.13",
+ "version": "7.2.14",
"name": "@extractus/article-extractor",
"description": "To extract main article from given URL",
"homepage": "https://github.com/extractus/article-extractor",
Expand Down Expand Up @@ -40,7 +40,7 @@
},
"devDependencies": {
"@types/sanitize-html": "^2.9.0",
- "esbuild": "^0.17.16",
+ "esbuild": "^0.17.17",
"eslint": "^8.38.0",
"jest": "^29.5.0",
"nock": "^13.3.0"
Expand Down
6 changes: 6 additions & 0 deletions src/utils/extractMetaData.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,19 @@ export default (html) => {
const urlAttrs = [
'og:url',
'twitter:url',
+ 'parsely-link',
]
const titleAttrs = [
'title',
'og:title',
'twitter:title',
+ 'parsely-title',
]
const descriptionAttrs = [
'description',
'og:description',
'twitter:description',
+ 'parsely-description',
]
const imageAttrs = [
'image',
Expand All @@ -47,6 +50,7 @@ export default (html) => {
'og:image:secure_url',
'twitter:image',
'twitter:image:src',
+ 'parsely-image-url',
]
const authorAttrs = [
'author',
Expand All @@ -55,6 +59,7 @@ export default (html) => {
'article:author',
'twitter:creator',
'dc.creator',
+ 'parsely-author',
]
const publishedTimeAttrs = [
'article:published_time',
Expand All @@ -72,6 +77,7 @@ export default (html) => {
'published_time',
'release_date',
'date',
+ 'parsely-pub-date',
]

const document = new DOMParser().parseFromString(html, 'text/html')
Expand Down
1 change: 1 addition & 0 deletions src/utils/parseFromHtml.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const summarize = (desc, txt, threshold, maxlen) => { // eslint-disable-line
export default async (inputHtml, inputUrl = '', parserOptions = {}) => {
const html = purify(inputHtml)
const meta = extractMetaData(html)

let title = meta.title

const {
Expand Down

0 comments on commit 87a9708

Please sign in to comment.