Skip to content

Commit

Permalink
Merge pull request #285 from ndaidong/v7.x.x
Browse files Browse the repository at this point in the history
v7.0.1
  • Loading branch information
ndaidong authored Aug 12, 2022
2 parents d415e55 + fb45060 commit bc25c16
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 151 deletions.
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ extract(url).then((article) => {
})
```

##### Note:

> Since Node.js v14, ECMAScript modules [have become the official standard format](https://nodejs.org/docs/latest-v14.x/api/esm.html#esm_modules_ecmascript_modules).
> Just ensure that you are [using the module system](https://nodejs.org/api/packages.html#determining-module-system) and enjoy the ES6 import/export syntax.

## APIs

- [.extract(String url | String html)](#extractstring-url--string-html)
Expand Down
98 changes: 49 additions & 49 deletions dist/article-parser.browser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/article-parser.browser.js.map

Large diffs are not rendered by default.

130 changes: 65 additions & 65 deletions dist/cjs/article-parser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/cjs/article-parser.js.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/cjs/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "article-parser-cjs",
"version": "7.0.0",
"version": "7.0.1",
"main": "./article-parser.js"
}
14 changes: 7 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "7.0.0",
"version": "7.0.1",
"name": "article-parser",
"description": "To extract main article from given URL",
"homepage": "https://ndaidong.github.io/article-parser-demo/",
Expand All @@ -21,7 +21,7 @@
"scripts": {
"lint": "standard .",
"pretest": "npm run lint",
"test": "cross-env NODE_ENV=test NODE_OPTIONS=--experimental-vm-modules jest --unhandled-rejections=strict",
"test": "cross-env NODE_ENV=test NODE_OPTIONS=--experimental-vm-modules jest --coverage=true",
"build": "node build",
"eval": "cross-env node eval",
"eval:cjs": "cross-env node eval.cjs",
Expand All @@ -30,13 +30,13 @@
"dependencies": {
"@mozilla/readability": "^0.4.2",
"axios": "^0.27.2",
"bellajs": "^11.0.3",
"html-crush": "^5.0.20",
"bellajs": "^11.0.4",
"html-crush": "^5.0.22",
"linkedom": "^0.14.12",
"sanitize-html": "^2.7.1",
"string-comparison": "^1.1.0",
"tldts": "^5.7.84",
"urlpattern-polyfill": "^5.0.5"
"tldts": "^5.7.89",
"urlpattern-polyfill": "^5.0.6"
},
"standard": {
"ignore": [
Expand All @@ -46,7 +46,7 @@
"devDependencies": {
"@types/sanitize-html": "^2.6.2",
"cross-env": "^7.0.3",
"esbuild": "^0.14.50",
"esbuild": "^0.15.1",
"jest": "^28.1.3",
"nock": "^13.2.9",
"standard": "^17.0.0"
Expand Down
2 changes: 1 addition & 1 deletion src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const htmlCrushOptions = {
const parserOptions = {
wordsPerMinute: 300, // to estimate "time to read"
urlsCompareAlgorithm: 'levenshtein', // to find the best url from list
descriptionLengthThreshold: 210, // min num of chars required for description
descriptionLengthThreshold: 180, // min num of chars required for description
descriptionTruncateLen: 210, // max num of chars generated for description
contentLengthThreshold: 200 // content must have at least 200 chars
}
Expand Down
17 changes: 3 additions & 14 deletions src/utils/parseFromHtml.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,9 @@ import getTimeToRead from './getTimeToRead.js'
import { getParserOptions } from '../config.js'

const summarize = (desc, txt, threshold, maxlen) => {
const removeFirstParts = (str) => {
const arr = str.split(' - ')
if (arr.length > 1) {
arr.shift()
return arr.join(' ')
}
return str
}
const metadesc = removeFirstParts(desc)
if (metadesc.length > threshold) {
return metadesc
}
const extradesc = truncate(txt, maxlen).replace(/\n/g, ' ')
return removeFirstParts(extradesc)
return desc.length > threshold
? desc
: truncate(txt, maxlen).replace(/\n/g, ' ')
}

export default async (inputHtml, inputUrl = '') => {
Expand Down

0 comments on commit bc25c16

Please sign in to comment.