Merge pull request #283 from ndaidong/v7.x.x
v7.0.0
ndaidong authored Jul 27, 2022
2 parents fc1e720 + 67f8af1 commit d415e55
Showing 11 changed files with 140 additions and 167 deletions.
100 changes: 50 additions & 50 deletions dist/article-parser.browser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/article-parser.browser.js.map

Large diffs are not rendered by default.

122 changes: 61 additions & 61 deletions dist/cjs/article-parser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/cjs/article-parser.js.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/cjs/package.json
@@ -1,5 +1,5 @@
 {
   "name": "article-parser-cjs",
-  "version": "7.0.0rc4",
+  "version": "7.0.0",
   "main": "./article-parser.js"
 }
18 changes: 8 additions & 10 deletions package.json
@@ -1,5 +1,5 @@
 {
-  "version": "7.0.0rc4",
+  "version": "7.0.0",
   "name": "article-parser",
   "description": "To extract main article from given URL",
   "homepage": "https://ndaidong.github.io/article-parser-demo/",
@@ -8,8 +8,7 @@
     "url": "git@github.com:ndaidong/article-parser.git"
   },
   "author": "@ndaidong",
-  "main": "./dist/cjs/article-parser.js",
-  "module": "./src/main.js",
+  "main": "./src/main.js",
   "browser": {
     "linkedom": "./src/browser/linkedom.js",
     "./main.js": "./dist/article-parser.browser.js"
@@ -24,18 +23,17 @@
     "pretest": "npm run lint",
     "test": "cross-env NODE_ENV=test NODE_OPTIONS=--experimental-vm-modules jest --unhandled-rejections=strict",
     "build": "node build",
-    "eval": "cross-env DEBUG=*:* node eval",
-    "eval:cjs": "cross-env DEBUG=*:* node eval.cjs",
+    "eval": "cross-env node eval",
+    "eval:cjs": "cross-env node eval.cjs",
     "reset": "node reset"
   },
   "dependencies": {
     "@mozilla/readability": "^0.4.2",
     "axios": "^0.27.2",
     "bellajs": "^11.0.3",
-    "debug": "^4.3.4",
-    "html-crush": "^5.0.19",
+    "html-crush": "^5.0.20",
     "linkedom": "^0.14.12",
-    "sanitize-html": "^2.7.0",
+    "sanitize-html": "^2.7.1",
     "string-comparison": "^1.1.0",
     "tldts": "^5.7.84",
     "urlpattern-polyfill": "^5.0.5"
@@ -48,9 +46,9 @@
   "devDependencies": {
     "@types/sanitize-html": "^2.6.2",
     "cross-env": "^7.0.3",
-    "esbuild": "^0.14.49",
+    "esbuild": "^0.14.50",
     "jest": "^28.1.3",
-    "nock": "^13.2.8",
+    "nock": "^13.2.9",
     "standard": "^17.0.0"
   },
   "keywords": [
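With "main" now pointing at ./src/main.js and the separate "module" field dropped, the package resolves to its untranspiled ES module source by default; CommonJS consumers keep the prebuilt bundle under dist/cjs (the article-parser-cjs wrapper above). A minimal ESM consumption sketch, assuming extract remains the primary export as in earlier releases; the URL is only illustrative:

  import { extract } from 'article-parser'

  // extract() resolves to an article object, or null when parsing fails
  const article = await extract('https://example.com/some-post')
  console.log(article?.title)
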
4 changes: 0 additions & 4 deletions src/utils/linker.js
@@ -7,8 +7,6 @@ import * as stringComparison from 'string-comparison'
 
 import { getParserOptions } from '../config.js'
 
-import logger from './logger.js'
-
 export const isValid = (url = '') => {
   try {
     const ourl = new URL(url)
@@ -23,7 +21,6 @@ export const absolutify = (fullUrl = '', relativeUrl = '') => {
     const result = new URL(relativeUrl, fullUrl)
     return result.toString()
   } catch (err) {
-    logger.error(err)
     return ''
   }
 }
@@ -98,7 +95,6 @@ export const purify = (url) => {
 
     return pureUrl.toString().replace(pureUrl.hash, '')
   } catch (err) {
-    logger.error(err)
     return null
   }
 }
15 changes: 0 additions & 15 deletions src/utils/logger.js

This file was deleted.

7 changes: 0 additions & 7 deletions src/utils/parseFromHtml.js
@@ -23,8 +23,6 @@ import { execPreParser, execPostParser } from './transformation.js'
 
 import getTimeToRead from './getTimeToRead.js'
 
-import logger from './logger.js'
-
 import { getParserOptions } from '../config.js'
 
 const summarize = (desc, txt, threshold, maxlen) => {
@@ -68,11 +66,9 @@ export default async (inputHtml, inputUrl = '') => {
 
   // gather title
   if (!title) {
-    logger.info('Could not detect article title from meta!')
     title = extractTitleWithReadability(html, inputUrl)
   }
   if (!title) {
-    logger.info('Could not detect article title!')
     return null
   }
 
@@ -84,7 +80,6 @@
   )
 
   if (!links.length) {
-    logger.info('Could not detect article link!')
     return null
   }
 
@@ -112,13 +107,11 @@
   const content = fns(html)
 
   if (!content) {
-    logger.info('Could not detect article content!')
     return null
   }
 
   const textContent = stripTags(content)
   if (textContent.length < contentLengthThreshold) {
-    logger.info('Main article is too short!')
     return null
   }
 
9 changes: 2 additions & 7 deletions src/utils/retrieve.js
@@ -2,8 +2,6 @@
 
 import axios from 'axios'
 
-import logger from './logger.js'
-
 import { getRequestOptions } from '../config.js'
 
 export default async (url) => {
@@ -12,13 +10,10 @@ export default async (url) => {
 
     const contentType = res.headers['content-type'] || ''
     if (!contentType || !contentType.includes('text/html')) {
-      logger.error(`Content type must be "text/html", not "${contentType}"`)
-      return null
+      throw new Error(`Content type must be "text/html", not "${contentType}"`)
    }
 
     return res.data
   } catch (err) {
-    logger.error(err.message || err)
-    return null
+    throw new Error(`${err.name}: ${err.message}`)
   }
 }
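retrieve() now throws on non-HTML responses and on transport errors instead of logging and returning null, so callers handle failures themselves. A minimal sketch of calling it directly from a script at the repository root; the URL is illustrative:

  import retrieve from './src/utils/retrieve.js'

  try {
    // resolves with the raw text/html body on success
    const html = await retrieve('https://example.com/some-post')
    console.log(html.length)
  } catch (err) {
    // e.g. Error: Content type must be "text/html", not "application/json"
    console.error(err.message)
  }
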
14 changes: 10 additions & 4 deletions src/utils/retrieve.test.js
@@ -31,8 +31,11 @@ test('test retrieve with unsupported content type', async () => {
   scope.get(path).reply(200, '', {
     'Content-Type': 'something/strange'
   })
-  const result = await retrieve(url)
-  expect(result).toBe(null)
+  try {
+    await retrieve(url)
+  } catch (err) {
+    expect(err).toBeTruthy()
+  }
 })
 
 test('test retrieve from bad source', async () => {
@@ -42,6 +45,9 @@
   scope.get(path).reply(500, '<div>this is content</div>', {
     'Content-Type': 'text/html'
   })
-  const result = await retrieve(url)
-  expect(result).toBe(null)
+  try {
+    await retrieve(url)
+  } catch (err) {
+    expect(err).toBeTruthy()
+  }
 })
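
The updated tests assert the thrown error inside a try/catch block. An equivalent way to express the same expectation with Jest's built-in rejects matcher, shown only as a sketch reusing the scope, path, and url fixtures from this file (it is not part of the commit):

  test('test retrieve with unsupported content type', async () => {
    scope.get(path).reply(200, '', {
      'Content-Type': 'something/strange'
    })
    // fails the test if retrieve() resolves instead of rejecting
    await expect(retrieve(url)).rejects.toThrow()
  })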
