Merge pull request #401 from extractus/8.0.11
v8.0.11
ndaidong authored Oct 14, 2024
2 parents 4f0e78d + bf44188 commit 34c58f1
Showing 15 changed files with 357 additions and 387 deletions.
18 changes: 0 additions & 18 deletions .github/workflows/ci-test.yml
@@ -31,28 +31,10 @@ jobs:
           npm run build --if-present
           npm run test
 
-      - name: Coveralls Parallel
-        uses: coverallsapp/github-action@v2
-        with:
-          flag-name: run-${{ join(matrix.*, '-') }}
-          parallel: true
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
       - name: cache node modules
         uses: actions/cache@v4
         with:
           path: ~/.npm
           key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
           restore-keys: |
             ${{ runner.os }}-node-
-
-  finish:
-    needs: test
-    if: ${{ always() }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Coveralls Finished
-        uses: coverallsapp/github-action@v2
-        with:
-          parallel-finished: true
-          carryforward: "run-18.x,run-20.x,run-21.x"
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@ coverage
 yarn.lock
 coverage.lcov
 pnpm-lock.yaml
+lcov.info
 
 deno.lock
 
1 change: 1 addition & 0 deletions .npmignore
@@ -4,3 +4,4 @@ coverage
 pnpm-lock.yaml
 examples
 test-data
+lcov.info
1 change: 0 additions & 1 deletion README.md
@@ -5,7 +5,6 @@ Extract main article, main image and meta data from URL.
 [![npm version](https://badge.fury.io/js/@extractus%2Farticle-extractor.svg)](https://badge.fury.io/js/@extractus%2Farticle-extractor)
 ![CodeQL](https://github.com/extractus/article-extractor/workflows/CodeQL/badge.svg)
 ![CI test](https://github.com/extractus/article-extractor/workflows/ci-test/badge.svg)
-[![Coverage Status](https://coveralls.io/repos/github/extractus/article-extractor/badge.svg?branch=main)](https://coveralls.io/github/extractus/article-extractor?branch=main)
 
 (This library is derived from [article-parser](https://www.npmjs.com/package/article-parser) renamed.)
 
3 changes: 1 addition & 2 deletions eslint.config.js
@@ -12,7 +12,6 @@ export default [
       globals: {
         ...globals.node,
         ...globals.browser,
-        ...globals.jest,
         Intl: 'readonly',
       },
     },
@@ -76,7 +75,7 @@ export default [
       'max-lines-per-function': [
         'error',
         {
-          'max': 150,
+          'max': 180,
           'skipBlankLines': true,
         },
       ],
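Taken together, the two hunks drop the Jest globals (the migrated suite no longer needs them) and raise the max-lines-per-function limit from 150 to 180. A rough sketch of the relevant part of eslint.config.js after this change — assuming the surrounding flat-config entries stay as they were, with unrelated rules omitted:

import globals from 'globals'

export default [
  {
    languageOptions: {
      globals: {
        ...globals.node,
        ...globals.browser,
        Intl: 'readonly',
      },
    },
    rules: {
      'max-lines-per-function': [
        'error',
        {
          'max': 180,
          'skipBlankLines': true,
        },
      ],
    },
  },
]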
21 changes: 10 additions & 11 deletions package.json
@@ -1,5 +1,5 @@
 {
-  "version": "8.0.10",
+  "version": "8.0.11",
   "name": "@extractus/article-extractor",
   "description": "To extract main article from given URL",
   "homepage": "https://github.com/extractus/article-extractor",
@@ -25,24 +25,23 @@
     "lint": "eslint .",
     "lint:fix": "eslint --fix .",
     "pretest": "npm run lint",
-    "test": "NODE_ENV=test NODE_OPTIONS=--experimental-vm-modules jest --verbose --coverage=true",
+    "test": "node --test",
     "eval": "node eval",
     "reset": "node reset"
   },
   "dependencies": {
     "@mozilla/readability": "^0.5.0",
-    "bellajs": "^11.1.3",
+    "bellajs": "^11.2.0",
     "cross-fetch": "^4.0.0",
-    "linkedom": "^0.16.11",
-    "sanitize-html": "2.13.0"
+    "linkedom": "^0.18.5",
+    "sanitize-html": "2.13.1"
   },
   "devDependencies": {
-    "@types/sanitize-html": "^2.11.0",
-    "eslint": "^9.2.0",
-    "globals": "^15.1.0",
-    "https-proxy-agent": "^7.0.4",
-    "jest": "^29.7.0",
-    "nock": "^13.5.4"
+    "@types/sanitize-html": "^2.13.0",
+    "eslint": "^9.12.0",
+    "globals": "^15.11.0",
+    "https-proxy-agent": "^7.0.5",
+    "nock": "^13.5.5"
   },
   "keywords": [
     "article",
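The dependency bumps are routine, but the "test" script change is the theme of this release: the suite now runs on Node's built-in test runner (node --test, added in Node 18 and stable since Node 20) instead of Jest, which is also why the jest devDependency disappears. A minimal, hypothetical example of the style the migrated test files below follow:

// example.test.js — hypothetical file, not part of the repository
import { describe, it } from 'node:test'
import assert from 'node:assert'

describe('a sample suite', () => {
  it('uses node:assert instead of Jest expect()', () => {
    assert.equal(1 + 1, 2)
  })
})

Running npm test (or node --test directly) discovers files named like *.test.js and executes them without any extra tooling.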
43 changes: 23 additions & 20 deletions src/config.test.js
@@ -1,34 +1,37 @@
 // config.test
-/* eslint-env jest */
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
 
 import {
   setSanitizeHtmlOptions,
   getSanitizeHtmlOptions
 } from './config.js'
 
-test('Testing setSanitizeHtmlOptions/getSanitizeHtmlOptions methods', () => {
-  setSanitizeHtmlOptions({
-    allowedTags: ['div', 'span'],
-    allowedAttributes: {
-      a: ['href', 'title'],
-    },
-  })
+describe('check config methods', () => {
+  it('Testing setSanitizeHtmlOptions/getSanitizeHtmlOptions methods', () => {
+    setSanitizeHtmlOptions({
+      allowedTags: ['div', 'span'],
+      allowedAttributes: {
+        a: ['href', 'title'],
+      },
+    })
 
-  const actual = getSanitizeHtmlOptions()
-  const actualAllowedAttributes = actual.allowedAttributes
-  const expectedAllowedAttributes = {
-    a: ['href', 'title'],
-  }
-
-  expect(actualAllowedAttributes).toEqual(expectedAllowedAttributes)
+    const actual = getSanitizeHtmlOptions()
+    const actualAllowedAttributes = actual.allowedAttributes
+    const expectedAllowedAttributes = {
+      a: ['href', 'title'],
+    }
+    assert.deepEqual(actualAllowedAttributes, expectedAllowedAttributes)
 
-  const actualAllowedTags = actual.allowedTags
-  const expectedAllowedTags = ['div', 'span']
-  expect(actualAllowedTags).toEqual(expectedAllowedTags)
+    const actualAllowedTags = actual.allowedTags
+    const expectedAllowedTags = ['div', 'span']
+    assert.deepEqual(actualAllowedTags, expectedAllowedTags)
 
-  setSanitizeHtmlOptions({
-    allowedTags: [],
-  })
+    setSanitizeHtmlOptions({
+      allowedTags: [],
+    })
 
-  expect(getSanitizeHtmlOptions().allowedTags).toEqual([])
+    assert.deepEqual(getSanitizeHtmlOptions().allowedTags, [])
+  })
 })
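The options object handed to setSanitizeHtmlOptions is passed through to sanitize-html, so keys such as allowedTags and allowedAttributes are the ones that library documents. A usage sketch against the public package API (the URL and the whitelist are placeholders):

import { extract, setSanitizeHtmlOptions } from '@extractus/article-extractor'

// keep only a small whitelist of tags and attributes in the returned content
setSanitizeHtmlOptions({
  allowedTags: ['p', 'a', 'img'],
  allowedAttributes: {
    a: ['href', 'title'],
    img: ['src', 'alt'],
  },
})

const article = await extract('https://example.com/some-post')
console.log(article?.content)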
52 changes: 27 additions & 25 deletions src/main.test.js
@@ -1,5 +1,7 @@
 // main.test
-/* eslint-env jest */
 
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
+
 import { readFileSync } from 'fs'
 
@@ -13,7 +15,7 @@ import {
   setSanitizeHtmlOptions,
   addTransformations,
   removeTransformations
-} from './main'
+} from './main.js'
 
 const env = process.env || {}
 const PROXY_SERVER = env.PROXY_SERVER || ''
@@ -36,8 +38,8 @@ describe('check all exported methods', () => {
   ]
 
   fns.forEach((fn) => {
-    test(` check ${fn.name}`, () => {
-      expect(fn).toBeTruthy()
+    it(` check ${fn.name}`, () => {
+      assert.ok(fn)
     })
   })
 })
@@ -56,11 +58,11 @@ describe('test extract(bad url)', () => {
   ]
 
   badSamples.forEach((url) => {
-    test(`testing extract bad url "${url}"`, async () => {
+    it(`testing extract bad url "${url}"`, async () => {
      try {
        await extract(url)
      } catch (err) {
-        expect(err).toBeTruthy()
+        assert.ok(err)
      }
    })
  })
@@ -78,28 +80,28 @@ describe('test extract(regular article url)', () => {
         url: 'https://somewhere.com/path/to/no/article',
         html: readFileSync('./test-data/html-no-article.html', 'utf8'),
       },
-      validate: (result, expect) => {
-        expect(result).toBeFalsy()
+      validate: (result) => {
+        assert.equal(result, null)
       },
     },
     {
       input: {
         url: 'https://somewhere.com/path/to/no/content',
         html: '',
       },
-      validate: (result, expect) => {
-        expect(result).toBeFalsy()
+      validate: (result) => {
+        assert.equal(result, null)
      },
    },
    {
      input: {
        url: 'https://somewhere.com/path/to/article',
        html: readFileSync('./test-data/regular-article.html', 'utf8'),
      },
-      validate: (result, expect) => {
-        expect(result).toBeTruthy()
-        expect(result.title).toEqual('Article title here')
-        expect(result.description).toEqual(expDesc)
+      validate: (result) => {
+        assert.ok(result)
+        assert.equal(result.title, 'Article title here')
+        assert.equal(result.description, expDesc)
      },
    },
  ]
@@ -111,18 +113,18 @@ describe('test extract(regular article url)', () => {
       .reply(statusCode, html, {
         'Content-Type': 'text/html',
       })
-    test(`check extract("${url}")`, async () => {
+    it(`check extract("${url}")`, async () => {
       const result = await extract(url)
-      validate(result, expect)
+      validate(result)
     })
   })
 
-  test('check extract(html string)', async () => {
+  it('check extract(html string)', async () => {
     const html = readFileSync('./test-data/regular-article.html', 'utf8')
     const result = await extract(html)
-    expect(result).toBeTruthy()
-    expect(result.title).toEqual('Article title here')
-    expect(result.description).toEqual(expDesc)
+    assert.ok(result)
+    assert.equal(result.title, 'Article title here')
+    assert.equal(result.description, expDesc)
   })
 })
 
@@ -141,22 +143,22 @@ describe('test extract with modified sanitize-html options', () => {
     },
   })
 
-  test('check if output contain class attribute', async () => {
+  it('check if output contain class attribute', async () => {
     const html = readFileSync('./test-data/article-with-classes-attributes.html', 'utf8')
     const result = await extract(html)
-    expect(result.content).toEqual(expect.stringContaining('code class="lang-js"'))
+    assert.ok(result.content.includes('code class="lang-js"'))
   })
 })
 
 if (PROXY_SERVER !== '') {
   describe('test extract live article API via proxy server', () => {
-    test('check if extract method works with proxy server', async () => {
+    it('check if extract method works with proxy server', async () => {
       const url = 'https://www.cnbc.com/2022/09/21/what-another-major-rate-hike-by-the-federal-reserve-means-to-you.html'
       const result = await extract(url, {}, {
         agent: new HttpsProxyAgent(PROXY_SERVER),
       })
-      expect(result.title).toEqual(expect.stringContaining('Federal Reserve'))
-      expect(result.source).toEqual('cnbc.com')
+      assert.ok(result.title.includes('Federal Reserve'))
+      assert.equal(result.source, 'cnbc.com')
     }, 10000)
   })
 }
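As the proxy test above suggests, extract() takes what look like parser options as its second argument and fetch options as its third, and a proxy is wired in through the agent field. A sketch of the same call outside the test harness (the proxy URL is a placeholder):

import { extract } from '@extractus/article-extractor'
import { HttpsProxyAgent } from 'https-proxy-agent'

const url = 'https://www.cnbc.com/2022/09/21/what-another-major-rate-hike-by-the-federal-reserve-means-to-you.html'
const article = await extract(url, {}, {
  agent: new HttpsProxyAgent('http://127.0.0.1:8080'), // placeholder proxy address
})
console.log(article?.title, article?.source)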
31 changes: 17 additions & 14 deletions src/utils/extractMetaData.test.js
@@ -1,5 +1,6 @@
 // extractMetaData.test
-/* eslint-env jest */
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
 
 import { readFileSync } from 'node:fs'
 
@@ -9,20 +10,22 @@ import extractMetaData from './extractMetaData.js'
 
 const keys = 'url shortlink amphtml canonical title description image author source published favicon type'.split(' ')
 
-test('test extractMetaData(good content)', async () => {
-  const html = readFileSync('./test-data/regular-article.html', 'utf8')
-  const result = extractMetaData(html)
-  expect(isObject(result)).toBe(true)
-  keys.forEach((k) => {
-    expect(hasProperty(result, k)).toBe(true)
+describe('test extractMetaData', () => {
+  it('test extractMetaData(good content)', async () => {
+    const html = readFileSync('./test-data/regular-article.html', 'utf8')
+    const result = extractMetaData(html)
+    assert.ok(isObject(result))
+    keys.forEach((k) => {
+      assert.ok(hasProperty(result, k))
+    })
   })
-})
 
-test('test extractMetaData(json ld schema content)', async () => {
-  const html = readFileSync('./test-data/regular-article-json-ld.html', 'utf8')
-  const result = extractMetaData(html)
-  expect(isObject(result)).toBe(true)
-  keys.forEach((k) => {
-    expect(hasProperty(result, k)).toBe(true)
+  it('test extractMetaData(json ld schema content)', async () => {
+    const html = readFileSync('./test-data/regular-article-json-ld.html', 'utf8')
+    const result = extractMetaData(html)
+    assert.ok(isObject(result))
+    keys.forEach((k) => {
+      assert.ok(hasProperty(result, k))
+    })
   })
 })
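extractMetaData is a utility module under src/utils, and the migrated test doubles as documentation for its return shape. A small sketch, runnable from inside the repository with the repo root as working directory, mirroring the test's own calls:

import { readFileSync } from 'node:fs'
import extractMetaData from './extractMetaData.js'

const html = readFileSync('./test-data/regular-article.html', 'utf8')
const meta = extractMetaData(html)
// a plain object whose keys include:
// url, shortlink, amphtml, canonical, title, description,
// image, author, source, published, favicon, type
console.log(meta.title, meta.canonical)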