Merge pull request #401 from extractus/8.0.11
v8.0.11
ndaidong authored Oct 14, 2024
2 parents 4f0e78d + bf44188 commit 34c58f1
Showing 15 changed files with 357 additions and 387 deletions.
18 changes: 0 additions & 18 deletions .github/workflows/ci-test.yml
@@ -31,28 +31,10 @@ jobs:
           npm run build --if-present
           npm run test
 
-      - name: Coveralls Parallel
-        uses: coverallsapp/github-action@v2
-        with:
-          flag-name: run-${{ join(matrix.*, '-') }}
-          parallel: true
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
       - name: cache node modules
         uses: actions/cache@v4
         with:
           path: ~/.npm
           key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
           restore-keys: |
             ${{ runner.os }}-node-
-
-  finish:
-    needs: test
-    if: ${{ always() }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Coveralls Finished
-        uses: coverallsapp/github-action@v2
-        with:
-          parallel-finished: true
-          carryforward: "run-18.x,run-20.x,run-21.x"
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@ coverage
 yarn.lock
 coverage.lcov
 pnpm-lock.yaml
+lcov.info
 
 deno.lock
 
1 change: 1 addition & 0 deletions .npmignore
@@ -4,3 +4,4 @@ coverage
 pnpm-lock.yaml
 examples
 test-data
+lcov.info
1 change: 0 additions & 1 deletion README.md
@@ -5,7 +5,6 @@ Extract main article, main image and meta data from URL.
 [![npm version](https://badge.fury.io/js/@extractus%2Farticle-extractor.svg)](https://badge.fury.io/js/@extractus%2Farticle-extractor)
 ![CodeQL](https://github.com/extractus/article-extractor/workflows/CodeQL/badge.svg)
 ![CI test](https://github.com/extractus/article-extractor/workflows/ci-test/badge.svg)
-[![Coverage Status](https://coveralls.io/repos/github/extractus/article-extractor/badge.svg?branch=main)](https://coveralls.io/github/extractus/article-extractor?branch=main)
 
 (This library is derived from [article-parser](https://www.npmjs.com/package/article-parser) renamed.)
 
3 changes: 1 addition & 2 deletions eslint.config.js
@@ -12,7 +12,6 @@ export default [
       globals: {
         ...globals.node,
         ...globals.browser,
-        ...globals.jest,
         Intl: 'readonly',
       },
     },
@@ -76,7 +75,7 @@ export default [
       'max-lines-per-function': [
         'error',
         {
-          'max': 150,
+          'max': 180,
           'skipBlankLines': true,
         },
       ],
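Taken together, the two hunks drop the Jest globals (the migrated suite no longer needs them) and raise the max-lines-per-function limit from 150 to 180. A rough sketch of the relevant part of eslint.config.js after this change — assuming the surrounding flat-config entries stay as they were, with unrelated rules omitted:

import globals from 'globals'

export default [
  {
    languageOptions: {
      globals: {
        ...globals.node,
        ...globals.browser,
        Intl: 'readonly',
      },
    },
    rules: {
      'max-lines-per-function': [
        'error',
        {
          'max': 180,
          'skipBlankLines': true,
        },
      ],
    },
  },
]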
21 changes: 10 additions & 11 deletions package.json
@@ -1,5 +1,5 @@
 {
-  "version": "8.0.10",
+  "version": "8.0.11",
   "name": "@extractus/article-extractor",
   "description": "To extract main article from given URL",
   "homepage": "https://github.com/extractus/article-extractor",
@@ -25,24 +25,23 @@
     "lint": "eslint .",
     "lint:fix": "eslint --fix .",
     "pretest": "npm run lint",
-    "test": "NODE_ENV=test NODE_OPTIONS=--experimental-vm-modules jest --verbose --coverage=true",
+    "test": "node --test",
     "eval": "node eval",
     "reset": "node reset"
   },
   "dependencies": {
     "@mozilla/readability": "^0.5.0",
-    "bellajs": "^11.1.3",
+    "bellajs": "^11.2.0",
     "cross-fetch": "^4.0.0",
-    "linkedom": "^0.16.11",
-    "sanitize-html": "2.13.0"
+    "linkedom": "^0.18.5",
+    "sanitize-html": "2.13.1"
   },
   "devDependencies": {
-    "@types/sanitize-html": "^2.11.0",
-    "eslint": "^9.2.0",
-    "globals": "^15.1.0",
-    "https-proxy-agent": "^7.0.4",
-    "jest": "^29.7.0",
-    "nock": "^13.5.4"
+    "@types/sanitize-html": "^2.13.0",
+    "eslint": "^9.12.0",
+    "globals": "^15.11.0",
+    "https-proxy-agent": "^7.0.5",
+    "nock": "^13.5.5"
   },
   "keywords": [
     "article",
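The dependency bumps are routine, but the "test" script change is the theme of this release: the suite now runs on Node's built-in test runner (node --test, added in Node 18 and stable since Node 20) instead of Jest, which is also why the jest devDependency disappears. A minimal, hypothetical example of the style the migrated test files below follow:

// example.test.js — hypothetical file, not part of the repository
import { describe, it } from 'node:test'
import assert from 'node:assert'

describe('a sample suite', () => {
  it('uses node:assert instead of Jest expect()', () => {
    assert.equal(1 + 1, 2)
  })
})

Running npm test (or node --test directly) discovers files named like *.test.js and executes them without any extra tooling.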
43 changes: 23 additions & 20 deletions src/config.test.js
@@ -1,34 +1,37 @@
 // config.test
-/* eslint-env jest */
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
 
 import {
   setSanitizeHtmlOptions,
   getSanitizeHtmlOptions
 } from './config.js'
 
-test('Testing setSanitizeHtmlOptions/getSanitizeHtmlOptions methods', () => {
-  setSanitizeHtmlOptions({
-    allowedTags: ['div', 'span'],
-    allowedAttributes: {
-      a: ['href', 'title'],
-    },
-  })
+describe('check config methods', () => {
+  it('Testing setSanitizeHtmlOptions/getSanitizeHtmlOptions methods', () => {
+    setSanitizeHtmlOptions({
+      allowedTags: ['div', 'span'],
+      allowedAttributes: {
+        a: ['href', 'title'],
+      },
+    })
 
-  const actual = getSanitizeHtmlOptions()
-  const actualAllowedAttributes = actual.allowedAttributes
-  const expectedAllowedAttributes = {
-    a: ['href', 'title'],
-  }
-
-  expect(actualAllowedAttributes).toEqual(expectedAllowedAttributes)
+    const actual = getSanitizeHtmlOptions()
+    const actualAllowedAttributes = actual.allowedAttributes
+    const expectedAllowedAttributes = {
+      a: ['href', 'title'],
+    }
+    assert.deepEqual(actualAllowedAttributes, expectedAllowedAttributes)
 
-  const actualAllowedTags = actual.allowedTags
-  const expectedAllowedTags = ['div', 'span']
-  expect(actualAllowedTags).toEqual(expectedAllowedTags)
+    const actualAllowedTags = actual.allowedTags
+    const expectedAllowedTags = ['div', 'span']
+    assert.deepEqual(actualAllowedTags, expectedAllowedTags)
 
-  setSanitizeHtmlOptions({
-    allowedTags: [],
-  })
+    setSanitizeHtmlOptions({
+      allowedTags: [],
+    })
 
-  expect(getSanitizeHtmlOptions().allowedTags).toEqual([])
+    assert.deepEqual(getSanitizeHtmlOptions().allowedTags, [])
+  })
 })
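The options object handed to setSanitizeHtmlOptions is passed through to sanitize-html, so keys such as allowedTags and allowedAttributes are the ones that library documents. A usage sketch against the public package API (the URL and the whitelist are placeholders):

import { extract, setSanitizeHtmlOptions } from '@extractus/article-extractor'

// keep only a small whitelist of tags and attributes in the returned content
setSanitizeHtmlOptions({
  allowedTags: ['p', 'a', 'img'],
  allowedAttributes: {
    a: ['href', 'title'],
    img: ['src', 'alt'],
  },
})

const article = await extract('https://example.com/some-post')
console.log(article?.content)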
52 changes: 27 additions & 25 deletions src/main.test.js
@@ -1,5 +1,7 @@
 // main.test
-/* eslint-env jest */
 
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
+
 import { readFileSync } from 'fs'
 
@@ -13,7 +15,7 @@ import {
   setSanitizeHtmlOptions,
   addTransformations,
   removeTransformations
-} from './main'
+} from './main.js'
 
 const env = process.env || {}
 const PROXY_SERVER = env.PROXY_SERVER || ''
@@ -36,8 +38,8 @@ describe('check all exported methods', () => {
   ]
 
   fns.forEach((fn) => {
-    test(` check ${fn.name}`, () => {
-      expect(fn).toBeTruthy()
+    it(` check ${fn.name}`, () => {
+      assert.ok(fn)
     })
   })
 })
@@ -56,11 +58,11 @@ describe('test extract(bad url)', () => {
   ]
 
   badSamples.forEach((url) => {
-    test(`testing extract bad url "${url}"`, async () => {
+    it(`testing extract bad url "${url}"`, async () => {
      try {
        await extract(url)
      } catch (err) {
-        expect(err).toBeTruthy()
+        assert.ok(err)
      }
    })
  })
@@ -78,28 +80,28 @@ describe('test extract(regular article url)', () => {
         url: 'https://somewhere.com/path/to/no/article',
         html: readFileSync('./test-data/html-no-article.html', 'utf8'),
       },
-      validate: (result, expect) => {
-        expect(result).toBeFalsy()
+      validate: (result) => {
+        assert.equal(result, null)
       },
     },
     {
       input: {
         url: 'https://somewhere.com/path/to/no/content',
         html: '',
       },
-      validate: (result, expect) => {
-        expect(result).toBeFalsy()
+      validate: (result) => {
+        assert.equal(result, null)
      },
    },
    {
      input: {
        url: 'https://somewhere.com/path/to/article',
        html: readFileSync('./test-data/regular-article.html', 'utf8'),
      },
-      validate: (result, expect) => {
-        expect(result).toBeTruthy()
-        expect(result.title).toEqual('Article title here')
-        expect(result.description).toEqual(expDesc)
+      validate: (result) => {
+        assert.ok(result)
+        assert.equal(result.title, 'Article title here')
+        assert.equal(result.description, expDesc)
      },
    },
  ]
@@ -111,18 +113,18 @@ describe('test extract(regular article url)', () => {
       .reply(statusCode, html, {
         'Content-Type': 'text/html',
       })
-    test(`check extract("${url}")`, async () => {
+    it(`check extract("${url}")`, async () => {
       const result = await extract(url)
-      validate(result, expect)
+      validate(result)
     })
   })
 
-  test('check extract(html string)', async () => {
+  it('check extract(html string)', async () => {
     const html = readFileSync('./test-data/regular-article.html', 'utf8')
     const result = await extract(html)
-    expect(result).toBeTruthy()
-    expect(result.title).toEqual('Article title here')
-    expect(result.description).toEqual(expDesc)
+    assert.ok(result)
+    assert.equal(result.title, 'Article title here')
+    assert.equal(result.description, expDesc)
   })
 })
 
@@ -141,22 +143,22 @@ describe('test extract with modified sanitize-html options', () => {
     },
   })
 
-  test('check if output contain class attribute', async () => {
+  it('check if output contain class attribute', async () => {
     const html = readFileSync('./test-data/article-with-classes-attributes.html', 'utf8')
     const result = await extract(html)
-    expect(result.content).toEqual(expect.stringContaining('code class="lang-js"'))
+    assert.ok(result.content.includes('code class="lang-js"'))
   })
 })
 
 if (PROXY_SERVER !== '') {
   describe('test extract live article API via proxy server', () => {
-    test('check if extract method works with proxy server', async () => {
+    it('check if extract method works with proxy server', async () => {
       const url = 'https://www.cnbc.com/2022/09/21/what-another-major-rate-hike-by-the-federal-reserve-means-to-you.html'
       const result = await extract(url, {}, {
         agent: new HttpsProxyAgent(PROXY_SERVER),
       })
-      expect(result.title).toEqual(expect.stringContaining('Federal Reserve'))
-      expect(result.source).toEqual('cnbc.com')
+      assert.ok(result.title.includes('Federal Reserve'))
+      assert.equal(result.source, 'cnbc.com')
     }, 10000)
   })
 }
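As the proxy test above suggests, extract() takes what look like parser options as its second argument and fetch options as its third, and a proxy is wired in through the agent field. A sketch of the same call outside the test harness (the proxy URL is a placeholder):

import { extract } from '@extractus/article-extractor'
import { HttpsProxyAgent } from 'https-proxy-agent'

const url = 'https://www.cnbc.com/2022/09/21/what-another-major-rate-hike-by-the-federal-reserve-means-to-you.html'
const article = await extract(url, {}, {
  agent: new HttpsProxyAgent('http://127.0.0.1:8080'), // placeholder proxy address
})
console.log(article?.title, article?.source)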
31 changes: 17 additions & 14 deletions src/utils/extractMetaData.test.js
@@ -1,5 +1,6 @@
 // extractMetaData.test
-/* eslint-env jest */
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
 
 import { readFileSync } from 'node:fs'
 
@@ -9,20 +10,22 @@ import extractMetaData from './extractMetaData.js'
 
 const keys = 'url shortlink amphtml canonical title description image author source published favicon type'.split(' ')
 
-test('test extractMetaData(good content)', async () => {
-  const html = readFileSync('./test-data/regular-article.html', 'utf8')
-  const result = extractMetaData(html)
-  expect(isObject(result)).toBe(true)
-  keys.forEach((k) => {
-    expect(hasProperty(result, k)).toBe(true)
+describe('test extractMetaData', () => {
+  it('test extractMetaData(good content)', async () => {
+    const html = readFileSync('./test-data/regular-article.html', 'utf8')
+    const result = extractMetaData(html)
+    assert.ok(isObject(result))
+    keys.forEach((k) => {
+      assert.ok(hasProperty(result, k))
+    })
   })
-})
 
-test('test extractMetaData(json ld schema content)', async () => {
-  const html = readFileSync('./test-data/regular-article-json-ld.html', 'utf8')
-  const result = extractMetaData(html)
-  expect(isObject(result)).toBe(true)
-  keys.forEach((k) => {
-    expect(hasProperty(result, k)).toBe(true)
+  it('test extractMetaData(json ld schema content)', async () => {
+    const html = readFileSync('./test-data/regular-article-json-ld.html', 'utf8')
+    const result = extractMetaData(html)
+    assert.ok(isObject(result))
+    keys.forEach((k) => {
+      assert.ok(hasProperty(result, k))
+    })
   })
 })
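extractMetaData is a utility module under src/utils, and the migrated test doubles as documentation for its return shape. A small sketch, runnable from inside the repository with the repo root as working directory, mirroring the test's own calls:

import { readFileSync } from 'node:fs'
import extractMetaData from './extractMetaData.js'

const html = readFileSync('./test-data/regular-article.html', 'utf8')
const meta = extractMetaData(html)
// a plain object whose keys include:
// url, shortlink, amphtml, canonical, title, description,
// image, author, source, published, favicon, type
console.log(meta.title, meta.canonical)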