Skip to content

Commit

Permalink
Merge pull request #289 from ndaidong/7.0.2
Browse files Browse the repository at this point in the history
v7.0.2
  • Loading branch information
ndaidong authored Sep 3, 2022
2 parents bc25c16 + 1151830 commit 864ae92
Show file tree
Hide file tree
Showing 10 changed files with 248 additions and 148 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ Extract main article, main image and meta data from URL.
![CodeQL](https://github.com/ndaidong/article-parser/workflows/CodeQL/badge.svg)
[![JavaScript Style Guide](https://img.shields.io/badge/code_style-standard-brightgreen.svg)](https://standardjs.com)

[![Deploy](https://button.deta.dev/1/svg)](https://go.deta.dev/deploy?repo=https://github.com/ndaidong/article-parser-deta)

## Demo

- [Give it a try!](https://demos.pwshub.com/article-parser)
- [Example FaaS](https://extractor.pwshub.com/article/parse?url=https://www.cnbc.com/2022/07/02/tesla-tsla-q2-2022-vehicle-delivery-and-production-numbers.html&apikey=demo-TEyRycuuMCiGBiBocbLGSpagfj7gOF8AMyAWfEgP)
- [Example FaaS](https://extract-article.deta.dev/?url=https://www.freecodecamp.org/news/what-is-an-ide-for-beginners)

## Install

Expand Down
5 changes: 4 additions & 1 deletion build.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* @ndaidong
**/

import { readFileSync, writeFileSync, rmSync, mkdirSync } from 'fs'
import { readFileSync, writeFileSync, copyFileSync, rmSync, mkdirSync } from 'fs'

import { buildSync } from 'esbuild'

Expand Down Expand Up @@ -75,3 +75,6 @@ const browserVersion = {
}
}
buildSync(browserVersion)

// copy types definition to cjs dir
copyFileSync('./index.d.ts', 'dist/cjs/index.d.ts')
4 changes: 4 additions & 0 deletions build.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const pkg = JSON.parse(readFileSync('./package.json'))

const cjsFile = `./dist/cjs/${pkg.name}.js`
const cjsPkg = JSON.parse(readFileSync('./dist/cjs/package.json'))
const cjsTypesFile = './dist/cjs/index.d.ts'

describe('Validate commonjs version output', () => {
test(`Check if ${cjsFile} created`, () => {
Expand All @@ -27,4 +28,7 @@ describe('Validate commonjs version output', () => {
expect(lines[0].includes(pkg.author)).toBeTruthy()
expect(lines[0].includes(pkg.license)).toBeTruthy()
})
test(`Check if ${cjsTypesFile} created`, () => {
expect(existsSync(cjsTypesFile)).toBeTruthy()
})
})
94 changes: 47 additions & 47 deletions dist/article-parser.browser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/article-parser.browser.js.map

Large diffs are not rendered by default.

168 changes: 84 additions & 84 deletions dist/cjs/article-parser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/cjs/article-parser.js.map

Large diffs are not rendered by default.

92 changes: 92 additions & 0 deletions dist/cjs/index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Type definitions

import {AxiosRequestConfig} from "axios";
import {IOptions as SanitizeOptions} from "sanitize-html";
import {defaults} from "html-crush";
import "urlpattern-polyfill";

/**
 * Options for html-crush: the type of the `defaults` object exported by
 * "html-crush", with every property made optional.
 */
type HtmlCrushOptions = Partial<typeof defaults>

/**
 * Pairs a list of URL patterns with optional document hooks.
 *
 * - `patterns`: URL patterns, each given as a string or a `URLPatternInit` object.
 * - `pre` / `post`: optional callbacks that receive a `Document` and return a `Document`.
 *
 * NOTE(review): presumably `pre` runs before extraction and `post` after it,
 * for URLs matching `patterns` — confirm against the implementation.
 *
 * @example
 * {
 *   patterns: [
 *     '*://example.com/books/:id', {
 *       hostname: 'example.com',
 *       pathname: '/books/:id',
 *     }
 *   ],
 *   pre: (document) => document,
 *   post: (document) => document
 * }
 */
export interface Transformation {
patterns: Array<URLPatternInit | string>,
pre?: (document: Document) => Document
post?: (document: Document) => Document
}

/**
 * @param input url or html
 * @returns a promise resolving to the extracted `ArticleData`
 */
export function extract(input: string): Promise<ArticleData>;

/**
 * @param transformations transformations to register
 * @returns a primitive number
 * (NOTE(review): presumably the count of transformations added — confirm)
 */
export function addTransformations(transformations: Array<Transformation>): number;

/**
 * @param options URL patterns identifying the transformations to remove
 * @returns a primitive number
 * (NOTE(review): presumably the count of transformations removed — confirm)
 */
export function removeTransformations(options: Array<URLPatternInit>): number;

// Setters and getters for the four option groups (parser, request,
// sanitize-html, html-crush). Each getter returns the same option type
// that its matching setter accepts.
export function setParserOptions(options: ParserOptions): void;

export function setRequestOptions(options: AxiosRequestConfig): void;

export function setSanitizeHtmlOptions(options: SanitizeOptions): void;

export function setHtmlCrushOptions(options: HtmlCrushOptions): void;

export function getParserOptions(): ParserOptions;

export function getRequestOptions(): AxiosRequestConfig;

export function getSanitizeHtmlOptions(): SanitizeOptions;

export function getHtmlCrushOptions(): HtmlCrushOptions;

/**
 * Parser settings accepted by `setParserOptions()` and returned by
 * `getParserOptions()`.
 */
export interface ParserOptions {
/**
 * Words per minute, used for estimating "time to read".
 * Default: 300
 */
wordsPerMinute: number
/**
 * Name of the comparison algorithm used to find the best url from a list
 */
urlsCompareAlgorithm: 'levenshtein' | 'cosine' | 'diceCoefficient' | 'jaccardIndex' | 'lcs' | 'mlcs'
/**
 * Minimum number of characters required for the description
 * Default: 40
 */
descriptionLengthThreshold: number
/**
 * Maximum number of characters generated for the description
 * Default: 156
 */
descriptionTruncateLen: number
/**
 * Minimum number of characters required for the content
 * Default: 200
 */
contentLengthThreshold: number
}

/**
 * Article data that the promise returned by `extract()` resolves to.
 * Every field is optional.
 *
 * NOTE(review): `ttr` presumably stands for "time to read" (see
 * `wordsPerMinute` in `ParserOptions`) — confirm meaning and unit.
 */
export interface ArticleData {
url?: string;
links?: string[];
title?: string;
description?: string;
image?: string;
author?: string;
content?: string;
source?: string;
published?: string;
ttr?: number;
}
2 changes: 1 addition & 1 deletion dist/cjs/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "article-parser-cjs",
"version": "7.0.1",
"version": "7.0.2",
"main": "./article-parser.js"
}
12 changes: 6 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "7.0.1",
"version": "7.0.2",
"name": "article-parser",
"description": "To extract main article from given URL",
"homepage": "https://ndaidong.github.io/article-parser-demo/",
Expand Down Expand Up @@ -30,12 +30,12 @@
"dependencies": {
"@mozilla/readability": "^0.4.2",
"axios": "^0.27.2",
"bellajs": "^11.0.4",
"html-crush": "^5.0.22",
"bellajs": "^11.0.5",
"html-crush": "^5.1.3",
"linkedom": "^0.14.12",
"sanitize-html": "^2.7.1",
"string-comparison": "^1.1.0",
"tldts": "^5.7.89",
"tldts": "^5.7.90",
"urlpattern-polyfill": "^5.0.6"
},
"standard": {
Expand All @@ -46,8 +46,8 @@
"devDependencies": {
"@types/sanitize-html": "^2.6.2",
"cross-env": "^7.0.3",
"esbuild": "^0.15.1",
"jest": "^28.1.3",
"esbuild": "^0.15.6",
"jest": "^29.0.1",
"nock": "^13.2.9",
"standard": "^17.0.0"
},
Expand Down

0 comments on commit 864ae92

Please sign in to comment.