Skip to content

Commit

Permalink
Update examples & test with pupperteer
Browse files Browse the repository at this point in the history
  • Loading branch information
ndaidong committed May 7, 2024
1 parent 1d216af commit 32a17a2
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 22 deletions.
9 changes: 0 additions & 9 deletions deno.json

This file was deleted.

4 changes: 2 additions & 2 deletions examples/browser-article-parser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"start": "node server"
},
"dependencies": {
"express": "^4.18.2",
"got": "^14.2.0"
"express": "latest",
"got": "latest"
}
}
4 changes: 2 additions & 2 deletions examples/bun-article-parser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
"start": "bun run index.ts"
},
"devDependencies": {
"bun-types": "^1.0.26"
"bun-types": "latest"
},
"dependencies": {
"@extractus/article-extractor": "latest",
"hono": "^4.0.1"
"hono": "latest"
}
}
2 changes: 1 addition & 1 deletion examples/deno-article-parser/deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"version": "1.0.0",
"imports": {
"serve": "https://deno.land/std/http/server.ts",
"hono": "https://deno.land/x/hono@v3.11.2/mod.ts",
"hono": "https://deno.land/x/hono/mod.ts",
"article-extractor": "https://esm.sh/@extractus/article-extractor"
},
"tasks": {
Expand Down
2 changes: 1 addition & 1 deletion examples/node-article-parser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
},
"dependencies": {
"@extractus/article-extractor": "latest",
"express": "^4.18.2"
"express": "latest"
}
}
19 changes: 19 additions & 0 deletions examples/pupperteer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# node-article-parser with Puppeteer

Install dependencies:

```bash
npm i

# or pnpm, yarn
```

Start server:

```bash
npm start
```

Open `http://localhost:3100/?url=https://www.techradar.com/televisions/samsungs-new-cheaper-oled-tvs-are-now-available-to-buy` to see the result.

---
64 changes: 64 additions & 0 deletions examples/pupperteer/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import puppeteer from 'puppeteer'
import express from 'express'
import { extractFromHtml } from '@extractus/article-extractor'

// Express application that serves the article-extraction endpoint.
const app = express()

/**
 * Static description of this example service.
 * Returned as-is when no `url` query is given, and attached to every
 * extraction response under the `meta` key.
 */
const meta = {
  service: 'article-parser-pupperteer',
  lang: 'javascript',
  server: 'express',
  platform: 'node',
}

/**
 * Render a page in a headless browser and return its serialized HTML.
 *
 * A fresh puppeteer browser is launched for every call and is always
 * closed again, whether rendering succeeded or not.
 *
 * @param {string} url - address of the page to render
 * @returns {Promise<string|null>} the page's HTML, or `null` when any step
 *   failed (the error is logged, not rethrown)
 */
const loadHtml = async (url) => {
  let engine = null
  try {
    console.log('Initialize puppeteer engine')
    engine = await puppeteer.launch()

    const tab = await engine.newPage()
    // Same value as 6e4; puppeteer takes this timeout in milliseconds.
    await tab.setDefaultNavigationTimeout(60000)

    console.log(`Start rendering target page "${url}"`)
    // 'networkidle0' is puppeteer's option for waiting until network activity settles.
    await tab.goto(url, { waitUntil: 'networkidle0' })

    console.log(`Load html content from target page ${url}`)
    // `return await` keeps a rejection from content() inside this try/catch.
    return await tab.content()
  } catch (err) {
    console.error(err)
    return null
  } finally {
    // `engine` is still null when launch() itself failed.
    await engine?.close()
  }
}

/**
 * Tell whether a query value is an absolute http(s) URL.
 *
 * The value comes straight from the request and is handed to a headless
 * browser, so other schemes (e.g. `file:`) and non-string query shapes
 * (repeated `url=` parameters arrive as arrays) must be rejected.
 *
 * @param {unknown} value - raw `req.query.url`
 * @returns {boolean} true only for a string that parses as http: or https:
 */
const isHttpUrl = (value) => {
  if (typeof value !== 'string') {
    return false
  }
  try {
    const { protocol } = new URL(value)
    return protocol === 'http:' || protocol === 'https:'
  } catch {
    // new URL() throws on anything that is not an absolute URL
    return false
  }
}

/**
 * GET /
 *
 * Without a `url` query parameter: responds with the service `meta`.
 * With `?url=<http(s) address>`: renders the page through `loadHtml`,
 * extracts the article from the rendered HTML and responds with
 * `{ error: 0, message, data, meta }`.
 * On any failure responds with `{ error: 1, message, data: null, meta }`.
 */
app.get('/', async (req, res) => {
  const url = req.query.url
  if (!url) {
    return res.json(meta)
  }
  try {
    if (!isHttpUrl(url)) {
      throw new Error('query parameter "url" must be an absolute http(s) URL')
    }
    const html = await loadHtml(url)
    // loadHtml reports failure by returning null; surface that instead of
    // passing null on to the extractor.
    if (!html) {
      throw new Error(`could not load html content from "${url}"`)
    }
    const data = await extractFromHtml(html, url)
    return res.json({
      error: 0,
      message: 'article has been extracted successfully',
      data,
      meta,
    })
  } catch (err) {
    return res.json({
      error: 1,
      message: err.message,
      data: null,
      meta,
    })
  }
})

// Callback passed to app.listen: logs the server address.
const onListening = () => {
  console.log('Server is running at http://localhost:3100')
}

// Start the HTTP server on port 3100.
app.listen(3100, onListening)
14 changes: 14 additions & 0 deletions examples/pupperteer/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"name": "node-pupperteer",
"version": "1.0.0",
"main": "index.js",
"type": "module",
"scripts": {
"start": "node index.js"
},
"dependencies": {
"@extractus/article-extractor": "latest",
"express": "latest",
"puppeteer": "latest"
}
}
4 changes: 2 additions & 2 deletions examples/tsnode-article-parser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"start": "node dist/index.js"
},
"devDependencies": {
"typescript": "^5.3.3"
"typescript": "latest"
},
"dependencies": {
"@extractus/article-extractor": "latest",
"express": "^4.18.2"
"express": "latest"
}
}
11 changes: 6 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "8.0.8",
"version": "8.0.9",
"name": "@extractus/article-extractor",
"description": "To extract main article from given URL",
"homepage": "https://github.com/extractus/article-extractor",
Expand All @@ -11,15 +11,16 @@
"main": "./src/main.js",
"type": "module",
"imports": {
"cross-fetch": "./src/deno/cross-fetch.js"
"cross-fetch": "./src/deno/cross-fetch.js",
"linkedom": "https://deno.land/x/[email protected]/deno-dom-wasm.ts"
},
"browser": {
"cross-fetch": "./src/deno/cross-fetch.js",
"linkedom": "./src/browser/linkedom.js"
},
"types": "./index.d.ts",
"engines": {
"node": ">= 16"
"node": ">= 18"
},
"scripts": {
"lint": "eslint .",
Expand All @@ -38,8 +39,8 @@
},
"devDependencies": {
"@types/sanitize-html": "^2.11.0",
"eslint": "^9.1.1",
"globals": "^15.0.0",
"eslint": "^9.2.0",
"globals": "^15.1.0",
"https-proxy-agent": "^7.0.4",
"jest": "^29.7.0",
"nock": "^13.5.4"
Expand Down

0 comments on commit 32a17a2

Please sign in to comment.