Skip to content

Commit

Permalink
Merge pull request #79 from extractus/6.2.0
Browse files Browse the repository at this point in the history
v6.2.0
  • Loading branch information
ndaidong authored Jan 4, 2023
2 parents 220217f + b2e9336 commit e5be9ab
Show file tree
Hide file tree
Showing 8 changed files with 939 additions and 11 deletions.
8 changes: 5 additions & 3 deletions dist/cjs/feed-extractor.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion dist/cjs/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "@extractus/feed-extractor",
"version": "6.1.9",
"version": "6.2.0",
"main": "./feed-extractor.js"
}
8 changes: 5 additions & 3 deletions dist/feed-extractor.esm.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// @extractus/feed-extractor@6.1.9, by @extractus - built with esbuild at 2023-01-04T04:09:49.802Z - published under MIT license
// @extractus/feed-extractor@6.2.0, by @extractus - built with esbuild at 2023-01-04T04:59:05.957Z - published under MIT license
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
Expand Down Expand Up @@ -2003,7 +2003,7 @@ var getLink = (val = [], id = "") => {
if (id && isValid(id)) {
return id;
}
if (isObject(id) && hasProperty(id, "@_isPermaLink") && Boolean(id["@_isPermaLink"]) === true) {
if (isObject(id) && hasProperty(id, "@_isPermaLink") && id["@_isPermaLink"] === "true") {
return getText(id);
}
const getEntryLink = (links) => {
Expand Down Expand Up @@ -2233,13 +2233,15 @@ var transform3 = (item, options) => {
const {
id = "",
title = "",
issued = "",
modified = "",
updated = "",
published = "",
link = "",
summary = "",
content = ""
} = item;
const pubDate = updated || published;
const pubDate = updated || modified || published || issued;
const htmlContent = getText(content || summary);
const entry = {
id: getEntryId(id, link, pubDate),
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "6.1.9",
"version": "6.2.0",
"name": "@extractus/feed-extractor",
"description": "To read and normalize RSS/ATOM/JSON feed data",
"homepage": "https://github.com/extractus/feed-extractor",
Expand Down
39 changes: 38 additions & 1 deletion src/main.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ import { readFileSync } from 'fs'

import nock from 'nock'

import { hasProperty } from 'bellajs'
import { hasProperty, isString } from 'bellajs'

import { read } from './main.js'
import { isValid as isValidUrl } from './utils/linker.js'

// Property names every normalized feed object is expected to expose.
const feedAttrs = [
  'title',
  'link',
  'description',
  'generator',
  'language',
  'published',
  'entries'
]
// Property names every normalized feed entry is expected to expose.
const entryAttrs = [
  'title',
  'link',
  'description',
  'published',
  'id'
]
Expand All @@ -20,6 +21,19 @@ const parseUrl = (url) => {
}
}

/**
 * Tell whether a value can be turned into a real date.
 *
 * @param {*} d - anything accepted by the `Date` constructor
 * @returns {boolean} `false` when `new Date(d)` is an invalid date, `true` otherwise
 */
const isValidDate = (d) => {
  // An invalid Date carries a NaN time value.
  const timestamp = new Date(d).getTime()
  return !Number.isNaN(timestamp)
}

/**
 * Check the shape of a single normalized feed entry.
 *
 * The entry passes when `description` is a string, `id` and `title` are
 * non-empty strings, `link` is a string accepted by `isValidUrl`, and
 * `published` is a string accepted by `isValidDate`.
 *
 * @param {object} entry - entry object taken from a `read()` result
 * @returns {boolean} outcome of the combined checks
 */
const validateProps = (entry) => {
  const { id, link, title, published, description } = entry
  // Shared predicate for fields that must be present and not blank.
  const isFilledString = (value) => isString(value) && value !== ''
  return isString(description) &&
    isFilledString(id) &&
    isFilledString(title) &&
    isString(link) && isValidUrl(link) &&
    isString(published) && isValidDate(published)
}

describe('test read() function with common issues', () => {
test('read feed from a non-string link', () => {
expect(read([])).rejects.toThrow(new Error('Input param must be a valid URL'))
Expand Down Expand Up @@ -76,6 +90,7 @@ describe('test read() standard feed', (done) => {
entryAttrs.forEach((k) => {
expect(hasProperty(result.entries[0], k)).toBe(true)
})
expect(validateProps(result.entries[0])).toBe(true)
})

test('read atom feed from Google', async () => {
Expand All @@ -92,6 +107,7 @@ describe('test read() standard feed', (done) => {
entryAttrs.forEach((k) => {
expect(hasProperty(result.entries[0], k)).toBe(true)
})
expect(validateProps(result.entries[0])).toBe(true)
})

test('read atom feed from Google with extraFields', async () => {
Expand All @@ -115,6 +131,7 @@ describe('test read() standard feed', (done) => {
})
expect(hasProperty(result, 'author')).toBe(true)
expect(hasProperty(result.entries[0], 'id')).toBe(true)
expect(validateProps(result.entries[0])).toBe(true)
})

test('read atom feed which contains multi links', async () => {
Expand All @@ -131,6 +148,7 @@ describe('test read() standard feed', (done) => {
entryAttrs.forEach((k) => {
expect(hasProperty(result.entries[0], k)).toBe(true)
})
expect(validateProps(result.entries[0])).toBe(true)
})

test('read json feed from Micro.blog', async () => {
Expand All @@ -147,6 +165,7 @@ describe('test read() standard feed', (done) => {
entryAttrs.forEach((k) => {
expect(hasProperty(result.entries[0], k)).toBe(true)
})
expect(validateProps(result.entries[0])).toBe(true)
})

test('read json feed from Micro.blog with extra fields', async () => {
Expand All @@ -170,6 +189,24 @@ describe('test read() standard feed', (done) => {
})
expect(hasProperty(result, 'icon')).toBe(true)
expect(hasProperty(result.entries[0], 'id')).toBe(true)
expect(validateProps(result.entries[0])).toBe(true)
})

// Serves the local fixture "rss-feed-miss-link.xml" through a mocked HTTP
// endpoint, then checks that read() still returns a fully populated feed and
// first entry.
test('read rss feed from huggingface.co (no link)', async () => {
  const url = 'https://huggingface.co/no-link/rss'
  const xml = readFileSync('test-data/rss-feed-miss-link.xml', 'utf8')
  const { baseUrl, path } = parseUrl(url)
  const responseHeaders = {
    'Content-Type': 'application/xml'
  }
  // Intercept the request so no real network call is made.
  nock(baseUrl).get(path).reply(200, xml, responseHeaders)

  const result = await read(url)
  for (const attr of feedAttrs) {
    expect(hasProperty(result, attr)).toBe(true)
  }

  const firstEntry = result.entries[0]
  for (const attr of entryAttrs) {
    expect(hasProperty(firstEntry, attr)).toBe(true)
  }
  expect(validateProps(firstEntry)).toBe(true)
})
})

Expand Down
2 changes: 1 addition & 1 deletion src/utils/normalizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export const getLink = (val = [], id = '') => {
if (id && isValidUrl(id)) {
return id
}
if (isObject(id) && hasProperty(id, '@_isPermaLink') && Boolean(id['@_isPermaLink']) === true) {
if (isObject(id) && hasProperty(id, '@_isPermaLink') && id['@_isPermaLink'] === 'true') {
return getText(id)
}
const getEntryLink = (links) => {
Expand Down
4 changes: 3 additions & 1 deletion src/utils/parseAtomFeed.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ const transform = (item, options) => {
const {
id = '',
title = '',
issued = '',
modified = '',
updated = '',
published = '',
link = '',
summary = '',
content = ''
} = item

const pubDate = updated || published
const pubDate = updated || modified || published || issued
const htmlContent = getText(content || summary)
const entry = {
id: getEntryId(id, link, pubDate),
Expand Down
Loading

0 comments on commit e5be9ab

Please sign in to comment.