Merge pull request #77 from extractus/v6.1.8
v6.1.8
ndaidong authored Dec 30, 2022
2 parents 8d65cd1 + d9762ad commit e9c5d67
Showing 13 changed files with 52 additions and 13 deletions.
13 changes: 11 additions & 2 deletions dist/cjs/feed-extractor.js

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions dist/cjs/index.d.ts
@@ -1,6 +1,10 @@
// Type definitions

export interface FeedEntry {
/**
* id, guid, or generated identifier for the entry
*/
id: string;
link?: string;
title?: string;
description?: string;
2 changes: 1 addition & 1 deletion dist/cjs/package.json
@@ -1,5 +1,5 @@
{
"name": "@extractus/feed-extractor",
"version": "6.1.7",
"version": "6.1.8",
"main": "./feed-extractor.js"
}
13 changes: 11 additions & 2 deletions dist/feed-extractor.esm.js
@@ -1,4 +1,4 @@
// @extractus/feed-extractor@6.1.7, by @extractus - built with esbuild at 2022-12-06T05:40:03.694Z - published under MIT license
// @extractus/feed-extractor@6.1.8, by @extractus - built with esbuild at 2022-12-30T05:55:09.972Z - published under MIT license
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -1885,7 +1885,7 @@ var profetch = async (url, proxy = {}) => {
var retrieve_default = async (url, options = {}) => {
const {
headers = {
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:104.0) Gecko/20100101 Firefox/104.0"
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0"
},
proxy = null
} = options;
@@ -2015,6 +2015,10 @@ var getPureUrl = (url, id = "") => {
const link = getLink(url, id);
return link ? purify(link) : "";
};
var hash = (str) => Math.abs(str.split("").reduce((s, c) => Math.imul(31, s) + c.charCodeAt(0) | 0, 0)).toString(36);
var getEntryId = (id, url, pubDate) => {
return id ? getText(id) : hash(getPureUrl(url)) + "-" + new Date(pubDate).getTime();
};
var getEnclosure = (val) => {
const url = hasProperty(val, "@_url") ? val["@_url"] : "";
const type = hasProperty(val, "@_type") ? val["@_type"] : "";
@@ -2055,6 +2059,7 @@ var transform = (item, options) => {
getExtraEntryFields
} = options;
const {
id = "",
title = "",
url: link = "",
date_published: pubDate = "",
@@ -2065,6 +2070,7 @@ var transform = (item, options) => {
const published = useISODateFormat ? toISODateString(pubDate) : pubDate;
const extraFields = getExtraEntryFields(item);
const entry = {
id: getEntryId(id, link, pubDate),
title,
link: purify(link),
published,
@@ -2117,13 +2123,15 @@ var transform2 = (item, options) => {
getExtraEntryFields
} = options;
const {
guid = "",
title = "",
link = "",
pubDate = "",
description = ""
} = item;
const published = useISODateFormat ? toISODateString(pubDate) : pubDate;
const entry = {
id: getEntryId(guid, link, pubDate),
title: getText(title),
link: getPureUrl(link),
published,
@@ -2231,6 +2239,7 @@ var transform3 = (item, options) => {
const pubDate = updated || published;
const htmlContent = getText(content || summary);
const entry = {
id: getEntryId(id, link, pubDate),
title: getText(title),
link: getPureUrl(link, id),
published: useISODateFormat ? toISODateString(pubDate) : pubDate,
4 changes: 4 additions & 0 deletions index.d.ts
@@ -1,6 +1,10 @@
// Type definitions

export interface FeedEntry {
/**
* id, guid, or generated identifier for the entry
*/
id: string;
link?: string;
title?: string;
description?: string;
4 changes: 2 additions & 2 deletions package.json
@@ -1,5 +1,5 @@
{
"version": "6.1.7",
"version": "6.1.8",
"name": "@extractus/feed-extractor",
"description": "To read and normalize RSS/ATOM/JSON feed data",
"homepage": "https://github.com/extractus/feed-extractor",
@@ -40,7 +40,7 @@
"html-entities": "^2.3.3"
},
"devDependencies": {
"esbuild": "^0.15.18",
"esbuild": "^0.16.12",
"jest": "^29.3.1",
"nock": "^13.2.9",
"standard": "^17.0.0"
2 changes: 1 addition & 1 deletion src/main.test.js
@@ -10,7 +10,7 @@ import { hasProperty } from 'bellajs'
import { read } from './main.js'

const feedAttrs = 'title link description generator language published entries'.split(' ')
const entryAttrs = 'title link description published'.split(' ')
const entryAttrs = 'title link description published id'.split(' ')

const parseUrl = (url) => {
const re = new URL(url)
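With id added to entryAttrs, every normalized entry is now expected to expose that property. A minimal jest-style sketch of the kind of check the suite performs; the feed URL is illustrative and the real tests stub requests with nock:

import { hasProperty } from 'bellajs'
import { read } from './main.js'

test('every entry carries an id', async () => {
  // 'https://example.org/rss' stands in for a feed mocked via nock in the real suite
  const result = await read('https://example.org/rss')
  result.entries.forEach((entry) => {
    expect(hasProperty(entry, 'id')).toBe(true)
  })
})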
6 changes: 6 additions & 0 deletions src/utils/normalizer.js
@@ -59,6 +59,12 @@ export const getPureUrl = (url, id = '') => {
return link ? purifyUrl(link) : ''
}

const hash = (str) => Math.abs(str.split('').reduce((s, c) => Math.imul(31, s) + c.charCodeAt(0) | 0, 0)).toString(36)

export const getEntryId = (id, url, pubDate) => {
return id ? getText(id) : hash(getPureUrl(url)) + '-' + (new Date(pubDate)).getTime()
}

export const getEnclosure = (val) => {
const url = hasProperty(val, '@_url') ? val['@_url'] : ''
const type = hasProperty(val, '@_type') ? val['@_type'] : ''
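getEntryId prefers the feed's own identifier (Atom id or RSS guid) and otherwise derives one from the purified link and the publish timestamp. A standalone sketch of that fallback branch, with getText/getPureUrl simplified away and an illustrative URL and date:

// Same 31-multiplier string hash as normalizer.js, rendered in base 36
const hash = (str) => Math.abs(str.split('').reduce((s, c) => Math.imul(31, s) + c.charCodeAt(0) | 0, 0)).toString(36)

// Fallback branch of getEntryId: hash of the link plus the publish time in milliseconds
const fallbackId = (url, pubDate) => hash(url) + '-' + (new Date(pubDate)).getTime()

console.log(fallbackId('https://example.org/post/1', '2022-12-30T05:55:09Z'))
// prints something like "<base36-hash>-1672379709000"; the same link and date always yield the same id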
4 changes: 3 additions & 1 deletion src/utils/parseAtomFeed.js
@@ -9,7 +9,8 @@
getText,
toISODateString,
buildDescription,
getPureUrl
getPureUrl,
getEntryId
} from './normalizer.js'

const transform = (item, options) => {
@@ -32,6 +33,7 @@ const transform = (item, options) => {
const pubDate = updated || published
const htmlContent = getText(content || summary)
const entry = {
id: getEntryId(id, link, pubDate),
title: getText(title),
link: getPureUrl(link, id),
published: useISODateFormat ? toISODateString(pubDate) : pubDate,
5 changes: 4 additions & 1 deletion src/utils/parseJsonFeed.js
@@ -6,7 +6,8 @@ import { isArray } from 'bellajs'

import {
toISODateString,
buildDescription
buildDescription,
getEntryId
} from './normalizer.js'

import { purify as purifyUrl } from './linker.js'
@@ -19,6 +20,7 @@ const transform = (item, options) => {
} = options

const {
id = '',
title = '',
url: link = '',
date_published: pubDate = '',
@@ -31,6 +33,7 @@ const transform = (item, options) => {
const extraFields = getExtraEntryFields(item)

const entry = {
id: getEntryId(id, link, pubDate),
title,
link: purifyUrl(link),
published,
5 changes: 4 additions & 1 deletion src/utils/parseRssFeed.js
@@ -9,7 +9,8 @@
toISODateString,
buildDescription,
getPureUrl,
getOptionalTags
getOptionalTags,
getEntryId
} from './normalizer.js'

const transform = (item, options) => {
@@ -20,6 +21,7 @@ const transform = (item, options) => {
} = options

const {
guid = '',
title = '',
link = '',
pubDate = '',
@@ -29,6 +31,7 @@
const published = useISODateFormat ? toISODateString(pubDate) : pubDate

const entry = {
id: getEntryId(guid, link, pubDate),
title: getText(title),
link: getPureUrl(link),
published,
2 changes: 1 addition & 1 deletion src/utils/retrieve.js
@@ -16,7 +16,7 @@ const profetch = async (url, proxy = {}) => {
export default async (url, options = {}) => {
const {
headers = {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:104.0) Gecko/20100101 Firefox/104.0'
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0'
},
proxy = null
} = options
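The bumped Firefox 108 string is only a default: retrieve.js destructures headers from its options argument, so callers can supply their own. A usage sketch, assuming an illustrative feed URL and user-agent value:

import retrieve from './retrieve.js'

// Passing a headers object replaces the built-in Firefox user-agent default
const raw = await retrieve('https://example.org/feed.xml', {
  headers: {
    'user-agent': 'my-feed-reader/1.0'
  }
})
// raw holds whatever retrieve resolves with (the fetched feed data), requested with the custom header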
1 change: 0 additions & 1 deletion test-data/atom-multilinks.xml
@@ -18,7 +18,6 @@
<link rel="enclosure" type="audio/mpeg" length="1337"
href="http://example.org/audio/ph34r_my_podcast.mp3"/>
<summary>Comfortable and soft, this sweater will keep you warm on those cold winter nights.</summary>
<id>tag:google.com,2005-10-15:/support/products</id>
<issued>2005-10-13T18:30:02Z</issued>
<modified>2005-10-13T18:30:02Z</modified>
<g:image_link>http://www.example.com/image1.jpg</g:image_link>
