diff --git a/README.md b/README.md
index 2bf7cc7..ef9e175 100644
--- a/README.md
+++ b/README.md
@@ -168,7 +168,6 @@ npm run test
- [dictionary-en-gb](https://ghub.io/dictionary-en-gb): English (United Kingdom) spelling dictionary in UTF-8
- [html-to-text](https://ghub.io/html-to-text): Advanced html to plain text converter
- [nlcst-to-string](https://ghub.io/nlcst-to-string): Stringify NLCST
-- [node-readability](https://ghub.io/node-readability): Turning any web page into a clean view.
- [vfile-reporter-json](https://ghub.io/vfile-reporter-json): JSON reporter for virtual files
@@ -186,3 +185,7 @@ npm run test
## License
This project is licensed under the GNU GENERAL PUBLIC LICENSE Version 3 - see the [LICENSE](LICENSE) file for details
+
+## Notes
+
+Because [node-readability](https://github.com/luin/readability) is stale, I have imported the relevant functions into this project and refactored them so they no longer use [request](https://github.com/request/request) and therefore have no vulnerabilities.
diff --git a/helpers.js b/helpers.js
new file mode 100644
index 0000000..9f286a5
--- /dev/null
+++ b/helpers.js
@@ -0,0 +1,673 @@
+module.exports.capitalizeFirstLetter = function (string) {
+ return string.charAt(0).toUpperCase() + string.slice(1)
+}
+
+module.exports.toTitleCase = function (str) {
+ return str.replace(/\w\S*/g, function (txt) {
+ return txt.charAt(0).toUpperCase() + txt.substr(1).toLowerCase()
+ })
+}
+
+// All of the regular expressions in use within readability.
+const regexps = {
+ unlikelyCandidatesRe: /combx|modal|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor|social|teaserlist|time|tweet|twitter/i,
+ okMaybeItsACandidateRe: /and|article|body|column|main|story|entry|^post/im,
+ positiveRe: /article|body|content|entry|hentry|page|pagination|post|section|chapter|description|main|blog|text/i,
+ negativeRe: /combx|comment|contact|foot|footer|footnote|link|media|meta|promo|related|scroll|shoutbox|sponsor|utility|tags|widget/i,
+ divToPElementsRe: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
+ replaceBrsRe: /(
]*>[ \n\r\t]*){2,}/gi,
+ replaceFontsRe: /<(\/?)font[^>]*>/gi,
+ trimRe: /^\s+|\s+$/g,
+ normalizeRe: /\s{2,}/g,
+ killBreaksRe: /(
(\s| ?)*){1,}/g,
+ videoRe: /http:\/\/(www\.)?(youtube|vimeo|youku|tudou|56|yinyuetai)\.com/i,
+ attributeRe: /blog|post|article/i
+}
+
+let debug
+const dbg = (debug) ? console.log : function () {}
+
+let cleanRules = []
+
+module.exports.setCleanRules = function (rules) {
+ cleanRules = rules
+}
+
+/**
+ * Prepare the HTML document for readability to scrape it.
+ * This includes things like stripping javascript, CSS, and handling terrible markup.
+ *
+ * @return void
+ **/
+module.exports.prepDocument = function (document) {
+ const frames = document.getElementsByTagName('frame')
+ if (frames.length > 0) {
+ let bestFrame = null
+ let bestFrameSize = 0
+
+ Array.prototype.slice.call(frames, 0).forEach(function (frame) {
+ const frameSize = frame.offsetWidth + frame.offsetHeight
+ let canAccessFrame = false
+ try {
+ if (frame.contentWindow.document.body) {
+ canAccessFrame = true
+ }
+ } catch (e) {}
+
+ if (canAccessFrame && frameSize > bestFrameSize) {
+ bestFrame = frame
+ bestFrameSize = frameSize
+ }
+ })
+
+ if (bestFrame) {
+ const newBody = document.createElement('body')
+ newBody.innerHTML = bestFrame.contentWindow.document.body.innerHTML
+ newBody.style.overflow = 'scroll'
+ document.body = newBody
+
+ const frameset = document.getElementsByTagName('frameset')[0]
+ if (frameset) {
+ frameset.parentNode.removeChild(frameset)
+ }
+ }
+ }
+
+ // Strip out all