Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes and updates #45

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ module.exports = {
'no-console': 'off',
'default-case': 'off',
'no-prototype-builtins': 'off',
'linebreak-style': 0,
},
};
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ Starting from version 1.2.3, you can now post local markdown files to the platfo
For example:

```bash
cross-post run /path/to/test.md -l
# canonicalUrl is optional
cross-post run /path/to/test.md -l <canonicalUrl>
```

You can also use any of the previous options mentioned.
Expand Down
2 changes: 1 addition & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ program.usage('[command] [options]');
program
.command('run <url>')
.description('Cross post a blog post')
.option('-l, --local', 'Use if the you want to directly post a local Markdown file. <url> in this case should be the path to the file')
.option('-l, --local [canonicalUrl]', 'For using a local Markdown file, <url> will be the path and <canonicalUrl> is optional')
.option('-t, --title [title]', 'Title for the article')
.option('-p, --platforms [platforms...]', `Platforms to post articles to. Allowed values are: ${allowedPlatforms.join(', ')}`)
.option('-s, --selector [selector]', 'The selector to look for in the document in the URL supplied. By default, it will be article. '
Expand Down
79 changes: 37 additions & 42 deletions src/commands/run.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
const fs = require('fs');
const path = require('path');
const process = require('process');
const Conf = require('conf');
const got = require('got');
const jsdom = require('jsdom');
const { JSDOM } = require('jsdom');
const htmlparser2 = require('htmlparser2');
const { URLSearchParams } = require('url');
const { marked } = require('marked');

const { JSDOM } = jsdom;

const TurndownService = require('turndown');
const CLI = require('clui');

Expand All @@ -23,7 +20,7 @@ const {
displaySuccess,
isPlatformAllowed,
platformNotAllowedMessage,
isDataURL,
isDataURL, getRemoteArticleDOM, findMainContentElements, formatMarkdownImages,
} = require('../utils');
const postToDev = require('./platforms/dev');
const postToHashnode = require('./platforms/hashnode');
Expand Down Expand Up @@ -63,30 +60,27 @@ function search(type, node) {
}

/**
*
*
* @param {*} url the string that has provided by the user
* @returns
* @returns
*/
async function getImageForHashnode(url) {
try {
const response = await got(url);
let count = 0, imageUrl;
const parser = new htmlparser2.Parser({
onopentag: function(name, attribs) {
if (name === 'img' && attribs.src && attribs.src.includes('/_next/image')) {
count += 1;
if (count === 2) {
imageUrl = attribs.src;
}
const response = await got(url);
let count = 0;
let imageUrl;
const parser = new htmlparser2.Parser({
onopentag(name, attribs) {
if (name === 'img' && attribs.src && attribs.src.includes('/_next/image')) {
count += 1;
if (count === 2) {
imageUrl = attribs.src;
}
},
});
parser.write(response.body);
parser.end();
return imageUrl;
} catch (error) {
//pass
}
}
},
});
parser.write(response.body);
parser.end();
return imageUrl;
}

/**
Expand Down Expand Up @@ -153,7 +147,7 @@ function postToPlatforms(title, markdown, url, image, p) {
/**
*
* @param {string} url URL of the blog post
* @param {object} param1 The parameters from the command line
* @param {object} options The parameters from the command line
*/
async function run(url, options) {
let {
Expand Down Expand Up @@ -183,12 +177,7 @@ async function run(url, options) {
}

// check if configurations exist for the platforms
const errorPlatform = chosenPlatforms.find((platform) => {
if (!configstore.get(platform)) {
return true;
}
return false;
});
const errorPlatform = chosenPlatforms.find((platform) => !configstore.get(platform));

if (errorPlatform) {
console.error(
Expand Down Expand Up @@ -282,18 +271,16 @@ async function run(url, options) {
if (image) {
image = image.getAttribute('src');
}
} else if (url.includes('hashnode')) {
await getImageForHashnode(url).then((img) => {
const params = new URLSearchParams(img.split('?')[1]);
image = params.get('url');
});
} else {
if (url.includes("hashnode")) {
await getImageForHashnode(url).then((img) => {
const params = new URLSearchParams(img.split('?')[1]);
image = params.get('url');
});
}else{
image = search('image')
}
image = search('image', articleNode);
}
}
// check if image is dataurl
// check if image is data-url
if (image && isDataURL(image)) {
const res = await uploadToCloudinary(image);
image = res.url;
Expand All @@ -304,8 +291,16 @@ async function run(url, options) {
}
}
}
// create links for images in files
const isLocalAPath = typeof local === 'string';

const articleDOM = isLocalAPath && await getRemoteArticleDOM(local);
const mainElement = isLocalAPath && findMainContentElements(articleDOM.window.document.body);
markdown = isLocalAPath ? formatMarkdownImages(markdown, mainElement, local) : markdown;
const newURL = local ? '' : url;
const canonicalURL = isLocalAPath ? local : newURL;

postToPlatforms(title, markdown, local ? '' : url, image, p);
postToPlatforms(title, markdown, canonicalURL, image, p);
} else {
handleError('No articles found in the URL.');
}
Expand Down
181 changes: 181 additions & 0 deletions src/utils.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,185 @@
const chalk = require('chalk');
const { get } = require('axios');
const { JSDOM } = require('jsdom');

const allowedPlatforms = ['dev', 'hashnode', 'medium'];

/**
 * Upgrades a URL that begins with the `http://` scheme to `https://`.
 * Strings that do not start with `http://` are returned unchanged.
 *
 * @function
 * @name enforceHTTPS
 * @param {string|null|undefined} url - The URL to upgrade.
 * @returns {string|undefined} - The URL with a leading `http://` replaced by `https://`,
 * or `undefined` when `url` is null or undefined.
 *
 * @example
 * enforceHTTPS('http://example.com'); // 'https://example.com'
 * enforceHTTPS('https://example.com'); // 'https://example.com'
 */
const enforceHTTPS = (url) => {
  if (url === null || url === undefined) {
    return undefined;
  }
  return url.replace(/^(http:\/\/)/, 'https://');
};

/**
 * Downloads a remote page and parses its body into a JSDOM instance.
 * The URL is passed through `enforceHTTPS` before the request is made.
 *
 * @async
 * @function
 * @name getRemoteArticleDOM
 * @param {string} url - Address of the remote article.
 * @returns {Promise<JSDOM>} - Resolves to a JSDOM object built from the response data.
 * Rejects if the underlying request rejects.
 */
const getRemoteArticleDOM = async (url) => {
  const secureUrl = enforceHTTPS(url);
  const response = await get(secureUrl);
  return new JSDOM(response.data);
};

/**
 * Finds the nearest common ancestor of an array of HTML elements.
 *
 * Each element counts as its own ancestor, so a single-element array
 * yields that element, and `[child, parent]` yields `parent`.
 *
 * @function
 * @name findNearestCommonAncestor
 * @param {HTMLElement[]|null|undefined} elements - Elements for which to find
 * the nearest common ancestor.
 * @returns {HTMLElement|null} - The nearest element present in every element's
 * ancestor chain, or null if the input is empty, null or undefined, or the
 * elements share no ancestor.
 *
 * @example
 * const elem1 = document.getElementById('elem1');
 * const elem2 = document.getElementById('elem2');
 * const commonAncestor = findNearestCommonAncestor([elem1, elem2]);
 *
 * // commonAncestor will contain the nearest common ancestor HTMLElement or null.
 */
const findNearestCommonAncestor = (elements) => {
  // Covers null/undefined as well as an empty array (the previous
  // `elements?.length === 0` check let null through and then crashed on `.map`).
  if (!elements || elements.length === 0) {
    return null;
  }

  // Walks up `parentElement` links, collecting the element itself and every
  // ancestor. A Set keeps insertion order, so the nearest entries come first.
  const collectAncestors = (element) => {
    const chain = new Set();
    let node = element;
    while (node) {
      chain.add(node);
      node = node.parentElement;
    }
    return chain;
  };

  const [firstChain, ...otherChains] = elements.map(collectAncestors);

  // The first entry of the first chain that every other chain also contains
  // is the nearest common ancestor.
  const nearest = [...firstChain]
    .find((candidate) => otherChains.every((chain) => chain.has(candidate)));

  return nearest ?? null;
};

/**
 * Scores the parents of text-bearing tags by weighted text length and
 * returns the best-scoring parents that directly contain a `<p>` tag.
 *
 * Every `p`, `blockquote` and `h1`-`h6` found under the given root adds
 * `textContent.length * weight` to its parent's score. Elements and parents
 * whose tag name contains a hyphen are ignored.
 *
 * @function
 * @name rankingTag
 * @param {HTMLElement} document - The jsdom element used as the root of the search.
 * @returns {HTMLElement[]} - At most 20 parent elements, highest score first,
 * each of which has at least one direct `<p>` child.
 *
 */
const rankingTag = (document) => {
  const tagWeights = {
    p: 0.8,
    blockquote: 0.9,
    h1: 0.6,
    h2: 0.6,
    h3: 0.6,
    h4: 0.6,
    h5: 0.6,
    h6: 0.6,
  };
  const scoreByParent = new Map();
  const parentsWithParagraph = new Set();

  const textNodes = Array.from(document.querySelectorAll('p, blockquote, h1, h2, h3, h4, h5, h6'));

  textNodes.forEach((node) => {
    const charCount = node.textContent.length;
    const tag = node.tagName.toLowerCase();
    if (tag.includes('-')) {
      return;
    }

    const weighted = charCount * tagWeights[tag];
    const parent = node.parentElement;
    if (!parent || parent.tagName.toLowerCase().includes('-')) {
      return;
    }

    const previous = scoreByParent.has(parent) ? scoreByParent.get(parent) : 0;
    scoreByParent.set(parent, previous + weighted);

    if (tag === 'p') {
      parentsWithParagraph.add(parent);
    }
  });

  const ranked = [...scoreByParent]
    .filter(([parent]) => parentsWithParagraph.has(parent))
    .sort((left, right) => right[1] - left[1]);

  return ranked.slice(0, 20).map(([parent]) => parent);
};
/**
 * Locates the element that wraps the main content under the given root:
 * ranks candidate containers with `rankingTag`, then returns their nearest
 * common ancestor via `findNearestCommonAncestor`.
 *
 * @function
 * @name findMainContentElements
 * @param {HTMLElement} document - The jsdom element used as the root of the search.
 * @returns {HTMLElement|null} - Whatever `findNearestCommonAncestor` returns
 * for the ranked candidates.
 */
const findMainContentElements = (document) => {
  const candidates = rankingTag(document);
  return findNearestCommonAncestor(candidates);
};

/**
 * Rewrites the image links in a Markdown string using the image sources
 * found inside a DOM element.
 *
 * Image URLs are collected from `element` in document order (`<img>` via
 * `src`, `<picture>` via the last candidate of its first `<source>` srcset),
 * made absolute against the origin of `url`, and upgraded to https. They are
 * then substituted, one by one and in order, for the targets of the Markdown
 * image links. Markdown images beyond the number of collected URLs keep
 * their original target.
 *
 * @function
 * @name formatMarkdownImages
 * @param {string} markdown - The Markdown text that needs to be formatted.
 * @param {HTMLElement|null} element - The HTMLElement (from jsdom) where images will be
 * extracted. When null/undefined the markdown is returned unchanged.
 * @param {string} url - The article URL; its origin is used to resolve relative image paths.
 * @returns {string} - The formatted Markdown string.
 * @throws {TypeError} If `url` is not a valid absolute URL (and `element` is provided).
 *
 * @example
 * // element contains <img src="/imagefromElement.png">
 * formatMarkdownImages('![Alt text](/path/to/image.jpg)', element, 'https://example.com');
 * // => '![Alt text](https://example.com/imagefromElement.png)'
 */
const formatMarkdownImages = (markdown, element, url) => {
  // No content container was found: there is nothing to substitute.
  if (!element) {
    return markdown;
  }

  // Reduce the article URL to "<scheme>://<host>/" so paths resolve from the site root.
  const siteRoot = new URL(url);
  siteRoot.pathname = '';
  siteRoot.search = '';
  siteRoot.hash = '';
  const baseUrl = siteRoot.toString();

  const toAbsoluteHttps = (src) => {
    // Already-absolute http(s) URLs are passed through untouched apart from the scheme.
    if (src.startsWith('http://') || src.startsWith('https://')) {
      return enforceHTTPS(src);
    }
    try {
      // Proper URL resolution avoids "host//path" for root-relative paths and
      // handles protocol-relative ("//cdn/...") sources.
      return enforceHTTPS(new URL(src, baseUrl).href);
    } catch (error) {
      // Unparseable source: fall back to plain concatenation rather than dropping it.
      return enforceHTTPS(baseUrl + src);
    }
  };

  // NOTE(review): an <img> nested in a <picture> also matches this selector, so
  // such a picture may contribute two entries — confirm this is intended.
  const imagesSrc = Array.from(element.querySelectorAll('img, picture')).map((node) => {
    const { src, tagName } = node || {};
    const tag = tagName.toLowerCase();

    if (tag === 'img') {
      return src ? toAbsoluteHttps(src) : null;
    }
    if (tag === 'picture') {
      const source = node.querySelector('source');
      const srcset = source ? source.srcset : undefined;
      // Check before splitting: a <picture> without <source>/srcset has nothing to offer.
      if (!srcset) {
        return null;
      }
      const candidates = srcset.split(',');
      // Each candidate is "<url> <descriptor>"; take the URL of the last one.
      return toAbsoluteHttps(candidates[candidates.length - 1].trim().split(' ')[0]);
    }
    return null;
  }).filter(Boolean);

  if (url.includes('medium.com')) { imagesSrc.shift(); } // first image is always the profile image

  const GRAB_IMAGES_MARKDOWN_REGEX = /!\[(.*?)]\((.*?)\)/g;
  return markdown.replace(GRAB_IMAGES_MARKDOWN_REGEX, (match, altText, originalTarget) => {
    const newUrl = imagesSrc.shift() || originalTarget;
    return `![${altText}](${newUrl})`;
  });
};

module.exports = {
allowedPlatforms,
displayError: chalk.bold.red,
Expand All @@ -20,4 +198,7 @@ module.exports = {
return !!s.match(regex);
},
imagePlatform: 'cloudinary',
findMainContentElements,
getRemoteArticleDOM,
formatMarkdownImages,
};