Skip to content

Commit

Permalink
一部のクエリパラメータを消すように
Browse files Browse the repository at this point in the history
  • Loading branch information
cp-20 committed Feb 9, 2024
1 parent 70e5756 commit 2654580
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 8 deletions.
2 changes: 2 additions & 0 deletions packages/lib/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
"dependencies": {
"@mozilla/readability": "^0.5.0",
"@read-stack/tsconfig": "workspace:*",
"@types/picomatch": "^2.3.3",
"cheerio": "1.0.0-rc.12",
"iconv-lite": "^0.6.3",
"jsdom": "^23.0.1",
"picomatch": "^4.0.1",
"zod": "^3.22.4"
},
"devDependencies": {
Expand Down
19 changes: 11 additions & 8 deletions packages/lib/src/crawler/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { fetchArticleFromNote } from '@/crawler/note';
import { fetchArticleFromQiita } from '@/crawler/qiita';
import { fetchUsingReadability } from '@/crawler/readability';
import { trimUrl } from '@/crawler/trim';
import { fetchArticleFromZenn } from '@/crawler/zenn';

export interface ArticleResponse {
Expand All @@ -12,19 +13,21 @@ export interface ArticleResponse {

// server-side only
//
// Fetches the article at `url`, resolving to an ArticleResponse or null.
//
// NOTE(review): this span is a rendered commit diff, so the pre-change lines
// (which operate on `url`) and the post-change lines (which operate on
// `trimmedUrl`) appear interleaved without +/- markers.
//
// Post-change flow: the URL is first passed through trimUrl, then tested
// against the note.com, qiita.com and zenn.dev article URL patterns and handed
// to the matching site-specific fetcher. A URL that matches none of them is
// handed to fetchUsingReadability.
export const fetchArticle = async (
url: string,
url: string
): Promise<ArticleResponse | null> => {
if (/https?:\/\/note.com\/[^/]+\/n\/[^/]+/.exec(url)) {
return fetchArticleFromNote(url);
const trimmedUrl = trimUrl(url);

if (/https?:\/\/note.com\/[^/]+\/n\/[^/]+/.exec(trimmedUrl)) {
return fetchArticleFromNote(trimmedUrl);
}

if (/https?:\/\/qiita.com\/[^/]+\/items\/[^/]+/.exec(url)) {
return fetchArticleFromQiita(url);
if (/https?:\/\/qiita.com\/[^/]+\/items\/[^/]+/.exec(trimmedUrl)) {
return fetchArticleFromQiita(trimmedUrl);
}

if (/https?:\/\/zenn.dev\/[^/]+\/articles\/[^/]+/.exec(url)) {
return fetchArticleFromZenn(url);
if (/https?:\/\/zenn.dev\/[^/]+\/articles\/[^/]+/.exec(trimmedUrl)) {
return fetchArticleFromZenn(trimmedUrl);
}

return fetchUsingReadability(url);
return fetchUsingReadability(trimmedUrl);
};
37 changes: 37 additions & 0 deletions packages/lib/src/crawler/trim.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { isMatch } from 'picomatch';

/**
 * One trimming rule: a set of host patterns plus the query parameters that
 * are removed from URLs whose host matches any of those patterns.
 */
export interface TrimData {
  /** Host patterns the rule applies to; `'*'` is used for the catch-all rule. */
  host: string[];
  /** Names of the query parameters to delete when the rule applies. */
  queries: string[];
}

/** Rule table consulted by trimUrl; every matching rule is applied. */
const trimData: TrimData[] = [
  // UTM parameters, removed for every host.
  {
    host: ['*'],
    queries: ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content'],
  },
  // Removed only for twitter.com / x.com.
  { host: ['twitter.com', 'x.com'], queries: ['s', 't'] },
];

/**
 * Removes the query parameters listed in `trimData` from a URL.
 *
 * Every rule whose host patterns match the URL's hostname is applied, so the
 * catch-all rule and any host-specific rule stack.
 *
 * @param url - The URL to clean up.
 * @returns The serialized URL without the listed parameters. Input that
 *   cannot be parsed as a URL is returned unchanged.
 */
export const trimUrl = (url: string): string => {
  let urlObj: URL;
  try {
    urlObj = new URL(url);
  } catch {
    // Trimming is only a normalization step: hand unparseable input back as-is
    // instead of throwing, so callers can still try to process the raw string.
    return url;
  }

  // Match on `hostname`, not `host`: `host` also carries a non-default port
  // (e.g. "x.com:8443"), which would keep host-specific rules from matching.
  const { hostname } = urlObj;

  for (const { host, queries } of trimData) {
    if (!isMatch(hostname, host)) continue;

    for (const query of queries) {
      // eslint-disable-next-line drizzle/enforce-delete-with-where -- URLSearchParams.delete, unrelated to drizzle
      urlObj.searchParams.delete(query);
    }
  }

  return urlObj.toString();
};
15 changes: 15 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2654580

Please sign in to comment.