Skip to content

Commit

Permalink
Merge pull request #269 from hyper63/twilson63/feat-core-add-crawler-268
Browse files Browse the repository at this point in the history
Twilson63/feat core add crawler 268
  • Loading branch information
twilson63 committed Jul 23, 2021
2 parents 6138119 + 3d9e8e1 commit 37fb76b
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 3 deletions.
1 change: 1 addition & 0 deletions packages/core/deps.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ export { data } from "https://x.nest.land/[email protected]/mod.js";
export { storage } from "https://x.nest.land/[email protected]/mod.js";
export { search } from "https://x.nest.land/[email protected]/mod.js";
export { hooks } from "https://x.nest.land/[email protected]/mod.js";
export { crawler } from "https://x.nest.land/[email protected]/mod.js";
7 changes: 6 additions & 1 deletion packages/core/dev_deps.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
export {
assert,
assertEquals,
assertObjectMatch,
} from "https://deno.land/[email protected]/testing/asserts.ts";
} from "https://deno.land/[email protected]/testing/asserts.ts";

export {
encode as base64Encode,
} from "https://deno.land/[email protected]/encoding/base64.ts";
34 changes: 34 additions & 0 deletions packages/core/lib/crawler/mod.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { apply, of, triggerEvent } from "../utils/mod.js";

/**
 * Creates the crawler portion of the core API.
 *
 * All four operations share one pipeline: the input is lifted with `of`,
 * chained through `apply(<method>)` and `triggerEvent(<event>)`, and the
 * pipeline is then run with `{ svc: crawler, events }`.
 * NOTE(review): `of`, `apply` and `triggerEvent` come from ../utils/mod.js;
 * presumably `apply` invokes the named method on `svc` and `triggerEvent`
 * reports through `events` - confirm against that module.
 *
 * @param {object} deps
 * @param {object} deps.crawler - crawler service, passed to the pipeline as `svc`
 * @param {object} deps.events - events object, passed to the pipeline as `events`
 * @returns {object} frozen object with `upsert`, `get`, `start` and `remove`
 */
export default function ({ crawler, events }) {
  // One pipeline for every operation; only the service method name, the
  // event type and the input differ. The environment object is created
  // per call, exactly as each operation did on its own before.
  const run = (method, eventType, input) =>
    of(input)
      .chain(apply(method))
      .chain(triggerEvent(eventType))
      .runWith({ svc: crawler, events });

  return Object.freeze({
    // receives the whole job definition as a single object
    upsert: (job) => run("upsert", "CRAWLER:UPSERT_JOB", job),
    // the remaining operations address a job by app and name
    get: (app, name) => run("get", "CRAWLER:GET_JOB", { app, name }),
    start: (app, name) => run("start", "CRAWLER:START_JOB", { app, name }),
    // exposed as `remove`, but forwarded to the service method "delete"
    remove: (app, name) => run("delete", "CRAWLER:DELETE_JOB", { app, name }),
  });
}
62 changes: 62 additions & 0 deletions packages/core/lib/crawler/mod_test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { assert, assertEquals, base64Encode } from "../../dev_deps.js";
import crawler from "./mod.js";

// Short alias for registering test cases.
const test = Deno.test;

// Events stub: `dispatch` ignores its arguments and returns null.
const events = { dispatch: () => null };

// Stand-in crawler service; every method resolves immediately with a canned value.
const mockCrawler = {
  upsert: () => Promise.resolve({ ok: true }),
  // builds the id from its input so a test can check what was passed in
  get: ({ app, name }) => Promise.resolve({ id: `${app}-${name}` }),
  delete: () => Promise.resolve({ ok: true }),
  start: () => Promise.resolve({ ok: true }),
  post: () => Promise.resolve({ ok: true }),
};

// Crawler API under test, wired to the stubs above.
const { upsert, get, start, remove } = crawler({
  crawler: mockCrawler,
  events,
});

// remove resolves with the `{ ok: true }` value of the mock's `delete` method
test("remove job", async () => {
  const { ok } = await remove("test", "spider").toPromise();
  assert(ok);
});

// start resolves with the `{ ok: true }` value of the mock's `start` method
test("start crawl", async () => {
  const { ok } = await start("test", "spider").toPromise();
  assert(ok);
});

// the mock's `get` builds the id as `${app}-${name}`, so this checks that
// both arguments arrive at the service
test("get job", async () => {
  const { id } = await get("test", "spider").toPromise();
  assertEquals(id, "test-spider");
});

// upsert takes a complete job definition and resolves with the
// `{ ok: true }` value of the mock's `upsert` method
test("upsert crawler job", async () => {
  // Script source carried by the job definition, base64 encoded. The
  // template text is kept flush left so the encoded bytes stay unchanged.
  const script = base64Encode(`
let content = '';
document.querySelectorAll('main p').forEach(el => content = content.concat('\n', el.textContent));
return { title: document.title, content };`);

  const job = {
    app: "test",
    name: "secret",
    source: "https://example.com",
    depth: 2,
    script,
    target: {
      url: "https://jsonplaceholder.typicode.com/posts",
      secret: "secret",
      sub: "SPIDER",
      aud: "https://example.com",
    },
    notify: "https://example.com",
  };

  const result = await upsert(job).toPromise();

  // no debug logging here: the assertion alone reports a failure
  assert(result.ok);
});
3 changes: 2 additions & 1 deletion packages/core/ports.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// deno-lint-ignore-file no-unused-vars
import { cache, data, hooks, queue, search, storage } from "./deps.js";
import { cache, crawler, data, hooks, queue, search, storage } from "./deps.js";

export default function (adapters) {
return ({
Expand All @@ -8,6 +8,7 @@ export default function (adapters) {
storage: adapters.storage ? storage(adapters.storage) : null,
search: adapters.search ? search(adapters.search) : null,
queue: adapters.queue ? queue(adapters.queue) : null,
crawler: adapters.crawler ? crawler(adapters.crawler) : null,
hooks: adapters.hooks,
});
}
3 changes: 2 additions & 1 deletion packages/core/scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

deno lint
deno fmt --check
deno test --unstable --allow-env lib/storage/*_test.js utils/*_test.js lib/cache/*_test.js lib/data/*_test.js
deno test --unstable --allow-env lib/storage/*_test.js utils/*_test.js lib/cache/*_test.js lib/data/*_test.js lib/crawler/*_test.js

0 comments on commit 37fb76b

Please sign in to comment.