Skip to content

Commit 31edb70

Browse files
committed
refactor: fix PR remarks
1 parent a7ca943 commit 31edb70

File tree

3 files changed

+45
-40
lines changed

3 files changed

+45
-40
lines changed

packages/basic-crawler/src/internals/basic-crawler.ts

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,10 +1565,10 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
15651565
enqueueLinks: async (options: SetRequired<EnqueueLinksOptions, 'urls'>) => {
15661566
const requestQueue = await this.getRequestQueue();
15671567

1568-
return this._crawlingContextEnqueueLinksWrapper({ options, request, requestQueue });
1568+
return this.enqueueLinksWithCrawlDepth(options, request, requestQueue);
15691569
},
15701570
addRequests: async (requests: RequestsLike, options: CrawlerAddRequestsOptions = {}) => {
1571-
const requestsGenerator = this._crawlingContextAddRequestsGenerator(requests, request);
1571+
const requestsGenerator = this.addCrawlDepthRequestGenerator(requests, request);
15721572

15731573
return this.addRequests(requestsGenerator, options);
15741574
},
@@ -1660,13 +1660,11 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
16601660
* - Provides defaults for the `enqueueLinks` options based on the crawler configuration.
16611661
* - These options can be overridden by the user.
16621662
*/
1663-
protected _crawlingContextEnqueueLinksWrapper = async (params: {
1664-
options: SetRequired<EnqueueLinksOptions, 'urls'>;
1665-
request: Request<Dictionary>;
1666-
requestQueue: RequestProvider;
1667-
}): Promise<BatchAddRequestsResult> => {
1668-
const { request, requestQueue, options } = params;
1669-
1663+
protected async enqueueLinksWithCrawlDepth(
1664+
options: SetRequired<EnqueueLinksOptions, 'urls'>,
1665+
request: Request<Dictionary>,
1666+
requestQueue: RequestProvider,
1667+
): Promise<BatchAddRequestsResult> {
16701668
const transformRequestFunctionWrapper: RequestTransform = (newRequest) => {
16711669
newRequest.crawlDepth = request.crawlDepth + 1;
16721670

@@ -1690,13 +1688,13 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
16901688

16911689
transformRequestFunction: transformRequestFunctionWrapper,
16921690
});
1693-
};
1691+
}
16941692

16951693
/**
16961694
* Generator function that yields requests bound to the crawling context.
16971695
* - Injects `crawlDepth` to each request being added based on the crawling context request.
16981696
*/
1699-
protected async *_crawlingContextAddRequestsGenerator(
1697+
protected async *addCrawlDepthRequestGenerator(
17001698
requests: RequestsLike,
17011699
crawlingContextRequest: Request<Dictionary>,
17021700
): AsyncGenerator<Source, void, undefined> {

packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -693,11 +693,7 @@ export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
693693
// We need to use a mock request queue implementation, in order to add the requests into our result object
694694
const mockRequestQueue = { addRequestsBatched } as RequestQueue;
695695

696-
return await this._crawlingContextEnqueueLinksWrapper({
697-
options: { ...options, baseUrl },
698-
request,
699-
requestQueue: mockRequestQueue,
700-
});
696+
return await this.enqueueLinksWithCrawlDepth({ ...options, baseUrl }, request, mockRequestQueue);
701697
}
702698

703699
private createLogProxy(log: Log, logs: LogProxyCall[]) {

test/core/crawlers/basic_crawler.test.ts

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@ import type { Server } from 'node:http';
33
import http from 'node:http';
44
import type { AddressInfo } from 'node:net';
55

6-
import type { CrawlingContext, ErrorHandler, RequestHandler, RequestOptions, Source } from '@crawlee/basic';
6+
import type {
7+
CrawlingContext,
8+
EnqueueLinksOptions,
9+
ErrorHandler,
10+
RequestHandler,
11+
RequestOptions,
12+
Source,
13+
} from '@crawlee/basic';
714
import {
815
BasicCrawler,
916
Configuration,
@@ -21,6 +28,7 @@ import type { Dictionary } from '@crawlee/utils';
2128
import { sleep } from '@crawlee/utils';
2229
import express from 'express';
2330
import { MemoryStorageEmulator } from 'test/shared/MemoryStorageEmulator';
31+
import type { SetRequired } from 'type-fest';
2432
import type { Mock } from 'vitest';
2533
import { afterAll, beforeAll, beforeEach, describe, expect, test } from 'vitest';
2634

@@ -225,32 +233,38 @@ describe('BasicCrawler', () => {
225233
]);
226234
});
227235

228-
describe('_crawlingContextEnqueueLinksWrapper()', () => {
236+
describe('enqueueLinksWithCrawlDepth()', () => {
229237
let onSkippedRequestMock: Mock;
230238
let addRequestsBatchedMock: Mock;
231-
let options: Parameters<BasicCrawler['_crawlingContextEnqueueLinksWrapper']>[0];
239+
let options: SetRequired<EnqueueLinksOptions, 'urls'>;
240+
let request: Request;
241+
let requestQueue: RequestQueue;
242+
243+
type EnqueueLinksWrapperOptions = Parameters<BasicCrawler['enqueueLinksWithCrawlDepth']>;
244+
class TestCrawler extends BasicCrawler {
245+
public exposedEnqueueLinksWithCrawlDepth(...enqueueLinksOptions: EnqueueLinksWrapperOptions) {
246+
return this.enqueueLinksWithCrawlDepth(...enqueueLinksOptions);
247+
}
248+
}
232249

233-
const crawler = new BasicCrawler({ maxCrawlDepth: 3 });
234-
// @ts-expect-error Accessing protected method
235-
const { _crawlingContextEnqueueLinksWrapper } = crawler;
250+
const crawler = new TestCrawler({ maxCrawlDepth: 3 });
236251

237252
beforeEach(() => {
238253
addRequestsBatchedMock = vi.fn().mockImplementation(async () => ({}));
239254
onSkippedRequestMock = vi.fn();
255+
240256
options = {
241-
request: new Request({ url: 'https://example.com/', crawlDepth: 2 }),
242-
requestQueue: {
243-
addRequestsBatched: addRequestsBatchedMock as RequestQueue['addRequestsBatched'],
244-
} as RequestQueue,
245-
options: {
246-
urls: ['https://example.com/1/', 'https://example.com/2/'],
247-
onSkippedRequest: onSkippedRequestMock,
248-
},
257+
urls: ['https://example.com/1/', 'https://example.com/2/'],
258+
onSkippedRequest: onSkippedRequestMock,
249259
};
260+
request = new Request({ url: 'https://example.com/', crawlDepth: 2 });
261+
requestQueue = {
262+
addRequestsBatched: addRequestsBatchedMock as RequestQueue['addRequestsBatched'],
263+
} as RequestQueue;
250264
});
251265

252266
it('should generate requests with maxCrawlDepth', async () => {
253-
await _crawlingContextEnqueueLinksWrapper(options);
267+
await crawler.exposedEnqueueLinksWithCrawlDepth(options, request, requestQueue);
254268

255269
const requests = addRequestsBatchedMock.mock.calls[0][0];
256270
expect(requests).toHaveLength(2);
@@ -261,10 +275,8 @@ describe('BasicCrawler', () => {
261275
});
262276

263277
it('should skip requests with crawlDepth exceeding maxCrawlDepth', async () => {
264-
await _crawlingContextEnqueueLinksWrapper({
265-
...options,
266-
request: new Request({ url: 'https://example.com/', crawlDepth: 3 }),
267-
});
278+
const requestWithMaxDepth = new Request({ url: 'https://example.com/', crawlDepth: 3 });
279+
await crawler.exposedEnqueueLinksWithCrawlDepth(options, requestWithMaxDepth, requestQueue);
268280

269281
const requests = addRequestsBatchedMock.mock.calls[0][0];
270282
expect(requests).toHaveLength(0);
@@ -276,11 +288,10 @@ describe('BasicCrawler', () => {
276288
});
277289

278290
it('should respect user provided transformRequestFunction', async () => {
279-
const transformRequestFunction = vi.fn((request: RequestOptions) => request);
280-
await _crawlingContextEnqueueLinksWrapper({
281-
...options,
282-
options: { ...options.options, transformRequestFunction },
283-
});
291+
const transformRequestFunction = vi.fn((req: RequestOptions) => req);
292+
const optionsWithTransform = { ...options, transformRequestFunction };
293+
294+
await crawler.exposedEnqueueLinksWithCrawlDepth(optionsWithTransform, request, requestQueue);
284295

285296
expect(transformRequestFunction).toHaveBeenCalled();
286297

0 commit comments

Comments (0)