Skip to content

Commit

Permalink
Merge pull request #620 from yashkohli88/yk/add-default-headers-centr…
Browse files Browse the repository at this point in the history
…ally

Update fetch file to centralize default headers
qtomlinson authored Nov 5, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
2 parents a1d12ac + 178b4a3 commit 59e652b
Showing 6 changed files with 77 additions and 23 deletions.
6 changes: 5 additions & 1 deletion lib/fetch.js
Original file line number Diff line number Diff line change
@@ -3,6 +3,10 @@

const axios = require('axios')

const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler ([email protected])' })

axios.defaults.headers = defaultHeaders

function buildRequestOptions(request) {
let responseType = 'text'
if (request.json) {
@@ -45,4 +49,4 @@ function withDefaults(opts) {
return request => callFetch(request, axiosInstance)
}

module.exports = { callFetch, withDefaults }
module.exports = { callFetch, withDefaults, defaultHeaders }
4 changes: 1 addition & 3 deletions providers/fetch/cratesioFetch.js
Original file line number Diff line number Diff line change
@@ -51,8 +51,7 @@ class CratesioFetch extends AbstractFetch {
try {
registryData = await request({
url: `https://crates.io/api/v1/crates/${spec.name}`,
json: true,
headers: { 'User-Agent': 'clearlydefined.io crawler ([email protected])' }
json: true
})
} catch (exception) {
if (exception.statusCode !== 404) throw exception
@@ -72,7 +71,6 @@ class CratesioFetch extends AbstractFetch {
url: `https://crates.io${version.dl_path}`,
encoding: null,
headers: {
'User-Agent': 'clearlydefined.io crawler ([email protected])',
Accept: 'text/html'
}
})
8 changes: 3 additions & 5 deletions providers/fetch/mavenBasedFetch.js
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: MIT

const AbstractFetch = require('./abstractFetch')
const { withDefaults } = require('../../lib/fetch')
const { callFetch, defaultHeaders } = require('../../lib/fetch')
const nodeRequest = require('request')
const { clone, get } = require('lodash')
const { promisify } = require('util')
@@ -23,14 +23,12 @@ const extensionMap = {
jar: '.jar'
}

const defaultHeaders = { headers: { 'User-Agent': 'clearlydefined.io crawler ([email protected])' } }

class MavenBasedFetch extends AbstractFetch {
constructor(providerMap, options) {
super(options)
this._providerMap = { ...providerMap }
this._handleRequestPromise = options.requestPromise || withDefaults(defaultHeaders)
this._handleRequestStream = options.requestStream || nodeRequest.defaults(defaultHeaders).get
this._handleRequestPromise = options.requestPromise || callFetch
this._handleRequestStream = options.requestStream || nodeRequest.defaults({ headers: defaultHeaders }).get
}

canHandle(request) {
5 changes: 2 additions & 3 deletions providers/fetch/packagistFetch.js
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ const nodeRequest = require('request')
const { promisify } = require('util')
const readdir = promisify(fs.readdir)
const FetchResult = require('../../lib/fetchResult')
const { defaultHeaders } = require('../../lib/fetch')

const providerMap = {
packagist: 'https://repo.packagist.org/'
@@ -62,9 +63,7 @@ class PackagistFetch extends AbstractFetch {
return new Promise((resolve, reject) => {
const options = {
url: distUrl,
headers: {
'User-Agent': 'clearlydefined.io crawler ([email protected])'
}
headers: defaultHeaders
}
nodeRequest
.get(options, (error, response) => {
46 changes: 42 additions & 4 deletions test/unit/lib/fetchTests.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
const { fail } = require('assert')
const { callFetch, withDefaults } = require('../../../lib/fetch')
const { callFetch, withDefaults, defaultHeaders } = require('../../../lib/fetch')
const { expect } = require('chai')
const fs = require('fs')
const mockttp = require('mockttp')

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}
describe('CallFetch', () => {
describe('with mock server', () => {
const mockServer = mockttp.getLocal()
@@ -23,6 +28,37 @@ describe('CallFetch', () => {
expect(response).to.be.deep.equal(JSON.parse(expected))
})

it('checks if the default header user-agent and other header is present in crate components', async () => {
const path = '/crates.io/api/v1/crates/name/1.0.0/download'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true,
encoding: null,
headers: {
Accept: 'text/html'
}
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
expect(requests[0].headers).to.include({ accept: 'text/html' })
})

it('checks if the default header user-agent is present in crate components', async () => {
const path = '/crates.io/api/v1/crates/name'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})

it('checks if the full response is fetched', async () => {
const path = '/registry.npmjs.com/redis/0.1.0'
const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')
@@ -87,17 +123,17 @@ describe('CallFetch', () => {
const url = mockServer.urlFor(path)
const endpointMock = await mockServer.forGet(path).thenReply(200)

const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler ([email protected])' } }
const defaultOptions = { headers: defaultHeaders }
const requestWithDefaults = withDefaults(defaultOptions)
await requestWithDefaults({ url })
await requestWithDefaults({ url })

const requests = await endpointMock.getSeenRequests()
expect(requests.length).to.equal(2)
expect(requests[0].url).to.equal(url)
expect(requests[0].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[0].headers)
expect(requests[1].url).to.equal(url)
expect(requests[1].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[1].headers)
})

it('checks if the response is text with uri option in GET request', async () => {
@@ -129,6 +165,8 @@ describe('CallFetch', () => {
const json = await requests[0].body.getJson()
expect(json).to.deep.equal({ test: 'test' })
expect(requests[0].headers).to.include({ 'x-crawler': 'secret' })
//Check for the default header value
checkDefaultHeaders(requests[0].headers)
})

describe('test simple', () => {
31 changes: 24 additions & 7 deletions test/unit/providers/fetch/mavenBasedFetchTests.js
Original file line number Diff line number Diff line change
@@ -2,8 +2,15 @@ const { expect } = require('chai')
const MavenBasedFetch = require('../../../../providers/fetch/mavenBasedFetch')
const mockttp = require('mockttp')
const sinon = require('sinon')
const { defaultHeaders } = require('../../../../lib/fetch')
const Request = require('../../../../ghcrawler').request

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}

describe('MavenBasedFetch', () => {
describe('find contained file stat', () => {
it('file contained in root dir', async () => {
@@ -20,24 +27,34 @@ describe('MavenBasedFetch', () => {
})
})

describe('Integration test for component not found', function () {
describe('Integration test', function () {
const path = '/remotecontent?filepath='
const mockServer = mockttp.getLocal()
beforeEach(async () => await mockServer.start())
afterEach(async () => await mockServer.stop())

it('should handle maven components not found', async () => {
const handler = new MavenBasedFetch(
let endpointMock
let handler
beforeEach(async () => {
await mockServer.start()
handler = new MavenBasedFetch(
{
mavencentral: mockServer.urlFor(path)
},
{ logger: { log: sinon.stub() } }
)
await mockServer.forAnyRequest().thenReply(404)
endpointMock = await mockServer.forAnyRequest().thenReply(404)
})
afterEach(async () => await mockServer.stop())

it('should handle maven components not found', async () => {
const request = await handler.handle(
new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.')
)
expect(request.processControl).to.be.equal('skip')
})

it('should check for default header in any request', async () => {
await handler.handle(new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.4.16'))
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})
})
})

0 comments on commit 59e652b

Please sign in to comment.