Commit
Merge pull request #1884 from mashehu/reduce-build-time
reduce build time through content collections
mashehu authored Oct 4, 2023
2 parents 5244aaa + dbde7f8 commit 132f775
Showing 22 changed files with 309 additions and 364 deletions.
Binary file modified .cache.tar.xz
17 changes: 2 additions & 15 deletions .github/workflows/playwright.yml
@@ -16,21 +16,8 @@ jobs:
       - uses: actions/setup-node@v3
         with:
           node-version: 18
-      - name: Get current date
-        id: date
-        run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
-      - name: Cache node modules
-        id: cache-npm
-        uses: actions/cache@v3
-        env:
-          cache-name: cache-node-modules
-        with:
-          # npm cache files are stored in `~/.npm` on Linux/macOS
-          path: ~/.npm
-          key: ${{ runner.os }}-build-playwright-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }}-${{ steps.date.outputs.date }}
-
-      - if: ${{ steps.cache-npm.outputs.cache-hit != 'true' }}
-        name: Install dependencies
+          cache: 'npm'
+      - name: Install dependencies
         run: npm ci
       - name: Install Playwright Browsers
         run: npx playwright install --with-deps
143 changes: 101 additions & 42 deletions bin/build-cache.js
@@ -1,58 +1,117 @@
 #! /usr/bin/env node
 import { getGitHubFile, getCurrentRateLimitRemaining } from '../src/components/octokit.js';
-import Cache from 'file-system-cache';
-import { readFileSync } from 'fs';
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
 import path from 'path';
 import ProgressBar from 'progress';
-
-const cache = Cache.default({
-  basePath: './.cache',
-  ns: 'nf-core',
-});
+import { S3Client, ListObjectsV2Command } from '@aws-sdk/client-s3';
+import cache from './cache.js';
 
 // get current path
 const __dirname = path.resolve();
 
 // check for `--force` flag
 const force = process.argv.includes('--force');
 
-console.log(await getCurrentRateLimitRemaining());
-export const buildCache = async () => {
-  const pipelinesJson = readFileSync(path.join(__dirname, '/public/pipelines.json'));
-  const pipelines = JSON.parse(pipelinesJson);
-
-  let bar = new ProgressBar(' caching markdown [:bar] :percent :etas', { total: pipelines.remote_workflows.length });
-
-  // go through the releases of each pipeline and get the files which are needed for the pipeline pages
-  for (const pipeline of pipelines.remote_workflows) {
-    // console.log(`Caching ${pipeline.name}`);
-    const { name } = pipeline;
-    let releases = pipeline.releases;
-    for (const release of releases) {
-      // console.log(`Caching ${name} ${release.tag_name}`);
-      release.doc_files.push('README.md'); // add the README to the cache
-      if (release.has_schema) {
-        release.doc_files.push('nextflow_schema.json'); // add the nextflow_schema.json to the cache
-      }
-      const version = release.tag_name;
-      for (const f of release.doc_files) {
-        const cache_key = `${name}/${version}/${f}`;
-        // console.log(`Checking ${cache_key}`);
-        const is_cached = cache.getSync(cache_key, false) && cache.getSync(cache_key, false).length > 0;
-        if (!is_cached || force || version === 'dev') {
-          const content = await getGitHubFile(name, f, version);
-          // console.log(`Caching ${cache_key}`);
-          cache.set(cache_key, content);
-          // console.log(`Cached ${cache_key}`);
-        } else {
-          // console.log(`Already cached ${cache_key}`);
-        }
-      }
-    }
-
-    bar.tick();
-  }
-  return true;
-};
-buildCache();
+async function getKeysWithPrefixes(prefixes) {
+  let client = new S3Client({
+    region: 'eu-west-1',
+    signer: { sign: async (request) => request },
+  });
+  const keys = [];
+  const commonPrefixes = [];
+
+  for (const prefix of prefixes) {
+    let continuationToken = undefined;
+    do {
+      const command = new ListObjectsV2Command({
+        Bucket: 'nf-core-awsmegatests',
+        Prefix: prefix,
+        ContinuationToken: continuationToken,
+        Delimiter: '/',
+      });
+      try {
+        const response = await client.send(command);
+        if (response.Contents) {
+          for (const object of response.Contents) {
+            keys.push(object);
+          }
+        }
+        if (response.CommonPrefixes) {
+          for (const object of response.CommonPrefixes) {
+            commonPrefixes.push(object);
+          }
+        }
+
+        continuationToken = response.NextContinuationToken;
+      } catch (error) {
+        console.error('Error retrieving keys:', error);
+        return [];
+      }
+    } while (continuationToken);
+  }
+
+  return { keys: keys, commonPrefixes: commonPrefixes };
+}
+
+(async () => {
+  console.log(await getCurrentRateLimitRemaining());
+  const buildCache = async () => {
+    // build the pipeline cache
+    const pipelinesJson = readFileSync(path.join(__dirname, '/public/pipelines.json'));
+    const pipelines = JSON.parse(pipelinesJson);
+    let bar = new ProgressBar(' caching markdown [:bar] :percent :etas', { total: pipelines.remote_workflows.length });
+    // go through the releases of each pipeline and get the files which are needed for the pipeline pages
+    for (const pipeline of pipelines.remote_workflows) {
+      // console.log(`Caching ${pipeline.name}`);
+      const { name } = pipeline;
+      let releases = pipeline.releases;
+      for (const release of releases) {
+        // console.log(`Caching ${name} ${release.tag_name}`);
+        release.doc_files.push('README.md'); // add the README to the cache
+        if (release.has_schema) {
+          release.doc_files.push('nextflow_schema.json'); // add the nextflow_schema.json to the cache
+        }
+        const version = release.tag_name;
+        for (const f of release.doc_files) {
+          const cache_key = `${name}/${version}/${f}`;
+          // console.log(`Checking ${cache_key}`);
+          const is_cached = existsSync(path.join(__dirname, '.cache', cache_key));
+          if (!is_cached || force || version === 'dev') {
+            const content = await getGitHubFile(name, f, version);
+            // console.log(`Caching ${cache_key}`);
+            // cache.set(cache_key, content);
+            // console.log(`Cached ${cache_key}`);
+            //generate folder structure
+            const parent = cache_key.split('/').slice(0, -1).join('/');
+            mkdirSync(path.join(__dirname, '.cache', parent), { recursive: true });
+            writeFileSync(path.join(__dirname, '.cache', cache_key), content);
+          } else {
+            // console.log(`Already cached ${cache_key}`);
+          }
+        }
+      }
+
+      bar.tick();
+    }
+    // cache aws results
+    const cache_key = `aws.json`;
+    const is_cached = existsSync(path.join(__dirname, '.cache', cache_key));
+    if (!is_cached || force) {
+      const prefixes = pipelines.remote_workflows.flatMap((pipeline) => {
+        return pipeline.releases
+          .filter((rel) => rel.tag_name !== 'dev')
+          .map((release) => {
+            return `${pipeline.name}/results-${release.tag_sha}/`;
+          });
+      });
+
+      const awsResponse = await getKeysWithPrefixes(prefixes);
+      console.log(`Caching ${cache_key}`);
+      cache.set(cache_key, { commonPrefixes: awsResponse.commonPrefixes, bucketContents: awsResponse.keys });
+    }
+
+    console.log('Done');
+    return true;
+  };
+  await buildCache();
+})();
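
Both bin/build-cache.js (above) and bin/pipelines.json.js (below) now import their cache instance from a shared bin/cache.js module, which is not included in this diff. A minimal sketch of what that module presumably contains, assuming it simply extracts the file-system-cache setup that was previously inlined in build-cache.js:

// bin/cache.js — assumed contents, reconstructed from the Cache.default() block removed above;
// the real module is not shown in this commit.
import Cache from 'file-system-cache';

const cache = Cache.default({
  basePath: './.cache', // same on-disk location the build scripts now read with existsSync/writeFileSync
  ns: 'nf-core',
});

export default cache;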
52 changes: 52 additions & 0 deletions bin/build-content-collection.js
@@ -0,0 +1,52 @@
#! /usr/bin/env node
// move markdown files from cache to src/content
import { promises, readdirSync, statSync, existsSync, mkdirSync } from 'fs';
import path from 'path';


(async () => {
async function buildContentCollection() {
// go through all files in .cache, move them to src/content

const getAllMDFiles = (dir) =>
readdirSync(dir).reduce((files, file) => {
const name = path.join(dir, file);
const isDirectory = statSync(name).isDirectory();
if (isDirectory) {
return [...files, ...getAllMDFiles(name)];
} else {
if (/\.mdx?$/.test(name)) {
return [...files, name];
}
return files;
}
}, []);

const files = getAllMDFiles('.cache');
if (!existsSync('src/content/pipelines')) {
mkdirSync('src/content/pipelines', { recursive: true });
}
Promise.all(
// create src/content/pipelines folder if it doesn't exist

files.map(async (f) => {
let content = await promises.readFile(f, 'utf8');
const pathParts = f.split('/');
const pipeline = pathParts[1];
const version = pathParts[2];
// make relative links to png and svg files absolute in markdown to current Astro.url.pathname
content = content.replaceAll(
/(\]\()(docs\/images\/.*?\.png|svg)/gmu,
`$1${`https://raw.githubusercontent.com/nf-core/${pipeline}/${version}/$2`}`
);
const newPath = f.replace('.cache', 'src/content/pipelines');
const parent = newPath.split('/').slice(0, -1).join('/');
await promises.mkdir(parent, { recursive: true });
await promises.writeFile(newPath, content);
})
);
return true;
};

await buildContentCollection();
})();
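
The replaceAll() call in build-content-collection.js rewrites relative docs/images/ links so they still resolve once the markdown is served from the website instead of the pipeline repository. A small illustration of the effect, where the pipeline name and version are placeholders and not values taken from this commit:

// Hypothetical example of the link rewrite — 'rnaseq' and '3.12.0' are made-up values.
const pipeline = 'rnaseq';
const version = '3.12.0';

const input = '![Pipeline overview](docs/images/metro_map.png)';
const output = input.replaceAll(
  /(\]\()(docs\/images\/.*?\.png|svg)/gmu,
  `$1${`https://raw.githubusercontent.com/nf-core/${pipeline}/${version}/$2`}`
);
console.log(output);
// ![Pipeline overview](https://raw.githubusercontent.com/nf-core/rnaseq/3.12.0/docs/images/metro_map.png)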
10 changes: 10 additions & 0 deletions bin/pipelines.json.js
@@ -4,6 +4,7 @@ import { promises as fs, writeFileSync, existsSync } from 'fs';
 import yaml from 'js-yaml';
 import path, { join } from 'path';
 import ProgressBar from 'progress';
+import cache from './cache.js';
 
 // get current path
 const __dirname = path.resolve();
@@ -214,6 +215,15 @@ export const writePipelinesJson = async () => {
           return component.replace('/', '_');
         });
       }
+
+      // cache release body except for dev
+      if (release.tag_name !== 'dev') {
+        const cache_key = `${name}/${release.tag_name}/body`;
+        const is_cached = cache.getSync(cache_key, false) && cache.getSync(cache_key, false).length > 0;
+        if (!is_cached) {
+          cache.set(cache_key, release.body);
+        }
+      }
       return { tag_name, published_at, tag_sha, has_schema, doc_files, components };
     }),
   );
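
The new block stores each non-dev GitHub release body under a pipeline/tag/body key so the release notes do not have to be re-fetched on every build. How the site reads the value back is not part of this diff; a hedged sketch using the same file-system-cache API seen above:

// Hypothetical read-back of a cached release body; the consuming page is not shown in this commit.
import cache from './cache.js';

const body = cache.getSync('rnaseq/3.12.0/body', false); // 'rnaseq'/'3.12.0' are placeholder values
if (body) {
  console.log(body.slice(0, 120)); // e.g. preview the start of the release notes
}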
2 changes: 2 additions & 0 deletions netlify.toml
@@ -1,3 +1,5 @@
[functions]
external_node_modules = ["vscode-oniguruma","shiki"]
node_bundler = "esbuild"
[build]
environment = { NODE_VERSION = "20.4.0" }