Generate projects.* files from landscape data
Signed-off-by: Sergio Castaño Arteaga <[email protected]>
Signed-off-by: Cintia Sanchez Garcia <[email protected]>
Co-authored-by: Sergio Castaño Arteaga <[email protected]>
Co-authored-by: Cintia Sanchez Garcia <[email protected]>
tegioz and cynthia-sg committed Aug 14, 2023
1 parent a707d54 commit b731b4e
Showing 12 changed files with 236 additions and 60 deletions.
22 changes: 22 additions & 0 deletions Cargo.lock

Generated file; diff not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -14,6 +14,7 @@ aws-config = "0.56.0"
 aws-sdk-s3 = "0.29.0"
 chrono = { version = "0.4.26", features = ["serde"] }
 clap = { version = "4.3.21", features = ["derive"] }
+csv = "1.2.2"
 deadpool = "0.9.5"
 dirs = "5.0.1"
 futures = "0.3.28"
2 changes: 1 addition & 1 deletion askama.toml
@@ -1,2 +1,2 @@
 [general]
-dirs = ["web/dist"]
+dirs = ["templates", "web/dist"]
70 changes: 39 additions & 31 deletions src/build.rs
@@ -9,16 +9,16 @@ use crate::{
     datasets::Datasets,
     github::collect_github_data,
     logos::prepare_logo,
+    projects::{generate_projects_csv, Project, ProjectsMd},
     settings::{get_landscape_settings, LandscapeSettings},
-    tmpl, BuildArgs, LogosSource,
+    BuildArgs, LogosSource,
 };
 use anyhow::{format_err, Result};
 use askama::Template;
 use futures::stream::{self, StreamExt};
 use rust_embed::RustEmbed;
 use std::{
     collections::HashMap,
-    env,
     fs::{self, File},
     io::Write,
     path::Path,
@@ -28,28 +28,18 @@ use std::{
 use tracing::{debug, error, info, instrument};
 use uuid::Uuid;
 
-/// Environment variable containing the Crunchbase API key.
-const CRUNCHBASE_API_KEY: &str = "CRUNCHBASE_API_KEY";
-
 /// Path where the datasets will be written to in the output directory.
 const DATASETS_PATH: &str = "data";
 
-/// Environment variable containing a comma separated list of GitHub tokens.
-const GITHUB_TOKENS: &str = "GITHUB_TOKENS";
+/// Path where some documents will be written to in the output directory.
+const DOCS_PATH: &str = "docs";
 
 /// Path where the logos will be written to in the output directory.
 const LOGOS_PATH: &str = "logos";
 
 /// Maximum number of logos to prepare concurrently.
 const PREPARE_LOGOS_MAX_CONCURRENCY: usize = 20;
 
-/// External services credentials.
-#[derive(Debug, Default)]
-struct Credentials {
-    crunchbase_api_key: Option<String>,
-    github_tokens: Option<Vec<String>>,
-}
-
 /// Embed web application assets into binary.
 /// (these assets will be built automatically from the build script)
 #[derive(RustEmbed)]
@@ -85,10 +75,9 @@ pub(crate) async fn build(args: &BuildArgs) -> Result<()> {
     prepare_logos(&cache, &args.logos_source, &mut landscape_data, &args.output_dir).await?;
 
     // Collect data from external services
-    let credentials = read_credentials();
     let (crunchbase_data, github_data) = tokio::try_join!(
-        collect_crunchbase_data(&cache, &credentials.crunchbase_api_key, &landscape_data),
-        collect_github_data(&cache, &credentials.github_tokens, &landscape_data)
+        collect_crunchbase_data(&cache, &landscape_data),
+        collect_github_data(&cache, &landscape_data)
     )?;
 
     // Add data collected from external services to the landscape data
@@ -104,6 +93,9 @@ pub(crate) async fn build(args: &BuildArgs) -> Result<()> {
     // Copy web assets files to the output directory
     copy_web_assets(&args.output_dir)?;
 
+    // Generate projects.* files
+    generate_projects_files(&landscape_data, &args.output_dir)?;
+
     let duration = start.elapsed().as_secs_f64();
     info!("landscape website built! (took: {:.3}s)", duration);
 
@@ -172,6 +164,25 @@ fn generate_datasets(
     Ok(datasets)
 }
 
+/// Generate the projects.md and projects.csv files from the landscape data.
+#[instrument(skip_all, err)]
+fn generate_projects_files(landscape_data: &LandscapeData, output_dir: &Path) -> Result<()> {
+    debug!("generating projects.* files");
+    let projects: Vec<Project> = landscape_data.into();
+
+    // projects.md
+    let projects_md = ProjectsMd { projects: &projects }.render()?;
+    let docs_path = output_dir.join(DOCS_PATH);
+    let mut file = File::create(docs_path.join("projects.md"))?;
+    file.write_all(projects_md.as_bytes())?;
+
+    // projects.csv
+    let w = csv::Writer::from_path(docs_path.join("projects.csv"))?;
+    generate_projects_csv(w, &projects)?;
+
+    Ok(())
+}
+
 /// Prepare logos and copy them to the output directory, updating the logo
 /// reference on each landscape item.
 #[instrument(skip_all, err)]
@@ -241,26 +252,18 @@ async fn prepare_logos(
     Ok(())
 }
 
-/// Read external services credentials from environment.
-#[instrument]
-fn read_credentials() -> Credentials {
-    let mut credentials = Credentials::default();
-
-    if let Ok(crunchbase_api_key) = env::var(CRUNCHBASE_API_KEY) {
-        credentials.crunchbase_api_key = Some(crunchbase_api_key);
-    }
-    if let Ok(github_tokens) = env::var(GITHUB_TOKENS) {
-        credentials.github_tokens = Some(github_tokens.split(',').map(ToString::to_string).collect());
-    }
-
-    credentials
+/// Template for the index document.
+#[derive(Debug, Clone, Template)]
+#[template(path = "index.html", escape = "none")]
+struct Index<'a> {
+    pub datasets: &'a Datasets,
 }
 
 /// Render index file and write it to the output directory.
 #[instrument(skip_all, err)]
 fn render_index(datasets: &Datasets, output_dir: &Path) -> Result<()> {
     debug!("rendering index.html file");
-    let index = tmpl::Index { datasets }.render()?;
+    let index = Index { datasets }.render()?;
     let mut file = File::create(output_dir.join("index.html"))?;
     file.write_all(index.as_bytes())?;
 
@@ -281,6 +284,11 @@ fn setup_output_dir(output_dir: &Path) -> Result<()> {
         fs::create_dir(datasets_path)?;
     }
 
+    let docs_path = output_dir.join(DOCS_PATH);
+    if !docs_path.exists() {
+        fs::create_dir(docs_path)?;
+    }
+
     let logos_path = output_dir.join(LOGOS_PATH);
     if !logos_path.exists() {
         fs::create_dir(logos_path)?;
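The new src/projects.rs module referenced above is among the files whose diff is not rendered on this page. As a rough, minimal sketch only — inferred from the imports used in build.rs (Project, ProjectsMd, generate_projects_csv), not from the actual file — it might look something like the following. The Project fields and the templates/projects.md path (presumably why askama.toml now lists the "templates" directory) are assumptions, and the `impl From<&LandscapeData> for Vec<Project>` backing the `.into()` call in build.rs is omitted:

```rust
// Hypothetical sketch of src/projects.rs; the real module is not shown in
// this diff. Field names and the template path below are assumptions.

use anyhow::Result;
use askama::Template;
use serde::Serialize;
use std::io::Write;

/// Details of a landscape project (assumed fields).
#[derive(Debug, Clone, Serialize)]
pub(crate) struct Project {
    pub name: String,
    pub maturity: String,
    pub homepage_url: Option<String>,
}

/// Template for the projects.md document, rendered from an Askama template
/// (assumed to live at templates/projects.md).
#[derive(Debug, Clone, Template)]
#[template(path = "projects.md", escape = "none")]
pub(crate) struct ProjectsMd<'a> {
    pub projects: &'a [Project],
}

/// Write the projects information to the CSV writer provided.
pub(crate) fn generate_projects_csv<W: Write>(
    mut w: csv::Writer<W>,
    projects: &[Project],
) -> Result<()> {
    // Header row first, then one record per project.
    w.write_record(["name", "maturity", "homepage_url"])?;
    for p in projects {
        w.write_record([
            p.name.as_str(),
            p.maturity.as_str(),
            p.homepage_url.as_deref().unwrap_or(""),
        ])?;
    }
    w.flush()?;
    Ok(())
}
```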
10 changes: 6 additions & 4 deletions src/crunchbase.rs
@@ -14,7 +14,7 @@ use mockall::automock;
 use regex::Regex;
 use reqwest::{header, StatusCode};
 use serde::{Deserialize, Serialize};
-use std::{collections::HashMap, sync::Arc, time::Duration};
+use std::{collections::HashMap, env, sync::Arc, time::Duration};
 use tracing::{debug, instrument, warn};
 
 /// File used to cache data collected from Crunchbase.
@@ -23,12 +23,14 @@ const CRUNCHBASE_CACHE_FILE: &str = "crunchbase.json";
 /// How long the Crunchbase data in the cache is valid (in days).
 const CRUNCHBASE_CACHE_TTL: i64 = 7;
 
+/// Environment variable containing the Crunchbase API key.
+const CRUNCHBASE_API_KEY: &str = "CRUNCHBASE_API_KEY";
+
 /// Collect Crunchbase data for each of the items orgs in the landscape,
 /// reusing cached data whenever possible.
 #[instrument(skip_all, err)]
 pub(crate) async fn collect_crunchbase_data(
     cache: &Cache,
-    api_key: &Option<String>,
     landscape_data: &LandscapeData,
 ) -> Result<CrunchbaseData> {
     debug!("collecting organizations information from crunchbase (this may take a while)");
@@ -42,8 +44,8 @@ pub(crate) async fn collect_crunchbase_data(
     };
 
     // Setup Crunchbase API client if an api key was provided
-    let cb: Option<DynCB> = if let Some(api_key) = api_key {
-        Some(Arc::new(CBApi::new(api_key)?))
+    let cb: Option<DynCB> = if let Ok(api_key) = env::var(CRUNCHBASE_API_KEY) {
+        Some(Arc::new(CBApi::new(&api_key)?))
     } else {
         warn!("crunchbase api key not provided: no information will be collected from crunchbase");
         None
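With this change the API key is resolved inside collect_crunchbase_data itself, so callers no longer thread credentials through. A minimal standalone sketch of the pattern — build a client only when the variable is set, degrade gracefully otherwise — using a placeholder key value:

```rust
use std::env;

// Environment variable read by collect_crunchbase_data (per the diff above).
const CRUNCHBASE_API_KEY: &str = "CRUNCHBASE_API_KEY";

fn main() {
    // Placeholder value for illustration only.
    env::set_var(CRUNCHBASE_API_KEY, "cb-api-key-123");

    // Same shape as the diff: Some(client) when the key is present, None
    // (plus a warning in the real code) when it is not.
    let cb: Option<String> = match env::var(CRUNCHBASE_API_KEY) {
        Ok(api_key) => Some(api_key), // real code: Arc::new(CBApi::new(&api_key)?)
        Err(_) => None,               // real code: warn! and skip collection
    };
    assert!(cb.is_some());
}
```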
25 changes: 25 additions & 0 deletions src/data.rs
@@ -235,6 +235,7 @@ impl From<legacy::LandscapeData> for LandscapeData {
             // Additional information in extra field
             if let Some(extra) = legacy_item.extra {
                 item.artwork_url = extra.artwork_url;
+                item.audits = extra.audits;
                 item.blog_url = extra.blog_url;
                 item.chat_channel = extra.chat_channel;
                 item.clomonitor_name = extra.clomonitor_name;
@@ -253,6 +254,11 @@ impl From<legacy::LandscapeData> for LandscapeData {
                     item.accepted_at = Some(v);
                 }
             }
+            if let Some(archived) = extra.archived {
+                if let Ok(v) = NaiveDate::parse_from_str(&archived, DATE_FORMAT) {
+                    item.archived_at = Some(v);
+                }
+            }
             if let Some(graduated) = extra.graduated {
                 if let Ok(v) = NaiveDate::parse_from_str(&graduated, DATE_FORMAT) {
                     item.graduated_at = Some(v);
@@ -333,9 +339,15 @@ pub(crate) struct Item {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub accepted_at: Option<NaiveDate>,
 
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub archived_at: Option<NaiveDate>,
+
     #[serde(skip_serializing_if = "Option::is_none")]
     pub artwork_url: Option<String>,
 
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub audits: Option<Vec<ItemAudit>>,
+
     #[serde(skip_serializing_if = "Option::is_none")]
     pub blog_url: Option<String>,
 
@@ -429,6 +441,16 @@ impl Item {
     }
 }
 
+/// Landscape item audit information.
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub(crate) struct ItemAudit {
+    pub date: Option<NaiveDate>,
+    #[serde(rename = "type")]
+    pub kind: Option<String>,
+    pub url: Option<String>,
+    pub vendor: Option<String>,
+}
+
 /// Landscape item featured information.
 #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
 pub(crate) struct ItemFeatured {
@@ -486,6 +508,7 @@ mod legacy {
     //! This module defines some types used to parse the landscape data file in
     //! legacy format and convert it to the new one.
 
+    use super::ItemAudit;
     use serde::{Deserialize, Serialize};
 
     /// Landscape data (legacy format).
@@ -538,6 +561,8 @@ mod legacy {
     #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
     pub(super) struct ItemExtra {
         pub accepted: Option<String>,
+        pub archived: Option<String>,
+        pub audits: Option<Vec<ItemAudit>>,
         pub annual_review_date: Option<String>,
         pub annual_review_url: Option<String>,
         pub artwork_url: Option<String>,
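The #[serde(rename = "type")] attribute on ItemAudit maps the `type` key used in the landscape data onto the `kind` field, since `type` is a reserved word in Rust. A small sketch of how an audit entry might deserialize — the YAML values are invented, and serde_yaml as the parser is an assumption:

```rust
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};

// Local mirror of the ItemAudit struct added above.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
struct ItemAudit {
    date: Option<NaiveDate>,
    #[serde(rename = "type")]
    kind: Option<String>,
    url: Option<String>,
    vendor: Option<String>,
}

fn main() {
    // Hypothetical audit entry, similar to what an item's `extra.audits`
    // list might contain in the legacy landscape.yml (values invented).
    let yaml = r#"
date: "2023-03-01"
type: fuzzing
url: https://example.com/audit-report.pdf
vendor: Some Security Vendor
"#;
    let audit: ItemAudit = serde_yaml::from_str(yaml).unwrap();
    assert_eq!(audit.kind.as_deref(), Some("fuzzing"));
    assert_eq!(audit.date, NaiveDate::from_ymd_opt(2023, 3, 1));
}
```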
21 changes: 10 additions & 11 deletions src/github.rs
@@ -18,6 +18,7 @@ use regex::Regex;
 use reqwest::header;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
+use std::env;
 use tracing::{debug, instrument, warn};
 
 /// File used to cache data collected from GitHub.
@@ -26,14 +27,13 @@ const GITHUB_CACHE_FILE: &str = "github.json";
 /// How long the GitHub data in the cache is valid (in days).
 const GITHUB_CACHE_TTL: i64 = 7;
 
+/// Environment variable containing a comma separated list of GitHub tokens.
+const GITHUB_TOKENS: &str = "GITHUB_TOKENS";
+
 /// Collect GitHub data for each of the items repositories in the landscape,
 /// reusing cached data whenever possible.
 #[instrument(skip_all, err)]
-pub(crate) async fn collect_github_data(
-    cache: &Cache,
-    tokens: &Option<Vec<String>>,
-    landscape_data: &LandscapeData,
-) -> Result<GithubData> {
+pub(crate) async fn collect_github_data(cache: &Cache, landscape_data: &LandscapeData) -> Result<GithubData> {
     debug!("collecting repositories information from github (this may take a while)");
 
     // Read cached data (if available)
@@ -45,7 +45,10 @@ pub(crate) async fn collect_github_data(
     };
 
     // Setup GitHub API clients pool if any tokens have been provided
-    let gh_pool: Option<Pool<DynGH>> = if let Some(tokens) = tokens {
+    let tokens: Result<Vec<String>> = env::var(GITHUB_TOKENS)
+        .map(|t| t.split(',').map(ToString::to_string).collect())
+        .map_err(Into::into);
+    let gh_pool: Option<Pool<DynGH>> = if let Ok(tokens) = &tokens {
         let mut gh_clients: Vec<DynGH> = vec![];
         for token in tokens {
             let gh = Box::new(GHApi::new(token)?);
@@ -70,11 +73,7 @@ pub(crate) async fn collect_github_data(
     urls.dedup();
 
     // Collect repositories information from GitHub, reusing cached data when available
-    let concurrency = if let Some(tokens) = tokens {
-        tokens.len()
-    } else {
-        1
-    };
+    let concurrency = if let Ok(tokens) = tokens { tokens.len() } else { 1 };
     let github_data: GithubData = stream::iter(urls)
         .map(|url| async {
             let url = url.clone();
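Token handling moves into collect_github_data as well: GITHUB_TOKENS holds a comma separated list, one API client is created per token, and the collection concurrency follows the number of tokens (falling back to 1 when none are provided). A standalone sketch of that parsing, with placeholder token values:

```rust
use std::env;

// Environment variable read by collect_github_data (per the diff above).
const GITHUB_TOKENS: &str = "GITHUB_TOKENS";

fn main() {
    // Placeholder tokens for illustration only.
    env::set_var(GITHUB_TOKENS, "ghp_tokenA,ghp_tokenB");

    // Same comma-splitting as the diff above.
    let tokens: Result<Vec<String>, env::VarError> =
        env::var(GITHUB_TOKENS).map(|t| t.split(',').map(ToString::to_string).collect());

    // One GitHub client per token; concurrency follows the pool size.
    let concurrency = if let Ok(tokens) = &tokens { tokens.len() } else { 1 };
    assert_eq!(concurrency, 2);
}
```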
3 changes: 3 additions & 0 deletions src/logos.rs
@@ -1,3 +1,6 @@
+//! This module provides some helper functions to prepare logos to be displayed
+//! on the landscape web application.
+
 use crate::{cache::Cache, LogosSource};
 use anyhow::{format_err, Result};
 use lazy_static::lazy_static;
2 changes: 1 addition & 1 deletion src/main.rs
@@ -13,9 +13,9 @@ mod data;
 mod datasets;
 mod github;
 mod logos;
+mod projects;
 mod s3;
 mod settings;
-mod tmpl;
 
 /// CLI arguments.
 #[derive(Parser)]