Skip to content

Commit

Permalink
Generate individual data files for each item
Browse files Browse the repository at this point in the history
Signed-off-by: Sergio Castaño Arteaga <[email protected]>
  • Loading branch information
tegioz committed Sep 3, 2023
1 parent 68bc285 commit 009246d
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 35 deletions.
25 changes: 18 additions & 7 deletions src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ pub(crate) async fn build(args: &BuildArgs) -> Result<()> {
landscape_data.add_crunchbase_data(crunchbase_data)?;
landscape_data.add_github_data(github_data)?;

// Generate items data files
generate_items_data_files(&landscape_data, &args.output_dir)?;

// Generate datasets for web application
let datasets = generate_datasets(&landscape_data, &settings, includes_guide, &args.output_dir)?;

Expand Down Expand Up @@ -153,9 +156,7 @@ fn copy_web_assets(output_dir: &Path) -> Result<()> {
}

/// Generate datasets from the landscape data and settings, as well as from the
/// data collected from external services (GitHub, Crunchbase, etc). Some of
/// the datasets will be embedded in the index document, and the rest will be
/// written to the DATASETS_PATH in the output directory.
/// data collected from external services (GitHub, Crunchbase, etc).
#[instrument(skip_all, err)]
fn generate_datasets(
landscape_data: &LandscapeData,
Expand All @@ -172,13 +173,23 @@ fn generate_datasets(
let mut base_file = File::create(datasets_path.join("base.json"))?;
base_file.write_all(&serde_json::to_vec(&datasets.base)?)?;

// Full
let mut full_file = File::create(datasets_path.join("full.json"))?;
full_file.write_all(&serde_json::to_vec(&datasets.full)?)?;

Ok(datasets)
}

/// Generate items data files from the landscape data.
///
/// Writes one `item_{id}.json` file per landscape item into the datasets
/// directory (`DATASETS_PATH`) inside `output_dir`.
///
/// # Errors
///
/// Returns an error if an item cannot be serialized to JSON or if its file
/// cannot be written.
#[instrument(skip_all, err)]
fn generate_items_data_files(landscape_data: &LandscapeData, output_dir: &Path) -> Result<()> {
    debug!("generating items data files");

    let datasets_path = output_dir.join(DATASETS_PATH);
    for item in &landscape_data.items {
        // Serialize before touching the filesystem so that a serialization
        // failure does not leave an empty (truncated) file behind.
        let payload = serde_json::to_vec(item)?;
        std::fs::write(datasets_path.join(format!("item_{}.json", item.id)), payload)?;
    }

    Ok(())
}

/// Generate the projects.md and projects.csv files from the landscape data.
#[instrument(skip_all, err)]
fn generate_projects_files(landscape_data: &LandscapeData, output_dir: &Path) -> Result<()> {
Expand Down
7 changes: 7 additions & 0 deletions src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,13 @@ impl Item {
let key = format!("{}##{}##{}", &self.category, &self.subcategory, &self.name);
self.id = Uuid::new_v5(&Uuid::NAMESPACE_OID, key.as_bytes());
}

/// Return the repository flagged as primary, if the item has one.
///
/// Yields `None` when the item has no repositories or when none of them is
/// marked as primary.
pub(crate) fn get_primary_repository(&self) -> Option<&Repository> {
    let repos = self.repositories.as_ref()?;
    repos.iter().find(|repo| repo.primary.unwrap_or(false))
}
}

/// Landscape item audit information.
Expand Down
87 changes: 59 additions & 28 deletions src/datasets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! rendering it), whereas others will be written to the output directory so
//! that they can be fetched when needed.
use self::{base::Base, full::Full};
use self::base::Base;
use crate::{data::LandscapeData, settings::LandscapeSettings};
use anyhow::{Ok, Result};

Expand All @@ -14,9 +14,6 @@ use anyhow::{Ok, Result};
pub(crate) struct Datasets {
/// #[base]
pub base: Base,

/// #[full]
pub full: Full,
}

impl Datasets {
Expand All @@ -28,7 +25,6 @@ impl Datasets {
) -> Result<Self> {
let datasets = Datasets {
base: Base::new(landscape_data, settings, includes_guide),
full: Full::new(landscape_data.clone()),
};

Ok(datasets)
Expand All @@ -44,6 +40,7 @@ mod base {
data::{Category, CategoryName, ItemFeatured, LandscapeData, Maturity},
settings::{Colors, GridItemsSize, Group, Images, LandscapeSettings, SocialNetworks},
};
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

Expand Down Expand Up @@ -79,16 +76,44 @@ mod base {
pub(crate) struct Item {
/// Category of the item, copied from the landscape item.
pub category: String,
/// True when the landscape item lists at least one repository.
pub has_repositories: bool,
/// Homepage url, copied from the landscape item.
pub homepage_url: String,
/// Item identifier, copied from the landscape item.
pub id: Uuid,
/// Item name, copied from the landscape item.
pub name: String,
/// Item logo, copied from the landscape item.
pub logo: String,
/// Subcategory of the item, copied from the landscape item.
pub subcategory: String,

// The optional fields below are left out of the serialized output when
// they are `None`.

/// Acceptance date, copied from the landscape item.
#[serde(skip_serializing_if = "Option::is_none")]
pub accepted_at: Option<NaiveDate>,

/// Description taken from the primary repository's GitHub data, falling
/// back to the Crunchbase description when that is not available.
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,

/// DevStats url, copied from the landscape item.
#[serde(skip_serializing_if = "Option::is_none")]
pub devstats_url: Option<String>,

/// Featured information, copied from the landscape item.
#[serde(skip_serializing_if = "Option::is_none")]
pub featured: Option<ItemFeatured>,

/// Funding value taken from the item's Crunchbase data.
#[serde(skip_serializing_if = "Option::is_none")]
pub funding: Option<i64>,

/// Sum of the GitHub stars of all the item's repositories (repositories
/// without GitHub data count as zero). `None` when the item has no
/// repositories list.
#[serde(skip_serializing_if = "Option::is_none")]
pub github_stars: Option<i64>,

/// Maturity, copied from the landscape item.
#[serde(skip_serializing_if = "Option::is_none")]
pub maturity: Option<Maturity>,

/// Member subcategory, copied from the landscape item.
#[serde(skip_serializing_if = "Option::is_none")]
pub member_subcategory: Option<String>,

/// Organization name taken from the item's Crunchbase data.
#[serde(skip_serializing_if = "Option::is_none")]
pub organization: Option<String>,

/// Url of the item's primary repository, when it has one.
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_repo_url: Option<String>,

/// Twitter url, copied from the landscape item.
#[serde(skip_serializing_if = "Option::is_none")]
pub twitter_url: Option<String>,
}

impl Base {
Expand Down Expand Up @@ -122,42 +147,48 @@ mod base {

// Prepare items from landscape data
for item in &landscape_data.items {
// Prepare item description
let mut description = item
.get_primary_repository()
.and_then(|r| r.github_data.as_ref().map(|gh| gh.description.clone()));
if description.is_none() {
description = item.crunchbase_data.as_ref().and_then(|cb| cb.description.clone());
}

// Count stars across all repositories
let github_stars = item.repositories.as_ref().map(|repos| {
repos.iter().fold(0, |acc, r| {
acc + if let Some(gh_data) = &r.github_data {
gh_data.stars
} else {
0
}
})
});

base.items.push(Item {
accepted_at: item.accepted_at,
category: item.category.clone(),
description,
devstats_url: item.devstats_url.clone(),
featured: item.featured.clone(),
funding: item.crunchbase_data.as_ref().and_then(|cb| cb.funding),
github_stars,
has_repositories: !item.repositories.as_ref().unwrap_or(&vec![]).is_empty(),
homepage_url: item.homepage_url.clone(),
id: item.id,
name: item.name.clone(),
logo: item.logo.clone(),
maturity: item.maturity.clone(),
member_subcategory: item.member_subcategory.clone(),
organization: item.crunchbase_data.as_ref().and_then(|cb| cb.name.clone()),
primary_repo_url: item.get_primary_repository().map(|r| r.url.clone()),
subcategory: item.subcategory.clone(),
twitter_url: item.twitter_url.clone(),
});
}

base
}
}
}

/// Full dataset.
///
/// Holds all the information available for the landscape. The web
/// application relies on it to power features that need extra data the base
/// dataset does not provide.
mod full {
    use crate::data::{Item, LandscapeData};
    use serde::{Deserialize, Serialize};

    /// Container for the complete list of landscape items.
    #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
    pub(crate) struct Full {
        /// All landscape items; omitted from the serialized output when empty.
        #[serde(skip_serializing_if = "Vec::is_empty")]
        pub items: Vec<Item>,
    }

    impl Full {
        /// Build a Full instance by taking ownership of the items in the
        /// landscape data provided.
        pub(crate) fn new(data: LandscapeData) -> Self {
            let LandscapeData { items, .. } = data;
            Self { items }
        }
    }
}

0 comments on commit 009246d

Please sign in to comment.