From 0b62d9213dc3850525269371431f098b079876e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Casta=C3=B1o=20Arteaga?= Date: Tue, 5 Sep 2023 20:27:50 +0200 Subject: [PATCH] Some refactoring in CLI (#159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sergio Castaño Arteaga --- src/{ => build}/cache.rs | 4 +- src/{ => build}/crunchbase.rs | 37 ++++++++------- src/{ => build}/data.rs | 67 +++++++++++++++++++++------- src/{ => build}/datasets.rs | 9 ++-- src/{ => build}/github.rs | 37 +++++++-------- src/{ => build}/guide.rs | 0 src/{ => build}/logos.rs | 3 +- src/{build.rs => build/mod.rs} | 20 ++++++--- src/{ => build}/projects.rs | 2 +- src/{ => build}/settings.rs | 10 ++--- src/deploy/mod.rs | 3 ++ src/{ => deploy}/s3.rs | 0 src/main.rs | 12 +---- src/{validate.rs => validate/mod.rs} | 2 +- 14 files changed, 121 insertions(+), 85 deletions(-) rename src/{ => build}/cache.rs (94%) rename src/{ => build}/crunchbase.rs (94%) rename src/{ => build}/data.rs (94%) rename src/{ => build}/datasets.rs (95%) rename src/{ => build}/github.rs (96%) rename src/{ => build}/guide.rs (100%) rename src/{ => build}/logos.rs (98%) rename src/{build.rs => build/mod.rs} (97%) rename src/{ => build}/projects.rs (98%) rename src/{ => build}/settings.rs (98%) create mode 100644 src/deploy/mod.rs rename src/{ => deploy}/s3.rs (100%) rename src/{validate.rs => validate/mod.rs} (90%) diff --git a/src/cache.rs b/src/build/cache.rs similarity index 94% rename from src/cache.rs rename to src/build/cache.rs index ff1578a1..1d6c744e 100644 --- a/src/cache.rs +++ b/src/build/cache.rs @@ -1,3 +1,5 @@ +//! This module defines the cache used to cache files across builds. + use anyhow::{format_err, Result}; use std::{fs, io::Write, path::PathBuf}; use tracing::instrument; @@ -8,7 +10,7 @@ const CACHE_PATH: &str = "landscape"; /// Cache used to store data collected from external services. 
#[derive(Debug, Clone, Default)] pub(crate) struct Cache { - pub cache_dir: PathBuf, + cache_dir: PathBuf, } impl Cache { diff --git a/src/crunchbase.rs b/src/build/crunchbase.rs similarity index 94% rename from src/crunchbase.rs rename to src/build/crunchbase.rs index a12d657f..2d4a37c7 100644 --- a/src/crunchbase.rs +++ b/src/build/crunchbase.rs @@ -2,7 +2,7 @@ //! from Crunchbase for each of the landscape items (when applicable), as well //! as the functionality used to collect that information. -use crate::{cache::Cache, data::LandscapeData}; +use super::{cache::Cache, LandscapeData}; use anyhow::{format_err, Result}; use async_trait::async_trait; use chrono::{DateTime, Utc}; @@ -117,11 +117,13 @@ pub(crate) async fn collect_crunchbase_data( pub(crate) type CrunchbaseData = HashMap; /// Type alias to represent a crunchbase url. -pub(crate) type CrunchbaseUrl = String; +type CrunchbaseUrl = String; /// Organization information collected from Crunchbase. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Organization { + pub generated_at: DateTime, + #[serde(skip_serializing_if = "Option::is_none")] pub city: Option, @@ -137,9 +139,6 @@ pub(crate) struct Organization { #[serde(skip_serializing_if = "Option::is_none")] pub funding: Option, - /// Represents the moment at which this instance was generated - pub generated_at: DateTime, - #[serde(skip_serializing_if = "Option::is_none")] pub homepage_url: Option, @@ -177,8 +176,8 @@ pub(crate) struct Organization { impl Organization { /// Create a new Organization instance from information obtained from the /// Crunchbase API. 
- pub(crate) async fn new(cb: DynCB, cb_url: &str) -> Result { - // Collec some information from Crunchbase + async fn new(cb: DynCB, cb_url: &str) -> Result { + // Collect some information from Crunchbase let permalink = get_permalink(cb_url)?; let cb_org = cb.get_organization(&permalink).await?; @@ -224,24 +223,24 @@ impl Organization { const CRUNCHBASE_API_URL: &str = "https://api.crunchbase.com/api/v4"; /// Type alias to represent a CB trait object. -pub(crate) type DynCB = Arc; +type DynCB = Arc; /// Trait that defines some operations a CB implementation must support. #[async_trait] #[cfg_attr(test, automock)] -pub(crate) trait CB { +trait CB { /// Get organization information. async fn get_organization(&self, permalink: &str) -> Result; } /// CB implementation backed by the Crunchbase API. -pub struct CBApi { +struct CBApi { http_client: reqwest::Client, } impl CBApi { /// Create a new CBApi instance. - pub fn new(key: &str) -> Result { + fn new(key: &str) -> Result { // Setup HTTP client ready to make requests to the Crunchbase API let user_agent = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); let mut headers = header::HeaderMap::new(); @@ -286,13 +285,13 @@ impl CB for CBApi { } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBOrganizationEntity { +struct CBOrganizationEntity { properties: CBOrganization, cards: CBCards, } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBOrganization { +struct CBOrganization { categories: Option>, company_type: Option, funding_total: Option, @@ -307,32 +306,32 @@ pub(crate) struct CBOrganization { } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBEntityIdentifier { +struct CBEntityIdentifier { value: Option, } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBFundingTotal { +struct CBFundingTotal { value_usd: Option, } #[derive(Debug, Clone, 
Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBValue { +struct CBValue { value: Option, } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBCards { +struct CBCards { headquarters_address: Option>, } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBAddress { +struct CBAddress { location_identifiers: Option>, } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub(crate) struct CBLocationIdentifier { +struct CBLocationIdentifier { location_type: Option, value: Option, } diff --git a/src/data.rs b/src/build/data.rs similarity index 94% rename from src/data.rs rename to src/build/data.rs index 6ab375d9..2b42554b 100644 --- a/src/data.rs +++ b/src/build/data.rs @@ -7,12 +7,12 @@ //! backwards compatibility, this module provides a `legacy` submodule that //! allows parsing the legacy format and convert it to the new one. -use crate::{ +use super::{ crunchbase::{CrunchbaseData, Organization}, github::{self, GithubData}, settings::LandscapeSettings, - DataSource, }; +use crate::DataSource; use anyhow::{format_err, Result}; use chrono::NaiveDate; use reqwest::StatusCode; @@ -75,7 +75,7 @@ impl LandscapeData { Ok(LandscapeData::from(legacy_data)) } - /// Add Crunchbase data to the landscape data. + /// Add items Crunchbase data. #[instrument(skip_all, err)] pub(crate) fn add_crunchbase_data(&mut self, crunchbase_data: CrunchbaseData) -> Result<()> { for item in &mut self.items { @@ -133,7 +133,7 @@ impl LandscapeData { Ok(()) } - /// Add GitHub data to the landscape data. + /// Add items repositories GitHub data. 
#[instrument(skip_all, err)] pub(crate) fn add_github_data(&mut self, github_data: GithubData) -> Result<()> { for item in &mut self.items { @@ -199,14 +199,15 @@ impl From for LandscapeData { let mut item = Item { name: legacy_item.name, category: legacy_category.name.clone(), - subcategory: legacy_subcategory.name.clone(), crunchbase_url: legacy_item.crunchbase, + description: legacy_item.description.clone(), enduser: legacy_item.enduser, - homepage_url: legacy_item.homepage_url, joined_at: legacy_item.joined, + homepage_url: legacy_item.homepage_url, logo: legacy_item.logo, maturity: legacy_item.project, openssf_best_practices_url: legacy_item.url_for_bestpractices, + subcategory: legacy_subcategory.name.clone(), twitter_url: legacy_item.twitter, unnamed_organization: legacy_item.unnamed_organization, ..Default::default() @@ -240,7 +241,6 @@ impl From for LandscapeData { // Additional information in extra field if let Some(extra) = legacy_item.extra { item.accepted_at = extra.accepted; - item.latest_annual_review_at = extra.annual_review_date; item.archived_at = extra.archived; item.artwork_url = extra.artwork_url; item.audits = extra.audits; @@ -252,8 +252,9 @@ impl From for LandscapeData { item.github_discussions_url = extra.github_discussions_url; item.graduated_at = extra.graduated; item.incubating_at = extra.incubating; - item.mailing_list_url = extra.mailing_list_url; + item.latest_annual_review_at = extra.annual_review_date; item.latest_annual_review_url = extra.annual_review_url; + item.mailing_list_url = extra.mailing_list_url; item.slack_url = extra.slack_url; item.specification = extra.specification; item.stack_overflow_url = extra.stack_overflow_url; @@ -347,6 +348,9 @@ pub(crate) struct Item { #[serde(skip_serializing_if = "Option::is_none")] pub crunchbase_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub devstats_url: Option, @@ -418,6 +422,34 @@ 
pub(crate) struct Item { } impl Item { + /// Get item's description. + #[allow(dead_code)] + pub(crate) fn description(&self) -> Option<&String> { + // Use item's description if available + let mut description = self.description.as_ref(); + + // Otherwise, use primary repository description if available + if description.is_none() || description.unwrap().is_empty() { + description = + self.primary_repository().and_then(|r| r.github_data.as_ref().map(|gh| &gh.description)); + } + + // Otherwise, use Crunchbase data description + if description.is_none() || description.unwrap().is_empty() { + description = self.crunchbase_data.as_ref().and_then(|cb| cb.description.as_ref()); + } + + description + } + + /// Get primary repository if available. + #[allow(dead_code)] + pub(crate) fn primary_repository(&self) -> Option<&Repository> { + self.repositories + .as_ref() + .and_then(|repos| repos.iter().find(|r| r.primary.unwrap_or_default())) + } + /// Generate and set the item's id. fn set_id(&mut self) { let key = format!("{}##{}##{}", &self.category, &self.subcategory, &self.name); @@ -476,7 +508,7 @@ pub(crate) struct ItemSummary { /// Project maturity level. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] -pub(super) enum Maturity { +pub(crate) enum Maturity { Archived, Graduated, Incubating, @@ -528,7 +560,7 @@ mod legacy { //! legacy format and convert it to the new one. use super::{ItemAudit, Maturity}; - use crate::{crunchbase::CRUNCHBASE_URL, github::GITHUB_REPO_URL}; + use crate::build::{crunchbase::CRUNCHBASE_URL, github::GITHUB_REPO_URL}; use anyhow::{format_err, Context, Result}; use chrono::NaiveDate; use serde::{Deserialize, Serialize}; @@ -536,13 +568,13 @@ mod legacy { /// Landscape data (legacy format). #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] - pub(super) struct LandscapeData { + pub(crate) struct LandscapeData { pub landscape: Vec, } impl LandscapeData { /// Validate landscape data. 
- pub(super) fn validate(&self) -> Result<()> { + pub(crate) fn validate(&self) -> Result<()> { let mut items_seen = Vec::new(); for (category_index, category) in self.landscape.iter().enumerate() { @@ -603,27 +635,28 @@ mod legacy { /// Landscape category. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] - pub(super) struct Category { + pub(crate) struct Category { pub name: String, pub subcategories: Vec, } /// Landscape subcategory. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] - pub(super) struct SubCategory { + pub(crate) struct SubCategory { pub name: String, pub items: Vec, } /// Landscape item (project, product, member, etc). #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] - pub(super) struct Item { + pub(crate) struct Item { pub name: String, pub homepage_url: String, pub logo: String, pub additional_repos: Option>, pub branch: Option, pub crunchbase: Option, + pub description: Option, pub enduser: Option, pub extra: Option, pub joined: Option, @@ -636,14 +669,14 @@ mod legacy { /// Landscape item repository. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] - pub(super) struct Repository { + pub(crate) struct Repository { pub repo_url: String, pub branch: Option, } /// Extra information for a landscape item. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] - pub(super) struct ItemExtra { + pub(crate) struct ItemExtra { pub accepted: Option, pub archived: Option, pub audits: Option>, diff --git a/src/datasets.rs b/src/build/datasets.rs similarity index 95% rename from src/datasets.rs rename to src/build/datasets.rs index 7195bf18..9e5623b9 100644 --- a/src/datasets.rs +++ b/src/build/datasets.rs @@ -6,7 +6,7 @@ //! that they can be fetched when needed. use self::{base::Base, full::Full}; -use crate::{data::LandscapeData, settings::LandscapeSettings}; +use super::{settings::LandscapeSettings, LandscapeData}; use anyhow::{Ok, Result}; /// Datasets collection. 
@@ -40,13 +40,14 @@ impl Datasets { /// This dataset contains the minimal data the web application needs to render /// the initial page and power the features available on it. mod base { - use crate::{ + use crate::build::{ data::{Category, CategoryName, ItemFeatured, LandscapeData, Maturity}, settings::{Colors, GridItemsSize, Group, Images, LandscapeSettings, SocialNetworks}, }; use serde::{Deserialize, Serialize}; use uuid::Uuid; + /// Base dataset information. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Base { pub foundation: String, @@ -75,6 +76,7 @@ mod base { pub social_networks: Option, } + /// Base dataset item information. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Item { pub category: String, @@ -145,9 +147,10 @@ mod base { /// information is used by the web application to power features that require /// some extra data not available in the base dataset. mod full { - use crate::data::{Item, LandscapeData}; + use crate::build::data::{Item, LandscapeData}; use serde::{Deserialize, Serialize}; + /// Full dataset information. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Full { #[serde(skip_serializing_if = "Vec::is_empty")] diff --git a/src/github.rs b/src/build/github.rs similarity index 96% rename from src/github.rs rename to src/build/github.rs index 163a495d..f3498fa9 100644 --- a/src/github.rs +++ b/src/build/github.rs @@ -2,8 +2,7 @@ //! from GitHub for each of the landscape items repositories (when applicable), //! as well as the functionality used to collect that information. 
-use crate::cache::Cache; -use crate::data::LandscapeData; +use super::{cache::Cache, LandscapeData}; use anyhow::{format_err, Result}; use async_trait::async_trait; use chrono::{DateTime, Utc}; @@ -134,29 +133,25 @@ pub(crate) struct Repository { pub contributors: Contributors, pub description: String, pub first_commit: Commit, - - /// Represents the moment at which this instance was generated pub generated_at: DateTime, + pub latest_commit: Commit, + pub participation_stats: Vec, + pub stars: i64, + pub url: String, #[serde(skip_serializing_if = "Option::is_none")] pub languages: Option>, - pub latest_commit: Commit, - #[serde(skip_serializing_if = "Option::is_none")] pub latest_release: Option, #[serde(skip_serializing_if = "Option::is_none")] pub license: Option, - - pub participation_stats: Vec, - pub stars: i64, - pub url: String, } impl Repository { /// Create a new Repository instance from information available on GitHub. - pub(crate) async fn new(gh: Object, repo_url: &str) -> Result { + async fn new(gh: Object, repo_url: &str) -> Result { // Collect some information from GitHub let (owner, repo) = get_owner_and_repo(repo_url)?; let gh_repo = gh.get_repository(&owner, &repo).await?; @@ -196,8 +191,8 @@ impl Repository { /// Commit information. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Commit { - ts: Option>, - url: String, + pub ts: Option>, + pub url: String, } impl From for Commit { @@ -216,15 +211,15 @@ impl From for Commit { /// Contributors information. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Contributors { - count: usize, - url: String, + pub count: usize, + pub url: String, } /// Release information. 
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub(crate) struct Release { - ts: Option>, - url: String, + pub ts: Option>, + pub url: String, } impl From for Release { @@ -240,12 +235,12 @@ impl From for Release { const GITHUB_API_URL: &str = "https://api.github.com"; /// Type alias to represent a GH trait object. -pub(crate) type DynGH = Box; +type DynGH = Box; /// Trait that defines some operations a GH implementation must support. #[async_trait] #[cfg_attr(test, automock)] -pub(crate) trait GH { +trait GH { /// Get number of repository contributors. async fn get_contributors_count(&self, owner: &str, repo: &str) -> Result; @@ -269,14 +264,14 @@ pub(crate) trait GH { } /// GH implementation backed by the GitHub API. -pub struct GHApi { +struct GHApi { gh_client: octorust::Client, http_client: reqwest::Client, } impl GHApi { /// Create a new GHApi instance. - pub fn new(token: &str) -> Result { + fn new(token: &str) -> Result { // Setup octorust GitHub API client let user_agent = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); let gh_client = octorust::Client::new(user_agent.clone(), Credentials::Token(token.to_string()))?; diff --git a/src/guide.rs b/src/build/guide.rs similarity index 100% rename from src/guide.rs rename to src/build/guide.rs diff --git a/src/logos.rs b/src/build/logos.rs similarity index 98% rename from src/logos.rs rename to src/build/logos.rs index cb32047b..a87c7546 100644 --- a/src/logos.rs +++ b/src/build/logos.rs @@ -1,7 +1,8 @@ //! This module provides some helper functions to prepare logos to be displayed //! on the landscape web application. 
-use crate::{cache::Cache, LogosSource}; +use super::cache::Cache; +use crate::LogosSource; use anyhow::{format_err, Result}; use lazy_static::lazy_static; use regex::bytes::Regex; diff --git a/src/build.rs b/src/build/mod.rs similarity index 97% rename from src/build.rs rename to src/build/mod.rs index 37488b5b..1dc1f89d 100644 --- a/src/build.rs +++ b/src/build/mod.rs @@ -2,18 +2,17 @@ #![allow(non_upper_case_globals)] -use crate::{ +use self::{ cache::Cache, crunchbase::collect_crunchbase_data, - data::LandscapeData, datasets::Datasets, github::collect_github_data, guide::LandscapeGuide, logos::prepare_logo, projects::{generate_projects_csv, Project, ProjectsMd}, settings::{Images, LandscapeSettings}, - BuildArgs, GuideSource, LogosSource, }; +use crate::{BuildArgs, GuideSource, LogosSource}; use anyhow::{format_err, Context, Result}; use askama::Template; use futures::stream::{self, StreamExt}; @@ -31,6 +30,17 @@ use tracing::{debug, error, info, instrument}; use url::Url; use uuid::Uuid; +mod cache; +mod crunchbase; +mod data; +mod datasets; +mod github; +mod guide; +mod logos; +mod projects; +mod settings; +pub(crate) use data::LandscapeData; + /// Path where the datasets will be written to in the output directory. const DATASETS_PATH: &str = "data"; @@ -334,7 +344,7 @@ async fn prepare_items_logos( #[derive(Debug, Clone, Template)] #[template(path = "index.html", escape = "none")] struct Index<'a> { - pub datasets: &'a Datasets, + datasets: &'a Datasets, } /// Render index file and write it to the output directory. @@ -391,7 +401,7 @@ mod filters { /// /// Based on the `json` built-in filter except the output is not pretty /// printed. 
- pub fn json_compact(s: S) -> askama::Result { + pub(crate) fn json_compact(s: S) -> askama::Result { let mut writer = JsonEscapeBuffer::new(); serde_json::to_writer(&mut writer, &s)?; Ok(writer.finish()) diff --git a/src/projects.rs b/src/build/projects.rs similarity index 98% rename from src/projects.rs rename to src/build/projects.rs index 3f6cae47..ffad3611 100644 --- a/src/projects.rs +++ b/src/build/projects.rs @@ -1,7 +1,7 @@ //! This module defines the functionality to generate the `projects.md` and //! `projects.csv` files from the information available in the landscape. -use crate::data::{LandscapeData, DATE_FORMAT}; +use super::{data::DATE_FORMAT, LandscapeData}; use anyhow::Result; use askama::Template; use chrono::NaiveDate; diff --git a/src/settings.rs b/src/build/settings.rs similarity index 98% rename from src/settings.rs rename to src/build/settings.rs index be0281f2..0981c9f9 100644 --- a/src/settings.rs +++ b/src/build/settings.rs @@ -7,10 +7,8 @@ //! NOTE: the landscape settings file uses a new format that is not backwards //! compatible with the legacy settings file used by existing landscapes. -use crate::{ - data::{Category, CategoryName}, - SettingsSource, -}; +use super::data::{Category, CategoryName}; +use crate::SettingsSource; use anyhow::{format_err, Result}; use reqwest::StatusCode; use serde::{Deserialize, Serialize}; @@ -114,10 +112,10 @@ pub(crate) struct FeaturedItemRuleOption { pub value: String, #[serde(skip_serializing_if = "Option::is_none")] - pub order: Option, + pub label: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub label: Option, + pub order: Option, } /// Grid items size. diff --git a/src/deploy/mod.rs b/src/deploy/mod.rs new file mode 100644 index 00000000..76b4117f --- /dev/null +++ b/src/deploy/mod.rs @@ -0,0 +1,3 @@ +//! This module defines the functionality of the deploy CLI subcommand. 
+ +pub(crate) mod s3; diff --git a/src/s3.rs b/src/deploy/s3.rs similarity index 100% rename from src/s3.rs rename to src/deploy/s3.rs diff --git a/src/main.rs b/src/main.rs index 87a396d6..3eb4cd3a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,20 +4,12 @@ use anyhow::Result; use build::build; use clap::{Args, Parser, Subcommand}; +use deploy::s3; use std::path::PathBuf; use validate::validate_data; mod build; -mod cache; -mod crunchbase; -mod data; -mod datasets; -mod github; -mod guide; -mod logos; -mod projects; -mod s3; -mod settings; +mod deploy; mod validate; /// CLI arguments. diff --git a/src/validate.rs b/src/validate/mod.rs similarity index 90% rename from src/validate.rs rename to src/validate/mod.rs index fa01edc1..48c157ff 100644 --- a/src/validate.rs +++ b/src/validate/mod.rs @@ -1,6 +1,6 @@ //! This module defines the functionality of the validate CLI subcommand. -use crate::{data::LandscapeData, DataSource}; +use crate::{build::LandscapeData, DataSource}; use anyhow::{Context, Result}; use tracing::instrument;