Generate individual data files for each item

Signed-off-by: Sergio Castaño Arteaga <[email protected]>
cncf · Sep 3, 2023 · 009246d · 009246d
1 parent 68bc285
commit 009246d
Show file tree

Hide file tree

Showing 3 changed files with 84 additions and 35 deletions.
diff --git a/src/build.rs b/src/build.rs
@@ -96,6 +96,9 @@ pub(crate) async fn build(args: &BuildArgs) -> Result<()> {
     landscape_data.add_crunchbase_data(crunchbase_data)?;
     landscape_data.add_github_data(github_data)?;
 
+    // Generate items data files
+    generate_items_data_files(&landscape_data, &args.output_dir)?;
+
     // Generate datasets for web application
     let datasets = generate_datasets(&landscape_data, &settings, includes_guide, &args.output_dir)?;
 
@@ -153,9 +156,7 @@ fn copy_web_assets(output_dir: &Path) -> Result<()> {
 }
 
 /// Generate datasets from the landscape data and settings, as well as from the
-/// data collected from external services (GitHub, Crunchbase, etc). Some of
-/// the datasets will be embedded in the index document, and the rest will be
-/// written to the DATASETS_PATH in the output directory.
+/// data collected from external services (GitHub, Crunchbase, etc).
 #[instrument(skip_all, err)]
 fn generate_datasets(
     landscape_data: &LandscapeData,
@@ -172,13 +173,23 @@ fn generate_datasets(
     let mut base_file = File::create(datasets_path.join("base.json"))?;
     base_file.write_all(&serde_json::to_vec(&datasets.base)?)?;
 
-    // Full
-    let mut full_file = File::create(datasets_path.join("full.json"))?;
-    full_file.write_all(&serde_json::to_vec(&datasets.full)?)?;
-
     Ok(datasets)
 }
 
+/// Write one `item_<id>.json` file per landscape item into DATASETS_PATH under `output_dir`, stopping at the first error.
+#[instrument(skip_all, err)]
+fn generate_items_data_files(landscape_data: &LandscapeData, output_dir: &Path) -> Result<()> {
+    debug!("generating items data files");
+
+    let datasets_path = output_dir.join(DATASETS_PATH); // NOTE(review): not created here; presumably set up earlier in the build, confirm
+    for item in &landscape_data.items {
+        let mut item_file = File::create(datasets_path.join(format!("item_{}.json", item.id)))?; // file name is derived from the item's id
+        item_file.write_all(&serde_json::to_vec(item)?)?; // the whole item is serialized as JSON bytes
+    }
+
+    Ok(())
+}
+
 /// Generate the projects.md and projects.csv files from the landscape data.
 #[instrument(skip_all, err)]
 fn generate_projects_files(landscape_data: &LandscapeData, output_dir: &Path) -> Result<()> {

diff --git a/src/data.rs b/src/data.rs
@@ -423,6 +423,13 @@ impl Item {
         let key = format!("{}##{}##{}", &self.category, &self.subcategory, &self.name);
         self.id = Uuid::new_v5(&Uuid::NAMESPACE_OID, key.as_bytes());
     }
+
+    /// Return the first repository flagged as primary, or `None` if the item has no repositories or none is flagged.
+    pub(crate) fn get_primary_repository(&self) -> Option<&Repository> {
+        self.repositories
+            .as_ref()
+            .and_then(|repos| repos.iter().find(|r| r.primary.unwrap_or_default())) // a missing `primary` value falls back to its default
+    }
 }
 
 /// Landscape item audit information.

diff --git a/src/datasets.rs b/src/datasets.rs
@@ -5,7 +5,7 @@
 //! rendering it), whereas others will be written to the output directory so
 //! that they can be fetched when needed.
 
-use self::{base::Base, full::Full};
+use self::base::Base;
 use crate::{data::LandscapeData, settings::LandscapeSettings};
 use anyhow::{Ok, Result};
 
@@ -14,9 +14,6 @@ use anyhow::{Ok, Result};
 pub(crate) struct Datasets {
     /// #[base]
     pub base: Base,
-
-    /// #[full]
-    pub full: Full,
 }
 
 impl Datasets {
@@ -28,7 +25,6 @@ impl Datasets {
     ) -> Result<Self> {
         let datasets = Datasets {
             base: Base::new(landscape_data, settings, includes_guide),
-            full: Full::new(landscape_data.clone()),
         };
 
         Ok(datasets)
@@ -44,6 +40,7 @@ mod base {
         data::{Category, CategoryName, ItemFeatured, LandscapeData, Maturity},
         settings::{Colors, GridItemsSize, Group, Images, LandscapeSettings, SocialNetworks},
     };
+    use chrono::NaiveDate;
     use serde::{Deserialize, Serialize};
     use uuid::Uuid;
 
@@ -79,16 +76,44 @@ mod base {
     pub(crate) struct Item {
         pub category: String,
         pub has_repositories: bool,
+        pub homepage_url: String,
         pub id: Uuid,
         pub name: String,
         pub logo: String,
         pub subcategory: String,
 
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub accepted_at: Option<NaiveDate>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub description: Option<String>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub devstats_url: Option<String>,
+
         #[serde(skip_serializing_if = "Option::is_none")]
         pub featured: Option<ItemFeatured>,
 
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub funding: Option<i64>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub github_stars: Option<i64>,
+
         #[serde(skip_serializing_if = "Option::is_none")]
         pub maturity: Option<Maturity>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub member_subcategory: Option<String>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub organization: Option<String>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub primary_repo_url: Option<String>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub twitter_url: Option<String>,
     }
 
     impl Base {
@@ -122,42 +147,48 @@ mod base {
 
             // Prepare items from landscape data
             for item in &landscape_data.items {
+                // Prepare item description
+                let mut description = item
+                    .get_primary_repository()
+                    .and_then(|r| r.github_data.as_ref().map(|gh| gh.description.clone()));
+                if description.is_none() {
+                    description = item.crunchbase_data.as_ref().and_then(|cb| cb.description.clone());
+                }
+
+                // Count stars across all repositories
+                let github_stars = item.repositories.as_ref().map(|repos| {
+                    repos.iter().fold(0, |acc, r| {
+                        acc + if let Some(gh_data) = &r.github_data {
+                            gh_data.stars
+                        } else {
+                            0
+                        }
+                    })
+                });
+
                 base.items.push(Item {
+                    accepted_at: item.accepted_at,
                     category: item.category.clone(),
+                    description,
+                    devstats_url: item.devstats_url.clone(),
                     featured: item.featured.clone(),
+                    funding: item.crunchbase_data.as_ref().and_then(|cb| cb.funding),
+                    github_stars,
                     has_repositories: !item.repositories.as_ref().unwrap_or(&vec![]).is_empty(),
+                    homepage_url: item.homepage_url.clone(),
                     id: item.id,
                     name: item.name.clone(),
                     logo: item.logo.clone(),
                     maturity: item.maturity.clone(),
+                    member_subcategory: item.member_subcategory.clone(),
+                    organization: item.crunchbase_data.as_ref().and_then(|cb| cb.name.clone()),
+                    primary_repo_url: item.get_primary_repository().map(|r| r.url.clone()),
                     subcategory: item.subcategory.clone(),
+                    twitter_url: item.twitter_url.clone(),
                 });
             }
 
             base
         }
     }
 }
-
-/// Full dataset.
-///
-/// This dataset contains all the information available for the landscape. This
-/// information is used by the web application to power features that require
-/// some extra data not available in the base dataset.
-mod full {
-    use crate::data::{Item, LandscapeData};
-    use serde::{Deserialize, Serialize};
-
-    #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
-    pub(crate) struct Full {
-        #[serde(skip_serializing_if = "Vec::is_empty")]
-        pub items: Vec<Item>,
-    }
-
-    impl Full {
-        /// Create a new Full instance from the landscape data provided.
-        pub(crate) fn new(data: LandscapeData) -> Self {
-            Full { items: data.items }
-        }
-    }
-}