wip

owid · Jul 21, 2023 · c9c83e3 · c9c83e3
1 parent a33dcce
commit c9c83e3
Showing 1 changed file with 77 additions and 67 deletions.
diff --git a/walkthrough/snapshot.py b/walkthrough/snapshot.py
@@ -20,33 +20,44 @@ class Options(Enum):
     DATASET_MANUAL_IMPORT = "Import dataset from local file"
 
 
+class OriginForm(BaseModel):  # Values of the "Origin: ..." form inputs, held by SnapshotForm.origin.
+    dataset_title_owid: str  # value of the "Origin: Dataset title OWID" input
+    dataset_description_owid: str  # OWID's own description of the dataset
+    dataset_description_producer: str  # description of the dataset as used by the producer
+    producer: str  # institution name (without a year) or main authors of the paper
+    citation_producer: str  # full citation that the producer asks for
+    dataset_url_main: str  # URL of the project's main page
+    dataset_url_download: str  # direct URL to download the dataset
+    date_accessed: str  # NOTE(review): no matching form input is visible in this diff; confirm how it is set
+    date_published: str  # publication date; may be a full date or just a year
+
+
+class LicenseForm(BaseModel):  # Dataset license, held by SnapshotForm.license.
+    url: str  # URL of the page where the source specifies the license
+    name: str  # name of the license, e.g. "Creative Commons BY 4.0"
+
+
 class SnapshotForm(BaseModel):
     namespace: str
     snapshot_version: str
     short_name: str
-    name: str
-    source_name: str
-    source_published_by: str
-    publication_year: Optional[str]
-    publication_date: Optional[str]
-    url: str
-    source_data_url: str
-    file_extension: str
-    license_name: str
-    license_url: str
-    description: str
+    origin: OriginForm  # built in __init__ from the flat origin form keys
+    license: LicenseForm  # built in __init__ from "license_url" / "license_name"
     is_private: bool
     dataset_manual_import: bool
 
     def __init__(self, **data: Any) -> None:
         options = data.pop("options")
         data["is_private"] = Options.IS_PRIVATE.value in options
         data["dataset_manual_import"] = Options.DATASET_MANUAL_IMPORT.value in options
-        super().__init__(**data)
 
-    @property
-    def version(self) -> str:
-        return self.snapshot_version or self.publication_year or self.publication_date  # type: ignore
+        # Form keys are flat: nest the origin ones (missing keys surface as validation errors).
+        # NOTE(review): `date_accessed` has no visible form input yet; confirm where it comes from.
+        origin = {key: data.pop(key) for key in OriginForm.__annotations__ if key in data}
+        data["origin"] = OriginForm(**origin)
+        # License inputs are named "license_url"/"license_name"; strip the prefix for LicenseForm.
+        data["license"] = LicenseForm(url=data.pop("license_url", None), name=data.pop("license_name", None))
+        super().__init__(**data)
 
 
 def app(run_checks: bool) -> None:
@@ -74,15 +85,15 @@ def app(run_checks: bool) -> None:
                 help_text="Institution name. Example: emdat",
             ),
             pi.input(
-                "Snapshot version",
+                "Version",
                 name="snapshot_version",
                 placeholder=str(dt.date.today()),
                 required=True,
                 value=state.get("snapshot_version", str(dt.date.today())),
                 help_text="Version of the snapshot dataset (by default, the current date, or exceptionally the publication date).",
             ),
             pi.input(
-                "Snapshot dataset short name",
+                "Short name",
                 name="short_name",
                 placeholder="testing_dataset_name",
                 required=True,
@@ -91,79 +102,78 @@ def app(run_checks: bool) -> None:
                 help_text="Underscored dataset short name. Example: natural_disasters",
             ),
             pi.input(
-                "Dataset full name",
-                name="name",
-                placeholder="Testing Dataset Name (Institution, 2023)",
-                required=True,
-                value=state.get("name"),
-                help_text="Human-readable dataset name, followed by (Institution, Year of version). Example: Natural disasters (EMDAT, 2022)",
-            ),
-            pi.input(
-                "Source short citation",
-                name="source_name",
-                placeholder="Testing Short Citation",
-                required=True,
-                value=state.get("source_name"),
-                help_text="Short source citation (to show in charts). Example: EM-DAT",
-            ),
-            pi.input(
-                "Source full citation",
-                name="source_published_by",
-                placeholder="Testing Full Citation",
-                required=True,
-                value=state.get("source_published_by"),
-                help_text="Testing Full Citation, as recommended by the source. Example: EM-DAT, CRED / UCLouvain, Brussels, Belgium",
+                "Origin: Dataset title OWID",
+                name="dataset_title_owid",
+                placeholder="Testing Title OWID",
+                value=state.get("dataset_title_owid"),
+                help_text="Original title from the source. Example: Natural disasters",
             ),
             pi.input(
-                "Publication date",
-                name="publication_date",
-                placeholder="",
-                value=state.get("publication_date"),
-                help_text="Date when the dataset was published by the source. Example: 2023-01-01",
+                "Origin: Producer",
+                name="producer",
+                placeholder="Testing Institution",
+                value=state.get("producer"),
+                help_text="The name of the institution (without a year) or the main authors of the paper. Example: EM-DAT",
             ),
+            # TODO: should it contain year too? it would be good to clarify
             pi.input(
-                "Publication year",
-                name="publication_year",
-                type=pi.NUMBER,
-                placeholder="",
-                help_text="Only if the exact publication date is unknown, year when the dataset was published by the source. Example: 2023",
+                "Origin: Citation by the producer",
+                name="citation_producer",
+                placeholder="Testing Citation",
+                value=state.get("citation_producer"),
+                help_text="The full citation that the producer asks for. Example: EM-DAT, CRED / UCLouvain, Brussels, Belgium",
             ),
             pi.input(
-                "Dataset webpage URL",
-                name="url",
-                placeholder=("https://url_of_testing_source.com/"),
-                required=True,
-                value=state.get("url"),
+                "Origin: Dataset main URL",
+                name="dataset_url_main",
+                placeholder="https://url_of_testing_source.com/",
+                value=state.get("dataset_url_main"),
                 help_text="URL to the main page of the project.",
             ),
             pi.input(
-                "Dataset download URL",
-                name="source_data_url",
+                "Origin: Dataset download URL",
+                name="dataset_url_download",
                 placeholder="https://url_of_testing_source.com/data.csv",
-                value=state.get("source_data_url"),
-                help_text="URL to download the data file.",
+                value=state.get("dataset_url_download"),
+                help_text="Direct URL to download the dataset.",
             ),
             pi.input(
-                "File extension",
-                name="file_extension",
-                placeholder="csv",
-                value=state.get("file_extension"),
-                help_text="File extension (without the '.') of the file to be downloaded. Example: csv",
+                "Origin: Publication date",
+                name="date_published",
+                placeholder="2023-01-01",
+                value=state.get("date_published"),
+                help_text="Date when the dataset was published, could be date or year. Example: 2023-01-01 or 2023",
+            ),
+            pi.textarea(
+                "Origin: Dataset description OWID",
+                name="dataset_description_owid",
+                value=state.get("dataset_description_owid"),
+                help_text="Our description of the dataset.",
+            ),
+            pi.textarea(
+                "Origin: Dataset description by the producer",
+                name="dataset_description_producer",
+                value=state.get("dataset_description_producer"),
+                help_text="The description for this dataset used by the producer.",
             ),
             pi.input(
-                "License URL",
+                "License: URL",
                 name="license_url",
                 placeholder=("https://url_of_testing_source.com/license"),
                 help_text="URL to the page where the source specifies the license of the dataset.",
             ),
             pi.input(
-                "License name",
+                "License: name",
                 name="license_name",
                 placeholder="Creative Commons BY 4.0",
                 help_text="Name of the dataset license. Example: 'Creative Commons BY 4.0'",
             ),
-            pi.textarea(
-                "Description", name="description", value=state.get("description"), help_text="Dataset description."
+            pi.input(
+                "File extension",
+                name="file_extension",
+                placeholder="csv",
+                value=state.get("file_extension"),
+                help_text="File extension (without the '.') of the file to be downloaded. Example: csv",
             ),
             pi.checkbox(
                 "Other options",