Merge pull request #26 from claromes/1.0a7

claromes · web-flow · commit 2428e7d70d45 · 2024-07-04T16:49:22.000-03:00
v1.0a7
diff --git a/CITATION.cff b/CITATION.cff
@@ -13,7 +13,7 @@ authors:
     email: support@claromes.com
 identifiers:
   - type: doi
-    value: 10.5281/zenodo.12528448
+    value: 10.5281/zenodo.12528447
     description: The concept DOI of the work.
   - type: url
     value: "https://pypi.org/project/waybacktweets/"
diff --git a/README.md b/README.md
@@ -1,8 +1,9 @@
 # Wayback Tweets
 
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![docs](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml/badge.svg)](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app)
+[![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528447.svg)](https://doi.org/10.5281/zenodo.12528447) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing)
 
-Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats.
+
+Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML (for easy viewing of the tweets using iframe tags), CSV, and JSON formats.
 
 ## Installation
 
@@ -57,7 +58,7 @@ if archived_tweets:
 ## Acknowledgements
 
 - Tristan Lee (Bellingcat's Data Scientist) for the idea of the application.
-- Jessica Smith (Snowflake's Marketing Specialist) and Streamlit/Snowflake teams for the additional server resources on Streamlit Cloud.
+- Jessica Smith (Snowflake's Community Growth Specialist) and Streamlit/Snowflake team for the additional server resources on Streamlit Cloud.
 - OSINT Community for recommending the application.
 
 > [!NOTE]
diff --git a/app/app.py b/app/app.py
@@ -34,7 +34,7 @@
     layout="centered",
     menu_items={
         "About": f"""
-    [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)
+    [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md)
 
     The application is a prototype hosted on Streamlit Cloud, serving as an alternative to the command line tool.
 
@@ -168,16 +168,12 @@ def scroll_page():
 
 # ------ User Interface Settings ------ #
 
-st.info(
-    "🥳 [**Pre-release 1.0x: Python module, CLI, and new Streamlit app**](https://github.com/claromes/waybacktweets/releases)"  # noqa: E501
-)
-
 st.image(TITLE, use_column_width="never")
 st.caption(
-    "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)"  # noqa: E501
+    "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![sponsor](https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github)](https://github.com/sponsors/claromes)"  # noqa: E501
 )
 st.write(
-    "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats."  # noqa: E501
+    "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the iframe tag), CSV, and JSON formats."  # noqa: E501
 )
 
 st.write(
@@ -291,15 +287,15 @@ def scroll_page():
 
         # -- Rendering -- #
 
-        if csv_data and json_data and html_content:
-            st.session_state.count = len(df)
-            st.write(f"**{st.session_state.count} URLs have been captured**")
+        st.session_state.count = len(df)
+        st.write(f"**{st.session_state.count} URLs have been captured**")
 
-            # -- HTML -- #
+        tab1, tab2, tab3 = st.tabs(["HTML", "CSV", "JSON"])
 
-            st.header("HTML", divider="gray", anchor=False)
+        # -- HTML -- #
+        with tab1:
             st.write(
-                f"Visualize tweets more efficiently through `iframes`. Download the @{st.session_state.current_username}'s archived tweets in HTML."  # noqa: E501
+                f"Visualize tweets more efficiently through iframe tags. Download the @{st.session_state.current_username}'s archived tweets in HTML."  # noqa: E501
             )
 
             col5, col6 = st.columns([1, 18])
@@ -317,8 +313,7 @@ def scroll_page():
                 )
 
             # -- CSV -- #
-
-            st.header("CSV", divider="gray", anchor=False)
+        with tab2:
             st.write(
                 "Check the data returned in the dataframe below and download the file."
             )
@@ -340,8 +335,7 @@ def scroll_page():
             st.dataframe(df, use_container_width=True)
 
             # -- JSON -- #
-
-            st.header("JSON", divider="gray", anchor=False)
+        with tab3:
             st.write(
                 "Check the data returned in JSON format below and download the file."
             )
diff --git a/docs/conf.py b/docs/conf.py
@@ -20,6 +20,7 @@
     "sphinx_new_tab_link",
     "sphinx_click.ext",
     "sphinx_autodoc_typehints",
+    "sphinxcontrib.youtube",
 ]
 
 templates_path = ["_templates"]
diff --git a/docs/contribute.rst b/docs/contribute.rst
@@ -19,7 +19,7 @@ These are the prerequisites:
 - Python 3.10+
 - Poetry
 
-Install from the source, following the :ref:`installation` instructions.
+Install from the source, following the :ref:`installation_from_source` instructions.
 
 Brief explanation about the code under the Wayback Tweets directory:
 
diff --git a/docs/handson.rst b/docs/handson.rst
@@ -0,0 +1,22 @@
+Hands-On Examples
+====================
+
+- **Notebook**
+
+   This notebook demonstrates how to fetch, parse, and export archived tweets for a specific user using the ``waybacktweets`` library.
+
+   .. image:: https://colab.research.google.com/assets/colab-badge.svg
+      :target: https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing
+      :alt: Open In Colab
+
+.. raw:: html
+
+   <br>
+   <br>
+
+- **Video**
+
+   Demonstration of how to use Wayback Tweets and other tools to retrieve tweets (in Spanish)
+
+   ..  youtube:: qy3wOnUxe6A
+      :width: 100%
diff --git a/docs/index.rst b/docs/index.rst
@@ -9,10 +9,11 @@ Wayback Tweets
 
 Pre-release: |release|
 
-Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML (for easy viewing of the tweets using the ``iframe`` tag), CSV, and JSON formats.
+Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML (for easy viewing of the tweets using iframe tags), CSV, and JSON formats.
 
-.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg
-  :target: https://doi.org/10.5281/zenodo.12528448
+.. image:: https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github
+  :target: https://github.com/sponsors/claromes
+  :alt: GitHub Sponsors
 
 .. note::
     Intensive queries can lead to rate limiting, resulting in a temporary ban of a few minutes from web.archive.org.
@@ -30,6 +31,7 @@ User Guide
     field_options
     outputs
     exceptions
+    handson
     contribute
     todo
 
diff --git a/docs/installation.rst b/docs/installation.rst
@@ -1,8 +1,7 @@
-.. _installation:
-
 Installation
 ================
 
+**Wayback Tweets is compatible with Python 3.10 and above.**
 
 Using pip
 ------------
@@ -11,47 +10,68 @@ Using pip
 
         pip install waybacktweets
 
+Using Poetry
+------------
+
+    .. code-block:: shell
+
+        poetry add waybacktweets
+
+.. _installation_from_source:
+
 From source
 -------------
 
-    Clone the repository:
+    **Clone the repository:**
 
     .. code-block:: shell
 
         git clone git@github.com:claromes/waybacktweets.git
 
-    Change directory:
+    **Change directory:**
 
     .. code-block:: shell
 
         cd waybacktweets
 
-    Install poetry, if you haven't already:
+    **Install Poetry, if you haven't already:**
 
     .. code-block:: shell
 
         pip install poetry
 
 
-    Install the dependencies:
+    **Install the dependencies:**
 
     .. code-block:: shell
 
         poetry install
 
-    Run the CLI:
+    **Install the pre-commit hooks:**
+
+    .. code-block:: shell
+
+        poetry run pre-commit install
+
+    **Run the CLI:**
 
     .. code-block:: shell
 
         poetry run waybacktweets [SUBCOMMANDS]
 
-    Run the Streamlit App:
+    **Start a new shell and activate the virtual environment:**
+
+    .. code-block:: shell
+
+        poetry shell
+
+    **Run the Streamlit App:**
 
     .. code-block:: shell
 
         streamlit run app/app.py
 
-    Build the docs:
+    **Build the docs:**
 
     .. code-block:: shell
 
diff --git a/legacy_app/legacy_app.py b/legacy_app/legacy_app.py
@@ -14,11 +14,7 @@
     layout="centered",
     menu_items={
         "About": """
-        ## 🏛️ Wayback Tweets
-
-        Tool that displays, via Wayback CDX Server API, multiple archived tweets on Wayback Machine to avoid opening each link manually. Users can apply filters based on specific years and view tweets that do not have the original URL available.
-
-        This tool is a prototype, please feel free to send your [feedbacks](https://github.com/claromes/waybacktweets/issues). Created by [@claromes](https://claromes.com).
+        This is the legacy application of [Wayback Tweets](https://waybacktweets.streamlit.app/).
 
         -------
         """,  # noqa: E501
@@ -386,7 +382,7 @@ def next_page():
 
 # UI
 st.title(
-    "Wayback Tweets [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)",  # noqa: E501
+    "Wayback Tweets",  # noqa: E501
     anchor=False,
     help="v0.4.3",
 )
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "waybacktweets"
-version = "1.0a6"
+version = "1.0a7"
 description = "Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data."
 authors = ["Claromes <support@claromes.com>"]
 license = "GPLv3"
@@ -46,6 +46,7 @@ sphinxcontrib-mermaid = "^0.9.2"
 sphinx-new-tab-link = "^0.4.0"
 sphinx-click = "^6.0.0"
 sphinx-autodoc-typehints = "^2.1.1"
+sphinxcontrib-youtube = "^1.4.1"
 
 [tool.poetry.group.dev.dependencies]
 streamlit = "1.36.0"
diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py
@@ -97,7 +97,7 @@ def _parse_date(
     "verbose",
     is_flag=True,
     default=False,
-    help="Shows the error log.",
+    help="Shows the log.",
 )
 def main(
     username: str,

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@`
`20`	`20`	`"sphinx_new_tab_link",`
`21`	`21`	`"sphinx_click.ext",`
`22`	`22`	`"sphinx_autodoc_typehints",`
	`23`	`+ "sphinxcontrib.youtube",`
`23`	`24`	`]`
`24`	`25`
`25`	`26`	`templates_path = ["_templates"]`