Merge pull request #100 from filips123/backend-improvements

Implement some backend improvements
filips123 · Aug 28, 2024 · da93d07 · da93d07
2 parents 35cc44d + 5a0ee05
commit da93d07
Show file tree

Hide file tree

Showing 17 changed files with 1,021 additions and 929 deletions.
diff --git a/.github/workflows/api.yaml b/.github/workflows/api.yaml
@@ -24,13 +24,12 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Configure Poetry cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
-            ~/.cache/black
             ~/.cache/pip
             ~/.cache/pypoetry
             ~/.local/share/pypoetry
@@ -40,9 +39,9 @@ jobs:
           restore-keys: ${{ runner.os }}-poetry-lint-
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"
 
       - name: Install Poetry
         run: |
@@ -53,22 +52,24 @@ jobs:
       - name: Install dependencies
         run: poetry install --extras sentry
 
-      - name: Lint the project with ruff
-        run: ruff gimvicurnik
+      - name: Lint the project with ruff check
+        if: always()
+        run: ruff check --output-format=github
 
-      - name: Lint the project with black
-        run: black gimvicurnik --check
+      - name: Lint the project with ruff format
+        if: always()
+        run: ruff format --check
 
   typecheck:
     name: Typechecking
     runs-on: ubuntu-latest
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Configure Poetry cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cache/pip
@@ -80,9 +81,9 @@ jobs:
           restore-keys: ${{ runner.os }}-poetry-typecheck-
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"
 
       - name: Install Poetry
         run: |

diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
@@ -39,14 +39,16 @@ jobs:
         with:
           path: |
             ~/.cache/yarn
+            ~/.yarn/berry/cache
             ./website/node_modules/.cache
+            ./website/node_modules/.vite
           key: ${{ runner.os }}-yarn-deploy-${{ hashFiles('**/yarn.lock') }}
           restore-keys: ${{ runner.os }}-yarn-
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"
 
       - name: Install Poetry
         run: |

diff --git a/.github/workflows/website.yaml b/.github/workflows/website.yaml
@@ -52,9 +52,11 @@ jobs:
         run: yarn install --immutable
 
       - name: Lint the project with ESLint
+        if: always()
         run: yarn lint
 
       - name: Lint the project with Prettier
+        if: always()
         run: yarn format
 
   typecheck:

diff --git a/API/README.md b/API/README.md
@@ -1,5 +1,5 @@
-GimVičUrnik
-===========
+GimVičUrnik - API
+=================
 
 An API for a school timetable, substitutions and menus at Gimnazija Vič.
 
@@ -58,10 +58,10 @@ You can retrieve all API routes using the `gimvicurnik routes` commands. The off
 
 ## Contributing
 
-The API uses ruff, black and mypy for linting the code. They are included in project's development dependencies.
+The API uses ruff for linting and formatting the code, and mypy for typechecking. They are included in the project's development dependencies.
 
 Please make sure that your changes are formatted correctly according to the code style:
 
-* Linting: `ruff gimvicurnik`
+* Linting: `ruff check`
+* Formatting: `ruff format`
 * Typechecking: `mypy gimvicurnik`
-* Formatting: `black gimvicurnik`
diff --git a/API/config.yaml.sample b/API/config.yaml.sample
@@ -30,9 +30,9 @@ sentry:
     requests: 1.0
     other: 1.0
   profilerSampleRate:
-      commands: false
-      requests: false
-      other: false
+    commands: false
+    requests: false
+    other: false
 
 logging:
   version: 1

diff --git a/API/gimvicurnik/__init__.py b/API/gimvicurnik/__init__.py
@@ -235,7 +235,7 @@ def _format_date(date: datetime.date) -> str:
             return date.strftime("%d. %m. %Y")
 
         def _format_week(date: datetime.date) -> str:
-            return f"{_format_date(date)} — {_format_date((date + datetime.timedelta(days=4)))}"
+            return f"{_format_date(date)} \u2013 {_format_date(date + datetime.timedelta(days=4))}"
 
         filters = self.app.jinja_env.filters
         filters["date"] = _format_date

diff --git a/API/gimvicurnik/__main__.py b/API/gimvicurnik/__main__.py
@@ -41,7 +41,9 @@ def _get_version(ctx: click.Context, _param: str, value: str) -> None:
             flask_version = metadata.version("flask")
             werkzeug_version = metadata.version("werkzeug")
             pdfplumber_version = metadata.version("pdfplumber")
+            pdfminer_version = metadata.version("pdfminer.six")
             openpyxl_version = metadata.version("openpyxl")
+            mammoth_version = metadata.version("mammoth")
 
             try:
                 sentry_version = metadata.version("sentry-sdk")
@@ -56,7 +58,9 @@ def _get_version(ctx: click.Context, _param: str, value: str) -> None:
                 f"Flask: {flask_version}\n"
                 f"Werkzeug: {werkzeug_version}\n"
                 f"pdfplumber: {pdfplumber_version}\n"
+                f"pdfminer: {pdfminer_version}\n"
                 f"openpyxl: {openpyxl_version}\n"
+                f"mammoth: {mammoth_version}\n"
                 f"Sentry SDK: {sentry_version}",
                 color=ctx.color,
             )

diff --git a/API/gimvicurnik/blueprints/calendar.py b/API/gimvicurnik/blueprints/calendar.py
@@ -264,7 +264,7 @@ def get_combined_calendar_for_classes(classes: list[str]) -> Response:
                 Class.get_substitutions(None, classes),
                 Class.get_lessons(classes),
                 config.lessonTimes,
-                f"Koledar - {', '.join(classes)} - Gimnazija Vič",
+                f"Koledar \u2013 {', '.join(classes)} \u2013 Gimnazija Vič",
                 config.urls.api + request.path,
             )
 
@@ -274,7 +274,7 @@ def get_timetable_calendar_for_classes(classes: list[str]) -> Response:
                 Class.get_substitutions(None, classes),
                 Class.get_lessons(classes),
                 config.lessonTimes,
-                f"Urnik - {', '.join(classes)} - Gimnazija Vič",
+                f"Urnik \u2013 {', '.join(classes)} \u2013 Gimnazija Vič",
                 config.urls.api + request.path,
                 include_substitutions=False,
             )
@@ -285,7 +285,7 @@ def get_substitutions_calendar_for_classes(classes: list[str]) -> Response:
                 Class.get_substitutions(None, classes),
                 Class.get_lessons(classes),
                 config.lessonTimes,
-                f"Nadomeščanja - {', '.join(classes)} - Gimnazija Vič",
+                f"Nadomeščanja \u2013 {', '.join(classes)} \u2013 Gimnazija Vič",
                 config.urls.api + request.path,
                 include_timetable=False,
             )
@@ -297,6 +297,6 @@ def get_schedules_calendar_for_classes(classes: list[str]) -> Response:
                 .join(Class)
                 .filter(Class.name.in_(classes))
                 .order_by(LunchSchedule.time, LunchSchedule.class_),
-                f"Razporedi kosila - {', '.join(classes)} - Gimnazija Vič",
+                f"Razporedi kosila \u2013 {', '.join(classes)} \u2013 Gimnazija Vič",
                 config.urls.api + request.path,
             )
diff --git a/API/gimvicurnik/blueprints/feed.py b/API/gimvicurnik/blueprints/feed.py
@@ -38,7 +38,7 @@ class DateDisplay(enum.Enum):
 def get_mime_type(url: str) -> str:
     """Get MIME type for a few extensions we know documents use."""
 
-    if re.match(r"\.pdf(?:\?[\w=]*)?$", url):
+    if re.search(r"\.pdf(?:\?[\w=]*)?$", url):
         return "application/pdf"
     if url.endswith(".docx"):
         return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
@@ -81,12 +81,16 @@ def _create_feed(
             last_updated = Session.query(func.max(Document.modified)).filter(query_filter).scalar()
             last_updated = last_updated or date.fromtimestamp(0)
 
+            # Get the frontend page based on the feed type
+            feed_page = "circulars" if feed_type == FeedType.CIRCULARS else "sources"
+
             # Render the feed from Atom/RSS template
             content = render_template(
                 f"{feed_format.value}.xml",
                 urls=config.urls,
                 name=feed_name,
                 type=feed_type.value,
+                page=feed_page,
                 entries=query,
                 last_updated=last_updated,
                 date_display=date_display,

diff --git a/API/gimvicurnik/database/__init__.py b/API/gimvicurnik/database/__init__.py
@@ -186,7 +186,8 @@ def get_empty(cls) -> Iterator[dict[str, Any]]:
         times = Session.query(func.min(Lesson.time), func.max(Lesson.time))[0]
 
         if times[0] is None or times[1] is None:
-            return []
+            yield from ()
+            return
 
         classrooms = Session.query(Classroom.name).order_by(Classroom.name).distinct().all()
         occupied = set(Session.query(Lesson.day, Lesson.time, Classroom.name).join(Classroom).distinct())

diff --git a/API/gimvicurnik/templates/atom.xml b/API/gimvicurnik/templates/atom.xml
@@ -1,14 +1,14 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 
 <feed xmlns="http://www.w3.org/2005/Atom" xmlns:webfeeds="http://webfeeds.org/rss/1.0">
-  <title>{{ name }} - Gimnazija Vič</title>
-  <subtitle>{{ name }} - Gimnazija Vič</subtitle>
+  <title>{{ name }} – Gimnazija Vič</title>
+  <subtitle>{{ name }} – Gimnazija Vič</subtitle>
   <id>{{ urls.api }}/feed/{{ type }}.atom</id>
   <updated>{{ last_updated.strftime("%Y-%m-%dT%H:%M:%SZ") }}</updated>
 
   <link href="{{ urls.api }}/feed/{{ type }}.atom" rel="self" type="application/atom+xml" />
   <link href="{{ urls.api }}/feed/{{ type }}.rss" rel="alternate" type="application/rss+xml" />
-  <link href="{{ urls.website }}/documents" rel="alternate" type="text/html" />
+  <link href="{{ urls.website }}/{{ page }}" rel="alternate" type="text/html" />
 
   <icon>{{ urls.website }}/img/icons/android-chrome-192x192.png</icon>
   <webfeeds:icon>{{ urls.website }}/img/icons/android-chrome-192x192.png</webfeeds:icon>

diff --git a/API/gimvicurnik/templates/rss.xml b/API/gimvicurnik/templates/rss.xml
@@ -2,18 +2,18 @@
 
 <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:webfeeds="http://webfeeds.org/rss/1.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
   <channel>
-    <title>{{ name }} - Gimnazija Vič</title>
-    <description>{{ name }} - Gimnazija Vič</description>
-    <link>{{ urls.website }}/documents</link>
+    <title>{{ name }} – Gimnazija Vič</title>
+    <description>{{ name }} – Gimnazija Vič</description>
+    <link>{{ urls.website }}/{{ page }}</link>
     <lastBuildDate>{{ last_updated.strftime("%a, %d %b %Y %H:%M:%S GMT") }}</lastBuildDate>
 
     <atom:link href="{{ urls.api }}/feed/{{ type }}.rss" rel="self" type="application/rss+xml" />
     <atom:link href="{{ urls.api }}/feed/{{ type }}.atom" rel="alternate" type="application/atom+xml" />
-    <atom:link href="{{ urls.website }}/documents" rel="alternate" type="text/html" />
+    <atom:link href="{{ urls.website }}/{{ page }}" rel="alternate" type="text/html" />
 
     <image>
-      <title>{{ name }} - Gimnazija Vič</title>
-      <link>{{ urls.website }}/documents</link>
+      <title>{{ name }} – Gimnazija Vič</title>
+      <link>{{ urls.website }}/{{ page }}</link>
       <url>{{ urls.website }}/img/icons/android-chrome-192x192.png</url>
     </image>
     <webfeeds:icon>{{ urls.website }}/img/icons/android-chrome-192x192.png</webfeeds:icon>

diff --git a/API/gimvicurnik/updaters/eclassroom.py b/API/gimvicurnik/updaters/eclassroom.py
@@ -9,7 +9,7 @@
 from itertools import product
 from urllib.parse import urlparse
 
-from mammoth import convert_to_html  # type: ignore
+import mammoth  # type: ignore
 from openpyxl import load_workbook
 from sqlalchemy import insert
 
@@ -37,7 +37,7 @@
     from typing import Any
     from collections.abc import Iterator
     from io import BytesIO
-    from mammoth.documents import Image  # type: ignore
+    from mammoth.documents import Image, Hyperlink  # type: ignore
     from sqlalchemy.orm import Session
     from sentry_sdk.tracing import Span
     from ..config import ConfigSourcesEClassroom
@@ -278,7 +278,13 @@ def document_needs_parsing(self, document: DocumentInfo) -> bool:
         return False
 
     @with_span(op="parse", pass_span=True)
-    def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: date, span: Span) -> None:  # type: ignore[override]
+    def parse_document(  # type: ignore[override]
+        self,
+        document: DocumentInfo,
+        stream: BytesIO,
+        effective: date,
+        span: Span,
+    ) -> None:
         """Parse the document and store extracted data."""
 
         span.set_tag("document.source", self.source)
@@ -322,8 +328,19 @@ def extract_document(self, document: DocumentInfo, content: bytes, span: Span) -
         def ignore_images(_image: Image) -> dict:
             return {}
 
+        def transform_hyperlinks(hyperlink: Hyperlink) -> Hyperlink:
+            hyperlink.target_frame = "_blank"
+            return hyperlink
+
         # Convert DOCX to HTML
-        result = convert_to_html(content, convert_image=ignore_images)
+        result = mammoth.convert_to_html(
+            content,
+            convert_image=ignore_images,
+            transform_document=mammoth.transforms.element_of_type(
+                mammoth.documents.Hyperlink,
+                transform_hyperlinks,
+            ),
+        )
         return typing.cast(str, result.value)
 
     def _parse_substitutions_pdf(self, stream: BytesIO, effective: date) -> None:

diff --git a/API/gimvicurnik/updaters/menu.py b/API/gimvicurnik/updaters/menu.py
@@ -55,7 +55,8 @@ def get_documents(self) -> Iterator[DocumentInfo]:
 
         if not menus:
             self.logger.info("No menus found")
-            return iter(())
+            yield from ()
+            return
 
         for menu in menus:
             for link in menu.find_all("a", href=True):
@@ -115,7 +116,13 @@ def document_needs_parsing(self, document: DocumentInfo) -> bool:
         return True
 
     @with_span(op="parse", pass_span=True)
-    def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: datetime.date, span: Span) -> None:  # type: ignore[override]
+    def parse_document(  # type: ignore[override]
+        self,
+        document: DocumentInfo,
+        stream: BytesIO,
+        effective: datetime.date,
+        span: Span,
+    ) -> None:
         """Parse the document and store extracted data."""
 
         span.set_tag("document.source", self.source)

diff --git a/API/gimvicurnik/updaters/solsis.py b/API/gimvicurnik/updaters/solsis.py
@@ -322,7 +322,7 @@ def _download_substitutions(self, date: date_) -> TypeRoot:
         """Download and parse the Solsis JSON file."""
 
         # Every request needs a different nonsense
-        nonsense = "%032x" % getrandbits(128)
+        nonsense = f"{getrandbits(128):032x}"
 
         # Compose the URL
         params = f"func=gateway&call=suplence&datum={date.strftime('%Y-%m-%d')}&nonsense={nonsense}"