Remove pango dependency (#271)

* Removed the weasyprint (and Pango) dependency, since we now default to the xhtml2pdf library. * Documentation changes. * Ruff fixes.
daxa-ai · Mar 14, 2024 · 2665775 · 2665775
1 parent cd0639e
commit 2665775
Show file tree

Hide file tree

Showing 11 changed files with 82 additions and 40 deletions.
diff --git a/docs/gh_pages/docs/config.md b/docs/gh_pages/docs/config.md
@@ -23,6 +23,7 @@ Notes:
 
 - `format`: Specifies the format of generated reports. Available options include 'pdf'.
 - `renderer`: Specifies the rendering engine for generating reports. Options include 'weasyprint', 'xhtml2pdf'.
+Note: If you set `renderer` to `weasyprint`, you need to install Pango. Follow [these instructions](./installation.md#pre-requisites) to do so.
 - `outputDir`: Defines the directory where generated reports will be saved.
 
 ### Classifier

diff --git a/docs/gh_pages/docs/development.md b/docs/gh_pages/docs/development.md
@@ -9,7 +9,9 @@ The following instructions are **tested on Mac OSX and Linux (Debian).**
 
 ### Prerequisites
 
-Install the following prerequisites. This is needed for PDF report generation.
+Install the following prerequisites. They are needed for PDF report generation
+only if you have set `weasyprint` as the renderer
+in `config.yaml`.
 
 #### Mac OSX
 
@@ -23,6 +25,11 @@ brew install pango
 sudo apt-get install libpango-1.0-0 libpangoft2-1.0-0
 ```
 
+### Install weasyprint library
+```sh
+pip install weasyprint
+```
+
 ## Build, Install and Run
 
 Fork and clone the pebblo repo. From within the pebblo directory, create a python virtual-env, build pebblo package (in `wheel` format), install and run.

diff --git a/docs/gh_pages/docs/installation.md b/docs/gh_pages/docs/installation.md
@@ -4,6 +4,8 @@
 > Please note that Pebblo requires Python version 3.9 or above to function optimally.
 
 ### Pre-requisites
+Install the following prerequisites. They are needed for PDF report generation
+only if you have set `weasyprint` as the renderer in `config.yaml`.
 
 #### Mac OSX
 
@@ -17,6 +19,11 @@ brew install pango
 sudo apt-get install libpango-1.0-0 libpangoft2-1.0-0
 ```
 
+### Install weasyprint library
+```sh
+pip install weasyprint
+```
+
 ### Pebblo Server
 
 ```

diff --git a/pebblo/app/config/service.py b/pebblo/app/config/service.py
@@ -8,9 +8,9 @@
 from fastapi import FastAPI, Response
 from fastapi.staticfiles import StaticFiles
 
+from pebblo.app.exceptions.exception_handler import exception_handlers
 from pebblo.app.routers.local_ui_routers import local_ui_router_instance
 from pebblo.app.routers.redirection_router import redirect_router_instance
-from pebblo.app.exceptions.exception_handler import exception_handlers
 
 with redirect_stdout(StringIO()), redirect_stderr(StringIO()):
     from pebblo.app.routers.routers import router_instance

diff --git a/pebblo/app/exceptions/exception_handler.py b/pebblo/app/exceptions/exception_handler.py
@@ -1,6 +1,6 @@
+from fastapi import Request
 from fastapi.exceptions import HTTPException
 from fastapi.responses import RedirectResponse
-from fastapi import Request
 
 
 async def not_found_error(request: Request, exc: HTTPException):

diff --git a/pebblo/app/service/service.py b/pebblo/app/service/service.py
@@ -40,7 +40,7 @@ def _write_pdf_report(self, final_report):
             f"/{load_id}/{CacheDir.REPORT_FILE_NAME.value}"
         )
         full_file_path = get_full_path(current_load_report_file_path)
-        report_obj.generate_report(
+        _, _ = report_obj.generate_report(
             data=final_report,
             output_path=full_file_path,
             format_string=report_format,
@@ -53,13 +53,16 @@ def _write_pdf_report(self, final_report):
             f"/{CacheDir.REPORT_FILE_NAME.value}"
         )
         full_file_path = get_full_path(current_app_report_file_path)
-        report_obj.generate_report(
+        status, result = report_obj.generate_report(
             data=final_report,
             output_path=full_file_path,
             format_string=report_format,
             renderer=renderer,
         )
-        logger.info(f"PDF report generated, please check path : {full_file_path}")
+        if not status:
+            logger.error(f"PDF report is not generated. {result}")
+        else:
+            logger.info(f"PDF report generated, please check path : {full_file_path}")
 
     def _upsert_loader_details(self, app_details):
         """

diff --git a/pebblo/reports/html_to_pdf_generator/generator_functions.py b/pebblo/reports/html_to_pdf_generator/generator_functions.py
@@ -4,24 +4,37 @@
 
 import os
 
-from weasyprint import CSS, HTML
-from xhtml2pdf import pisa
-
 
 # Creates PDF from template using weasyprint
 def weasyprint_pdf_converter(source_html, output_path, search_path):
     """PDF generator function for weasyprint renderer"""
-    base_url = os.path.dirname(os.path.realpath(__file__))
-    html_doc = HTML(string=source_html, base_url=base_url)
-    return html_doc.write_pdf(
-        target=output_path, stylesheets=[CSS(search_path + "/index.css")]
-    )
+    try:
+        from weasyprint import CSS, HTML
+
+        base_url = os.path.dirname(os.path.realpath(__file__))
+        html_doc = HTML(string=source_html, base_url=base_url)
+        result = html_doc.write_pdf(
+            target=output_path, stylesheets=[CSS(search_path + "/index.css")]
+        )
+        return True, result
+    except ImportError:
+        error = """Could not import weasyprint package. Please install weasyprint and Pango to generate report using weasyprint.
+          Follow documentation for more details - https://daxa-ai.github.io/pebblo/installation"
+        """
+        return False, error
+    except Exception as e:
+        return False, e
 
 
 # Creates PDF from template using xhtml2pdf
 def xhtml2pdf_pdf_converter(source_html, output_path, _):
     """PDF generator function for xhtml2pdf renderer"""
-    with open(output_path, "w+b") as result_file:
-        pisa_status = pisa.CreatePDF(src=source_html, dest=result_file)
-        result_file.close()
-        return pisa_status.err
+    try:
+        from xhtml2pdf import pisa
+
+        with open(output_path, "w+b") as result_file:
+            pisa_status = pisa.CreatePDF(src=source_html, dest=result_file)
+            result_file.close()
+            return True, pisa_status.err
+    except Exception as e:
+        return False, e
diff --git a/pebblo/reports/html_to_pdf_generator/report_generator.py b/pebblo/reports/html_to_pdf_generator/report_generator.py
@@ -3,12 +3,13 @@
 """
 
 import datetime
+import time
 from decimal import Decimal
 
 import jinja2
 
 from pebblo.reports.enums.report_libraries import library_function_mapping
-import time
+from pebblo.reports.libs.logger import logger
 
 
 def date_formatter(date_obj):
@@ -30,21 +31,28 @@ def get_file_size(size):
 
 
 def convert_html_to_pdf(data, output_path, template_name, search_path, renderer):
-    """Convert HTML Template to PDF by embedding JSON data"""
-    template_loader = jinja2.FileSystemLoader(searchpath=search_path)
-    template_env = jinja2.Environment(loader=template_loader)
-    template = template_env.get_template(template_name)
-    current_date = (
-        datetime.datetime.now().strftime("%B %d, %Y") + " " + time.localtime().tm_zone
-    )
-    source_html = template.render(
-        data=data,
-        date=current_date,
-        datastores=data["dataSources"][0],
-        findingDetails=data["dataSources"][0]["findingsDetails"],
-        loadHistoryItemsToDisplay=data["loadHistory"]["history"],
-        dateFormatter=date_formatter,
-        getFileSize=get_file_size,
-    )
-    pdf_converter = library_function_mapping[renderer]
-    pdf_converter(source_html, output_path, search_path)
+    try:
+        """Convert HTML Template to PDF by embedding JSON data"""
+        template_loader = jinja2.FileSystemLoader(searchpath=search_path)
+        template_env = jinja2.Environment(loader=template_loader)
+        template = template_env.get_template(template_name)
+        current_date = (
+            datetime.datetime.now().strftime("%B %d, %Y")
+            + " "
+            + time.localtime().tm_zone
+        )
+        source_html = template.render(
+            data=data,
+            date=current_date,
+            datastores=data["dataSources"][0],
+            findingDetails=data["dataSources"][0]["findingsDetails"],
+            loadHistoryItemsToDisplay=data["loadHistory"]["history"],
+            dateFormatter=date_formatter,
+            getFileSize=get_file_size,
+        )
+        pdf_converter = library_function_mapping[renderer]
+        status, result = pdf_converter(source_html, output_path, search_path)
+        return status, result
+    except Exception as e:
+        logger.error(e)
+        return False, ""
diff --git a/pebblo/reports/reports.py b/pebblo/reports/reports.py
@@ -32,13 +32,15 @@ def generate_report(
             search_path = os.path.join(os.path.dirname(__file__), "templates/")
             try:
                 template_name = template_renderer_mapping[renderer]
-                convert_html_to_pdf(
+                status, result = convert_html_to_pdf(
                     data,
                     output_path,
                     template_name=template_name,
                     search_path=search_path,
                     renderer=renderer,
                 )
+                return status, result
+
             except KeyError as e:
                 logger.error(
                     "Renderer %s not supported. Please use supported renderers: "
@@ -48,5 +50,7 @@ def generate_report(
                     ReportLibraries.XHTML2PDF,
                     e,
                 )
+                return False, ""
         else:
             logger.error("Output file format %s not supported", format)
+            return False, ""
diff --git a/pyproject.toml b/pyproject.toml
@@ -115,7 +115,6 @@ dependencies = [
   "jinja2>=3.1.3",
   "tqdm",
   "xhtml2pdf==0.2.15",
-  "weasyprint==60.2",
 ]
 
 # List additional groups of dependencies here (e.g. development

diff --git a/tests/app/test_daemon.py b/tests/app/test_daemon.py
@@ -152,7 +152,7 @@ def test_loader_doc_success(
 
     reports_instance = reports.return_value
     reports_instance.generate_report = Mock()
-    reports_instance.generate_report.return_value = None
+    reports_instance.generate_report.return_value = None, None
 
     loader_doc = {
         "name": "Test App",