From 3497993a3983b172473c996bbab709f919b731a0 Mon Sep 17 00:00:00 2001 From: caiocvsilva Date: Thu, 30 Mar 2023 16:11:15 -0400 Subject: [PATCH 1/3] fixed downloading files with meta-tags --- writer/src/download_request.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/writer/src/download_request.py b/writer/src/download_request.py index 57145c75..f3e25296 100644 --- a/writer/src/download_request.py +++ b/writer/src/download_request.py @@ -6,6 +6,7 @@ import requests import string import time +from bs4 import BeautifulSoup import settings from crawling_utils import notify_file_downloaded_with_error @@ -112,6 +113,29 @@ def exec_download(self, worker_name: str) -> bool: time.sleep(attempt * INTERVAL_BETWEEN_ATTEMPTS) continue + # Create a BeautifulSoup object to parse the HTML content of the requested page. + soup = BeautifulSoup(req.content, "html.parser") + # Find any meta tag with an http-equiv attribute equal to "refresh". + meta_refresh = soup.find("meta", attrs={"http-equiv": "refresh"}) + # If such a meta tag is found: + if meta_refresh: + # Print a message indicating that the URL uses a meta-tag. + print(f"[FD] A página {self.url} utiliza meta-tag") + # Get the value of the "content" attribute of the meta-refresh tag. + content = meta_refresh["content"] + # Check if there is a substring "url=" within the content value. + url_index = content.find("url=") + # If the substring is found: + if url_index != -1: + # Extract the final URL from the content value. + final_url = content[url_index + 4:] + # Print a message indicating the final URL. + print("[FD] URL final: ", final_url) + # Update the URL to the final URL extracted from the meta tag. + self.url = "https://www.cristais.mg.gov.br"+final_url + # Continue looping through the code since the URL has been updated. + continue + with open(self.temp_path_to_save, 'wb') as f: for chunk in req.iter_content(chunk_size=8192): f.write(chunk) From 701165aab77080a5b69fcc170a5d46152d4a44e8 Mon Sep 17 00:00:00 2001 From: Elves Rodrigues Date: Tue, 13 Jun 2023 09:33:45 -0300 Subject: [PATCH 2/3] pyee=>9 -> pyee==9 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c43d6712..fa0bfe0c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,7 +68,7 @@ pyOpenSSL==22.0.0 pyasn1-modules==0.2.8 pyasn1==0.4.8 pycparser==2.21 -pyee=>9 +pyee==9 pyext==0.7 pyparsing==3.0.7 pytesseract==0.3.9 From 9c97b746ce6f490ff39d7e40632b2e7bf5576521 Mon Sep 17 00:00:00 2001 From: breno Date: Wed, 14 Jun 2023 14:49:59 -0300 Subject: [PATCH 3/3] atualiza uso do comando docker-compose para docker compose --- clean_install.py | 6 +++--- install.py | 4 ++-- run.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clean_install.py b/clean_install.py index c28fada4..f195cd65 100644 --- a/clean_install.py +++ b/clean_install.py @@ -13,9 +13,9 @@ } -subprocess.run("docker-compose down".split(), env=myenv) -subprocess.run("docker-compose -f docker-compose.yml down --rmi all -v".split(), env=myenv) +subprocess.run("docker compose down".split(), env=myenv) +subprocess.run("docker compose -f docker-compose.yml down --rmi all -v".split(), env=myenv) subprocess.run("rm -r ./main/migrations".split()) subprocess.run("mkdir -m 777 ./main/migrations".split()) subprocess.run("touch ./main/migrations/__init__.py".split()) -subprocess.run("docker-compose build --parallel".split(), env=myenv) +subprocess.run("docker compose build --parallel".split(), env=myenv) diff --git a/install.py b/install.py index 522172e1..ad6600b2 100644 --- a/install.py +++ b/install.py @@ -12,5 +12,5 @@ "ENVFILENAME": str(args["debug"]), } -subprocess.run("docker-compose down".split(), env=myenv) -subprocess.run("docker-compose build --parallel".split(), env=myenv) \ No newline at end of file +subprocess.run("docker compose down".split(), env=myenv) +subprocess.run("docker compose build --parallel".split(), env=myenv) \ No newline at end of file diff --git a/run.py b/run.py index 4ca87358..c24c87c4 100644 --- a/run.py +++ b/run.py @@ -12,9 +12,9 @@ **os.environ, "ENVFILENAME": str(args["debug"]), } -subprocess.run("docker-compose down".split(), env=myenv) -subprocess.run("docker-compose -f docker-compose-cleanup.yml down -v".split(), env=myenv) -subprocess.run("docker-compose build --parallel".split(), env=myenv) -subprocess.run("docker-compose up -d".split(), env=myenv) +subprocess.run("docker compose down".split(), env=myenv) +subprocess.run("docker compose -f docker-compose-cleanup.yml down -v".split(), env=myenv) +subprocess.run("docker compose build --parallel".split(), env=myenv) +subprocess.run("docker compose up -d".split(), env=myenv) time.sleep(10) print("Finished") \ No newline at end of file