diff --git a/clean_install.py b/clean_install.py
index c28fada4..f195cd65 100644
--- a/clean_install.py
+++ b/clean_install.py
@@ -13,9 +13,9 @@
 }
 
-subprocess.run("docker-compose down".split(), env=myenv)
-subprocess.run("docker-compose -f docker-compose.yml down --rmi all -v".split(), env=myenv)
+subprocess.run("docker compose down".split(), env=myenv)
+subprocess.run("docker compose -f docker-compose.yml down --rmi all -v".split(), env=myenv)
 subprocess.run("rm -r ./main/migrations".split())
 subprocess.run("mkdir -m 777 ./main/migrations".split())
 subprocess.run("touch ./main/migrations/__init__.py".split())
 
-subprocess.run("docker-compose build --parallel".split(), env=myenv)
+subprocess.run("docker compose build --parallel".split(), env=myenv)
diff --git a/install.py b/install.py
index 522172e1..ad6600b2 100644
--- a/install.py
+++ b/install.py
@@ -12,5 +12,5 @@
     "ENVFILENAME": str(args["debug"]),
 }
 
-subprocess.run("docker-compose down".split(), env=myenv)
-subprocess.run("docker-compose build --parallel".split(), env=myenv)
\ No newline at end of file
+subprocess.run("docker compose down".split(), env=myenv)
+subprocess.run("docker compose build --parallel".split(), env=myenv)
\ No newline at end of file
diff --git a/run.py b/run.py
index 4ca87358..c24c87c4 100644
--- a/run.py
+++ b/run.py
@@ -12,9 +12,9 @@
     **os.environ,
     "ENVFILENAME": str(args["debug"]),
 }
-subprocess.run("docker-compose down".split(), env=myenv)
-subprocess.run("docker-compose -f docker-compose-cleanup.yml down -v".split(), env=myenv)
-subprocess.run("docker-compose build --parallel".split(), env=myenv)
-subprocess.run("docker-compose up -d".split(), env=myenv)
+subprocess.run("docker compose down".split(), env=myenv)
+subprocess.run("docker compose -f docker-compose-cleanup.yml down -v".split(), env=myenv)
+subprocess.run("docker compose build --parallel".split(), env=myenv)
+subprocess.run("docker compose up -d".split(), env=myenv)
 time.sleep(10)
 print("Finished")
\ No newline at end of file
diff --git a/writer/src/download_request.py b/writer/src/download_request.py
index 57145c75..f3e25296 100644
--- a/writer/src/download_request.py
+++ b/writer/src/download_request.py
@@ -6,6 +6,7 @@
 import requests
 import string
 import time
+from bs4 import BeautifulSoup
 
 import settings
 from crawling_utils import notify_file_downloaded_with_error
@@ -112,6 +113,29 @@ def exec_download(self, worker_name: str) -> bool:
                 time.sleep(attempt * INTERVAL_BETWEEN_ATTEMPTS)
                 continue
 
+            # Create a BeautifulSoup object to parse the HTML content of the requested page.
+            soup = BeautifulSoup(req.content, "html.parser")
+            # Find any meta tag with an http-equiv attribute equal to "refresh".
+            meta_refresh = soup.find("meta", attrs={"http-equiv": "refresh"})
+            # If such a meta tag is found:
+            if meta_refresh:
+                # Print a message indicating that the URL uses a meta refresh tag.
+                print(f"[FD] The page {self.url} uses a meta refresh tag")
+                # Get the value of the "content" attribute of the meta-refresh tag.
+                content = meta_refresh["content"]
+                # Check if there is a substring "url=" within the content value.
+                url_index = content.find("url=")
+                # If the substring is found:
+                if url_index != -1:
+                    # Extract the final URL from the content value.
+                    final_url = content[url_index + 4:]
+                    # Print a message indicating the final URL.
+                    print("[FD] Final URL: ", final_url)
+                    # Update the URL to the final URL extracted from the meta tag.
+                    self.url = "https://www.cristais.mg.gov.br" + final_url
+                    # Retry the request since the URL has been updated.
+                    continue
+
             with open(self.temp_path_to_save, 'wb') as f:
                 for chunk in req.iter_content(chunk_size=8192):
                     f.write(chunk)
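
The meta-refresh handling added to writer/src/download_request.py can be reviewed in isolation. Below is a minimal standalone sketch of the same parsing step; the helper name resolve_meta_refresh, the case-insensitive search for "url=", and the use of urllib.parse.urljoin to resolve relative targets (instead of the host hard-coded in the patch) are illustrative assumptions, not part of this change.

from typing import Optional
from urllib.parse import urljoin

from bs4 import BeautifulSoup


def resolve_meta_refresh(html: bytes, base_url: str) -> Optional[str]:
    # Parse the page and look for a <meta http-equiv="refresh"> tag,
    # mirroring the check introduced in download_request.py.
    soup = BeautifulSoup(html, "html.parser")
    meta_refresh = soup.find("meta", attrs={"http-equiv": "refresh"})
    if meta_refresh is None:
        return None

    # The content attribute has the form "0; url=/some/page.html".
    content = meta_refresh.get("content", "")
    url_index = content.lower().find("url=")
    if url_index == -1:
        return None

    # Strip whitespace and optional quotes around the redirect target.
    target = content[url_index + 4:].strip().strip("'\"")
    # Resolve relative targets against the page that served the meta tag,
    # rather than hard-coding a host (an assumption, not the patch's behaviour).
    return urljoin(base_url, target)


if __name__ == "__main__":
    page = b'<html><head><meta http-equiv="refresh" content="0; url=licitacoes.html"></head></html>'
    print(resolve_meta_refresh(page, "https://www.cristais.mg.gov.br/index.html"))
    # Expected output: https://www.cristais.mg.gov.br/licitacoes.html

urljoin leaves absolute targets unchanged, so the sketch works whether the meta tag points to a relative path or a full URL.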