diff --git a/pysus/online_data/__init__.py b/pysus/online_data/__init__.py
index 13ef767..c4109c1 100644
--- a/pysus/online_data/__init__.py
+++ b/pysus/online_data/__init__.py
@@ -20,7 +20,7 @@ from pyreaddbc import dbc2dbf

 CACHEPATH = os.getenv(
-    'PYSUS_CACHEPATH', os.path.join(str(Path.home()), 'pysus')
+    "PYSUS_CACHEPATH", os.path.join(str(Path.home()), "pysus")
 )

 # create pysus cache directory
@@ -29,34 +29,34 @@
 DB_PATHS = {
-    'SINAN': [
-        '/dissemin/publicos/SINAN/DADOS/FINAIS',
-        '/dissemin/publicos/SINAN/DADOS/PRELIM',
+    "SINAN": [
+        "/dissemin/publicos/SINAN/DADOS/FINAIS",
+        "/dissemin/publicos/SINAN/DADOS/PRELIM",
     ],
-    'SIM': [
-        '/dissemin/publicos/SIM/CID10/DORES',
-        '/dissemin/publicos/SIM/CID9/DORES',
+    "SIM": [
+        "/dissemin/publicos/SIM/CID10/DORES",
+        "/dissemin/publicos/SIM/CID9/DORES",
     ],
-    'SINASC': [
-        '/dissemin/publicos/SINASC/NOV/DNRES',
-        '/dissemin/publicos/SINASC/ANT/DNRES',
+    "SINASC": [
+        "/dissemin/publicos/SINASC/NOV/DNRES",
+        "/dissemin/publicos/SINASC/ANT/DNRES",
     ],
-    'SIH': [
-        '/dissemin/publicos/SIHSUS/199201_200712/Dados',
-        '/dissemin/publicos/SIHSUS/200801_/Dados',
+    "SIH": [
+        "/dissemin/publicos/SIHSUS/199201_200712/Dados",
+        "/dissemin/publicos/SIHSUS/200801_/Dados",
     ],
-    'SIA': [
-        '/dissemin/publicos/SIASUS/199407_200712/Dados',
-        '/dissemin/publicos/SIASUS/200801_/Dados',
+    "SIA": [
+        "/dissemin/publicos/SIASUS/199407_200712/Dados",
+        "/dissemin/publicos/SIASUS/200801_/Dados",
     ],
-    'PNI': ['/dissemin/publicos/PNI/DADOS'],
-    'CNES': ['dissemin/publicos/CNES/200508_/Dados'],
-    'CIHA': ['/dissemin/publicos/CIHA/201101_/Dados'],
+    "PNI": ["/dissemin/publicos/PNI/DADOS"],
+    "CNES": ["dissemin/publicos/CNES/200508_/Dados"],
+    "CIHA": ["/dissemin/publicos/CIHA/201101_/Dados"],
 }


 def FTP_datasus():
-    ftp = FTP('ftp.datasus.gov.br')
+    ftp = FTP("ftp.datasus.gov.br")
     ftp.login()
     return ftp
@@ -80,11 +80,11 @@ def parquets_to_dataframe(
     at time.
     """

-    parquets = list(map(str, Path(parquet_dir).glob('*.parquet')))
+    parquets = list(map(str, Path(parquet_dir).glob("*.parquet")))

     try:
         chunks_list = [
-            pd.read_parquet(str(f), engine='fastparquet') for f in parquets
+            pd.read_parquet(str(f), engine="fastparquet") for f in parquets
         ]
         df = pd.concat(chunks_list, ignore_index=True)
@@ -96,7 +96,7 @@ def parquets_to_dataframe(
     finally:
         if clean_after_read:
             shutil.rmtree(parquet_dir)
-            logging.info(f'{parquet_dir} removed')
+            logging.info(f"{parquet_dir} removed")


 def _parse_dftypes(df: pd.DataFrame) -> pd.DataFrame:
@@ -114,48 +114,72 @@ def str_to_int(string: str) -> Union[int, float]:
         # If removing spaces, all characters are int,
         # return int(value). @warning it removes in between
         # spaces as wel
-        if str(string).replace(' ', '').isnumeric():
-            return int(string.replace(' ', ''))
+        if str(string).replace(" ", "").isnumeric():
+            return int(string.replace(" ", ""))

     def str_to_date(string: str) -> datetime.date:
         if isinstance(string, str):
             try:
-                return datetime.strptime(string, '%Y%m%d').date()
+                return datetime.strptime(string, "%Y%m%d").date()
             except Exception:
                 # Ignore errors, bad value
                 pass

-    map_column_func(['DT_NOTIFIC', 'DT_SIN_PRI'], str_to_date)
-    map_column_func(['CODMUNRES', 'SEXO'], str_to_int)
+    map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
+    map_column_func(["CODMUNRES", "SEXO"], str_to_int)

     df = df.applymap(
-        lambda x: '' if str(x).isspace() else x
+        lambda x: "" if str(x).isspace() else x
     )  # Remove all space values

     df = df.convert_dtypes()
     return df

+
 def list_data_sources() -> str:
     """
     List all databases supported by PySUS.
+    If the databases directory is found, it lists the supported databases.
+    If the package for the databases modules is not found, it alerts that
+    no support for the databases was found and lists the
+    expected databases for implementation.
+
     Returns:
-    - A formatted string listing the supported databases.
+    - A formatted string listing the supported databases or the
+      expected databases for implementation.
     """
     databases_directory = (
-        Path(__file__).resolve(strict=True).parent / "ftp" / "databases"
+        Path(__file__).resolve(strict=True).parent.parent
+        / "ftp"
+        / "databases"
     )

-    supported_databases = [
-        file
-        for file in databases_directory.glob("*.py")
-        if file.name != "__init__.py"
-    ]
+    if databases_directory.exists():
+        supported_databases = [
+            file.stem.upper()
+            for file in databases_directory.glob("*.py")
+            if file.name != "__init__.py"
+        ]
+
+        return f"""Currently, the supported databases are: {
+            ', '.join(supported_databases)}"""
+    else:
+        expected_databases = [
+            "SINAN",
+            "SIM",
+            "SINASC",
+            "SIH",
+            "SIA",
+            "PNI",
+            "CNES",
+            "CIHA",
+        ]
+        return f"""No support for the databases of DATASUS was found.
+            Expected databases for implementation are: {
+            ', '.join(expected_databases)}"""

-    return f"""Currently, the supported databases are: {', '.join(
-        file.stem.upper() for file in supported_databases)
-    }"""


 class FTP_Inspect:
     """
@@ -177,7 +201,7 @@ class FTP_Inspect:
     database: str
     _ds_paths: list
-    ftp_server: FTP = FTP('ftp.datasus.gov.br')
+    ftp_server: FTP = FTP("ftp.datasus.gov.br")
     available_dbs: list = list(DB_PATHS.keys())

     def __init__(self, database: str) -> None:
@@ -187,8 +211,8 @@ def __init__(self, database: str) -> None:
     def __checkdatabase__(self, database):
         if database not in self.available_dbs:
             raise ValueError(
-                f'{database} not found'
-                f' available databases: {self.available_dbs}'
+                f"{database} not found"
+                f" available databases: {self.available_dbs}"
             )
         return database
@@ -202,33 +226,33 @@ def last_update_df(self) -> pd.DataFrame:  # Legacy
         """
         if self.database not in DB_PATHS:
             print(
-                f'Database {self.database} not supported try one of these'
-                '{list(DB_PATHS.keys())}'
+                f"Database {self.database} not supported, try one of these: "
+                f"{list(DB_PATHS.keys())}"
             )
             return pd.DataFrame()

         ftp = FTP_datasus()
         response = {
-            'folder': [],
-            'date': [],
-            'file_size': [],
-            'file_name': [],
+            "folder": [],
+            "date": [],
+            "file_size": [],
+            "file_name": [],
         }

         def parse(line):
             data = line.strip().split()
-            response['folder'].append(pth)
-            response['date'].append(
-                pd.to_datetime(' '.join([data[0], data[1]]))
+            response["folder"].append(pth)
+            response["date"].append(
+                pd.to_datetime(" ".join([data[0], data[1]]))
             )
-            response['file_size'].append(
-                0 if data[2] == '' else int(data[2])
+            response["file_size"].append(
+                0 if data[2] == "" else int(data[2])
             )
-            response['file_name'].append(data[3])
+            response["file_name"].append(data[3])

         for pth in DB_PATHS[self.database]:
             ftp.cwd(pth)
-            flist = ftp.retrlines('LIST', parse)
+            flist = ftp.retrlines("LIST", parse)  # NOQA F841
         ftp.close()
         return pd.DataFrame(response)
@@ -238,9 +262,9 @@ def list_available_years(
         UF: str = None,
         SINAN_disease: str = None,
         CNES_group: str = None,
-        SIA_group: str = 'PA',
-        PNI_group: str = 'CPNI',
-        SIH_group: str = 'RD',
+        SIA_group: str = "PA",
+        PNI_group: str = "CPNI",
+        SIH_group: str = "RD",
     ):
         """
         Uses `list_all` and filters according to UF, disease (SINAN),
@@ -249,10 +273,10 @@
         available_years = set()
         get_filename = (
             lambda x: str(x)
-            .split('/')[-1]
+            .split("/")[-1]
             .upper()
-            .split('.DBC')[0]
-            .split('.DBF')[0]
+            .split(".DBC")[0]
+            .split(".DBF")[0]
         )  # Trim url paths

         def list_years(
@@ -265,54 +289,54 @@
             ]

         if UF is not None and len(UF) > 2:
-            raise ValueError('Use UF abbreviation. Eg: RJ')
+            raise ValueError("Use UF abbreviation. Eg: RJ")

         # SINAN
-        if self.database == 'SINAN':
+        if self.database == "SINAN":
             if not SINAN_disease:
-                raise ValueError('No disease assigned to SINAN_disease')
+                raise ValueError("No disease assigned to SINAN_disease")
             dis = FTP_SINAN(SINAN_disease)
-            available_years = dis.get_years(stage='all')
+            available_years = dis.get_years(stage="all")
         # SINASC
-        elif self.database == 'SINASC':
+        elif self.database == "SINASC":
             list_years(2)
         # SIH
-        elif self.database == 'SIH':
+        elif self.database == "SIH":
             list_years(len(SIH_group), slice(-4, -2), SIH_group=SIH_group)
         # SIA
-        elif self.database == 'SIA':
+        elif self.database == "SIA":
             list_years(len(SIA_group), slice(-4, -2), SIA_group=SIA_group)
         # CNES
-        elif self.database == 'CNES':
+        elif self.database == "CNES":
             list_years(len(CNES_group), slice(-4, -2), CNES_group=CNES_group)
         # PNI
-        elif self.database == 'PNI':
+        elif self.database == "PNI":
             list_years(len(PNI_group), PNI_group=PNI_group)
         # CIHA
-        elif self.database == 'CIHA':
+        elif self.database == "CIHA":
             list_years(4)
         # SIM
-        elif self.database == 'SIM':
+        elif self.database == "SIM":
             dbcs = self.list_all()
             available_years = set()
             for path in dbcs:
-                if '/CID9/' in path:
+                if "/CID9/" in path:
                     available_years.add(get_filename(path)[-2:]) if str(path)[
                         -8:-6
                     ] == UF else None
-                elif '/CID10/' in path:
+                elif "/CID10/" in path:
                     available_years.add(get_filename(path)[-2:]) if str(path)[
                         -10:-8
                     ] == UF else None

         # Normalize years to {year:04d} and return sorted
         cur_year = str(datetime.now().year)[-2:]
-        bef_2000 = lambda yrs: [
-            '19' + y for y in yrs if y > cur_year and y <= '99'
-        ]
-        aft_2000 = lambda yrs: [
-            '20' + y for y in yrs if y <= cur_year and y >= '00'
+        bef_2000 = lambda yrs: [  # NOQA E731
+            "19" + y for y in yrs if y > cur_year and y <= "99"
+        ]
+        aft_2000 = lambda yrs: [  # NOQA E731
+            "20" + y for y in yrs if y <= cur_year and y >= "00"
         ]
         return sorted(bef_2000(available_years)) + sorted(
             aft_2000(available_years)
@@ -322,9 +346,9 @@ def list_all(
         self,
         SINAN_disease: str = None,
         CNES_group: str = None,
-        SIA_group: str = 'PA',
-        PNI_group: str = 'CPNI',
-        SIH_group: str = 'RD',
+        SIA_group: str = "PA",
+        PNI_group: str = "CPNI",
+        SIH_group: str = "RD",
     ) -> list:
         """
         Enters FTP server and lists all DBCs or DBFs files found for a
@@ -334,46 +358,46 @@
         chunks, to preserve memory, that are read using pandas and pyarrow.
""" available_dbs = list() - ftp = FTP('ftp.datasus.gov.br') + ftp = FTP("192.168.0.1") ftp.login() for path in self._ds_paths: try: # CNES - if self.database == 'CNES': + if self.database == "CNES": if not CNES_group: - raise ValueError(f'No group assigned to CNES_group') + raise ValueError("No group assigned to CNES_group") available_dbs.extend( - ftp.nlst(f'{path}/{CNES_group}/*.DBC') + ftp.nlst(f"{path}/{CNES_group}/*.DBC") ) # SIA - elif self.database == 'SIA': + elif self.database == "SIA": if not SIA_group: - raise ValueError(f'No group assigned to SIA_group') - available_dbs.extend(ftp.nlst(f'{path}/{SIA_group}*.DBC')) + raise ValueError("No group assigned to SIA_group") + available_dbs.extend(ftp.nlst(f"{path}/{SIA_group}*.DBC")) # SIH - elif self.database == 'SIH': + elif self.database == "SIH": if not SIH_group: - raise ValueError(f'No group assigned to SIH_group') - available_dbs.extend(ftp.nlst(f'{path}/{SIH_group}*.DBC')) + raise ValueError("No group assigned to SIH_group") + available_dbs.extend(ftp.nlst(f"{path}/{SIH_group}*.DBC")) # PNI - elif self.database == 'PNI': + elif self.database == "PNI": if not PNI_group: - raise ValueError(f'No group assigned to PNI_group') - available_dbs.extend(ftp.nlst(f'{path}/{PNI_group}*.DBF')) + raise ValueError("No group assigned to PNI_group") + available_dbs.extend(ftp.nlst(f"{path}/{PNI_group}*.DBF")) # SINAN - elif self.database == 'SINAN': + elif self.database == "SINAN": if not SINAN_disease: raise ValueError( - f'No disease assigned to SINAN_disease' + "No disease assigned to SINAN_disease" ) disease = FTP_SINAN(SINAN_disease) available_dbs = disease.get_ftp_paths( - disease.get_years(stage='all') + disease.get_years(stage="all") ) # SIM, SINASC else: available_dbs.extend( - ftp.nlst(f'{path}/*.DBC') # case insensitive + ftp.nlst(f"{path}/*.DBC") # case insensitive ) except Exception as e: raise e @@ -420,9 +444,9 @@ def download( months: Union[str, int, list] = None, SINAN_disease: str = None, CNES_group: str = None, - SIA_group: str = 'PA', - SIH_group: str = 'RD', - PNI_group: str = 'CPNI', + SIA_group: str = "PA", + SIH_group: str = "RD", + PNI_group: str = "CPNI", local_dir: str = cache_dir, ) -> Union[tuple[str], str]: dbc_paths = self._get_dbc_paths( @@ -440,8 +464,8 @@ def download( downloaded_parquets = [] for path in dbc_paths: - parquet_dir = data_dir / str(path).split('/')[-1].upper().replace( - '.DBC', '.parquet' + parquet_dir = data_dir / str(path).split("/")[-1].upper().replace( + ".DBC", ".parquet" ) if Path(parquet_dir).exists(): @@ -465,27 +489,29 @@ def _get_dbc_paths( months: Union[str, int, list] = None, SINAN_disease: str = None, CNES_group: str = None, - SIA_group: str = 'PA', - SIH_group: str = 'RD', - PNI_group: str = 'CPNI', + SIA_group: str = "PA", + SIH_group: str = "RD", + PNI_group: str = "CPNI", ) -> list: - parse_to_list = lambda ite: [ite] if not isinstance(ite, list) else ite + parse_to_list = ( + lambda i: [i] if not isinstance(i, list) else i + ) # NOQA E731 UFs = parse_to_list(UFs) years = parse_to_list(years) months = parse_to_list(months) db = self._ftp_db.database list_files = self._ftp_db.list_all - if db == 'SINAN': + if db == "SINAN": all_dbcs = list_files(SINAN_disease=SINAN_disease) sinan_dis = FTP_SINAN(SINAN_disease) - elif db == 'CNES': + elif db == "CNES": all_dbcs = list_files(CNES_group=CNES_group) - elif db == 'SIA': + elif db == "SIA": all_dbcs = list_files(SIA_group=SIA_group) - elif db == 'SIH': + elif db == "SIH": all_dbcs = list_files(SIH_group=SIH_group) - elif db == 'PNI': 
+ elif db == "PNI": all_dbcs = list_files(PNI_group=PNI_group) else: all_dbcs = list_files() @@ -499,51 +525,51 @@ def url_regex( for instance, lowercase UF and entire years and shortened years at the same time. """ - if db == 'SINAN': + if db == "SINAN": if not year: - raise ValueError('Missing year(s)') + raise ValueError("Missing year(s)") file_pattern = re.compile( - f'{sinan_dis.code}BR{year}.dbc', re.I + f"{sinan_dis.code}BR{year}.dbc", re.I ) - elif db == 'SIM' or db == 'SINASC': + elif db == "SIM" or db == "SINASC": if not year or not UF: - raise ValueError('Missing year(s) or UF(s)') + raise ValueError("Missing year(s) or UF(s)") file_pattern = re.compile( - rf'[DON]+R?{UF}\d?\d?{year}.dbc', re.I + rf"[DON]+R?{UF}\d?\d?{year}.dbc", re.I ) - elif db == 'SIH': + elif db == "SIH": if not year or not month or not UF: - raise ValueError('Missing year(s), month(s) or UF(s)') + raise ValueError("Missing year(s), month(s) or UF(s)") file_pattern = re.compile( - rf'{SIH_group}{UF}{year}{month}.dbc', re.I + rf"{SIH_group}{UF}{year}{month}.dbc", re.I ) - elif db == 'SIA': + elif db == "SIA": if not year or not month or not UF: - raise ValueError('Missing year(s), month(s) or UF(s)') + raise ValueError("Missing year(s), month(s) or UF(s)") file_pattern = re.compile( - rf'{SIA_group}{UF}{year}{month}[abc]?.dbc', re.I + rf"{SIA_group}{UF}{year}{month}[abc]?.dbc", re.I ) - elif db == 'PNI': + elif db == "PNI": if not year or not UF: - raise ValueError('Missing year(s) or UF(s)') - file_pattern = re.compile(rf'{PNI_group}{UF}{year}.dbf', re.I) - elif db == 'CNES': + raise ValueError("Missing year(s) or UF(s)") + file_pattern = re.compile(rf"{PNI_group}{UF}{year}.dbf", re.I) + elif db == "CNES": if not year or not month or not UF: - raise ValueError('Missing year(s), month(s) or UF(s)') + raise ValueError("Missing year(s), month(s) or UF(s)") file_pattern = re.compile( - rf'{CNES_group}/{CNES_group}{UF}{year}{month}.dbc', re.I + rf"{CNES_group}/{CNES_group}{UF}{year}{month}.dbc", re.I ) - elif db == 'CIHA': + elif db == "CIHA": if not year or not month or not UF: - raise ValueError('Missing year(s), month(s) or UF(s)') - file_pattern = re.compile(rf'CIHA{UF}{year}{month}.dbc', re.I) + raise ValueError("Missing year(s), month(s) or UF(s)") + file_pattern = re.compile(rf"CIHA{UF}{year}{month}.dbc", re.I) return file_pattern files = list() for y, m, uf in product( years or [], months or [], UFs or [] ): # Allows None - norm = lambda y: str(y)[-2:].zfill(2) + norm = lambda y: str(y)[-2:].zfill(2) # NOQA E731 regex = url_regex(year=norm(y), month=norm(m), UF=str(uf)) filtered = list(filter(regex.search, all_dbcs)) files.extend(filtered) @@ -555,31 +581,31 @@ def _extract_dbc(self, DBC_path: str, local_dir: str = cache_dir) -> str: local machine. 
""" Path(local_dir).mkdir(exist_ok=True, parents=True) - filename = DBC_path.split('/')[-1] - filedir = DBC_path.replace(filename, '') + filename = DBC_path.split("/")[-1] + filedir = DBC_path.replace(filename, "") filepath = Path(local_dir) / filename if Path(filepath).exists(): return str(filepath) try: - ftp = ftp = FTP('ftp.datasus.gov.br') + ftp = ftp = FTP("192.168.0.1") ftp.login() ftp.cwd(filedir) ftp.retrbinary( - f'RETR {filename}', - open(f'{filepath}', 'wb').write, + f"RETR {filename}", + open(f"{filepath}", "wb").write, ) ftp.close() return str(filepath) except error_perm as e: - logging.error(f'Not able to download {filename}') + logging.error(f"Not able to download {filename}") raise e def _dbfc_to_parquets(self, fpath: str, local_dir: str) -> str(PosixPath): """DBC/DBF files to parquets using Pandas & PyArrow""" db_path = Path(local_dir) / fpath - dbfile = str(db_path.absolute()).split('/')[-1] - if Path(dbfile).suffix in ['.dbc', '.DBC'] and db_path.exists(): - outpath = f'{fpath[:-4]}.dbf' + dbfile = str(db_path.absolute()).split("/")[-1] + if Path(dbfile).suffix in [".dbc", ".DBC"] and db_path.exists(): + outpath = f"{fpath[:-4]}.dbf" try: dbc2dbf(fpath, outpath) if Path(fpath).exists(): @@ -589,7 +615,7 @@ def _dbfc_to_parquets(self, fpath: str, local_dir: str) -> str(PosixPath): logging.error(e) raise e - parquet_dir = f'{fpath[:-4]}.parquet' + parquet_dir = f"{fpath[:-4]}.parquet" if Path(parquet_dir).exists() and any(os.listdir(parquet_dir)): return parquet_dir Path(parquet_dir).mkdir(exist_ok=True, parents=True) @@ -597,13 +623,13 @@ def _dbfc_to_parquets(self, fpath: str, local_dir: str) -> str(PosixPath): def decode_column(value): # https://stackoverflow.com/questions/57371164/django-postgres-a-string-literal-cannot-contain-nul-0x00-characters if isinstance(value, bytes): - return value.decode(encoding='iso-8859-1').replace('\x00', '') + return value.decode(encoding="iso-8859-1").replace("\x00", "") elif isinstance(value, str): - return str(value).replace('\x00', '') + return str(value).replace("\x00", "") else: return value - for d in self._stream_DBF(DBF(fpath, encoding='iso-8859-1', raw=True)): + for d in self._stream_DBF(DBF(fpath, encoding="iso-8859-1", raw=True)): try: df = pd.DataFrame(d) table = pa.Table.from_pandas(df.applymap(decode_column)) @@ -635,41 +661,41 @@ def _stream_DBF(self, dbf, chunk_size=30000): class FTP_SINAN: name: str diseases: dict = { - 'Animais Peçonhentos': 'ANIM', - 'Botulismo': 'BOTU', - 'Cancer': 'CANC', - 'Chagas': 'CHAG', - 'Chikungunya': 'CHIK', - 'Colera': 'COLE', - 'Coqueluche': 'COQU', - 'Contact Communicable Disease': 'ACBI', - 'Acidentes de Trabalho': 'ACGR', - 'Dengue': 'DENG', - 'Difteria': 'DIFT', - 'Esquistossomose': 'ESQU', - 'Febre Amarela': 'FAMA', - 'Febre Maculosa': 'FMAC', - 'Febre Tifoide': 'FTIF', - 'Hanseniase': 'HANS', - 'Hantavirose': 'HANT', - 'Hepatites Virais': 'HEPA', - 'Intoxicação Exógena': 'IEXO', - 'Leishmaniose Visceral': 'LEIV', - 'Leptospirose': 'LEPT', - 'Leishmaniose Tegumentar': 'LTAN', - 'Malaria': 'MALA', - 'Meningite': 'MENI', - 'Peste': 'PEST', - 'Poliomielite': 'PFAN', - 'Raiva Humana': 'RAIV', - 'Sífilis Adquirida': 'SIFA', - 'Sífilis Congênita': 'SIFC', - 'Sífilis em Gestante': 'SIFG', - 'Tétano Acidental': 'TETA', - 'Tétano Neonatal': 'TETN', - 'Tuberculose': 'TUBE', - 'Violência Domestica': 'VIOL', - 'Zika': 'ZIKA', + "Animais Peçonhentos": "ANIM", + "Botulismo": "BOTU", + "Cancer": "CANC", + "Chagas": "CHAG", + "Chikungunya": "CHIK", + "Colera": "COLE", + "Coqueluche": "COQU", + 
"Contact Communicable Disease": "ACBI", + "Acidentes de Trabalho": "ACGR", + "Dengue": "DENG", + "Difteria": "DIFT", + "Esquistossomose": "ESQU", + "Febre Amarela": "FAMA", + "Febre Maculosa": "FMAC", + "Febre Tifoide": "FTIF", + "Hanseniase": "HANS", + "Hantavirose": "HANT", + "Hepatites Virais": "HEPA", + "Intoxicação Exógena": "IEXO", + "Leishmaniose Visceral": "LEIV", + "Leptospirose": "LEPT", + "Leishmaniose Tegumentar": "LTAN", + "Malaria": "MALA", + "Meningite": "MENI", + "Peste": "PEST", + "Poliomielite": "PFAN", + "Raiva Humana": "RAIV", + "Sífilis Adquirida": "SIFA", + "Sífilis Congênita": "SIFC", + "Sífilis em Gestante": "SIFG", + "Tétano Acidental": "TETA", + "Tétano Neonatal": "TETN", + "Tuberculose": "TUBE", + "Violência Domestica": "VIOL", + "Zika": "ZIKA", } def __init__(self, name: str) -> None: @@ -683,11 +709,11 @@ def __diseasecheck__(self, name: str) -> str: return ( name if name in self.diseases.keys() - else ValueError(f'{name} not found.') + else ValueError(f"{name} not found.") ) def __repr__(self) -> str: - return f'SINAN Disease ({self.name})' + return f"SINAN Disease ({self.name})" def __str__(self) -> str: return self.name @@ -696,7 +722,7 @@ def __str__(self) -> str: def code(self) -> str: return self.diseases[self.name] - def get_years(self, stage: str = 'all') -> list: + def get_years(self, stage: str = "all") -> list: """ Returns the available years to download, if no stage is assigned, it will return years from both finals and @@ -706,16 +732,16 @@ def get_years(self, stage: str = 'all') -> list: def extract_years(paths): return [ - str(path).split('/')[-1].split('.dbc')[0][-2:] + str(path).split("/")[-1].split(".dbc")[0][-2:] for path in paths ] prelim_years = extract_years(self.prelims) finais_years = extract_years(self.finals) - if stage == 'prelim': + if stage == "prelim": return sorted(prelim_years) - elif stage == 'finais': + elif stage == "finais": return sorted(finais_years) return sorted(prelim_years + finais_years)