Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(PNI): include PNI Database #181

Merged
merged 23 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,953 changes: 3,953 additions & 0 deletions poetry.lock
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for some reason, poetry.lock wasn't on repo yet 🤔

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ wget = "^3.2"
loguru = "^0.6.0"
Unidecode = "^1.3.6"
dateparser = "^1.1.8"
pandas = ">=1.5.3"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pandas 1.5.3 only provides `DataFrame.applymap`; `DataFrame.map` was added in pandas 2.1.0, hence the version bump.

pandas = ">=2.1.0"
urwid = "^2.1.2"
elasticsearch = { version = "7.16.2", extras=["preprocessing"] }
# FTP
Expand Down
29 changes: 26 additions & 3 deletions pysus/ftp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pathlib
from datetime import datetime
from ftplib import FTP
from typing import Any, Dict, List, Optional, Set, Union
from typing import Any, Dict, List, Optional, Set, Union, Self

import humanize
from aioftp import Client
Expand Down Expand Up @@ -48,6 +48,7 @@ class File:
extension: str
basename: str
path: str
# parent: Directory # TODO: This causes too much overhead
__info__: Set[Union[int, str, datetime]]

def __init__(self, path: str, name: str, info: dict) -> None:
Expand All @@ -60,6 +61,7 @@ def __init__(self, path: str, name: str, info: dict) -> None:
if path.endswith("/")
else path + "/" + self.basename
)
self.parent_path = self.path.replace(self.basename, "")
self.__info__ = info

def __str__(self) -> str:
Expand Down Expand Up @@ -327,7 +329,7 @@ def content(self):

def load(self):
"""
The content of a Directory must be explicity loaded
The content of a Directory must be explicitly loaded
"""
self.__content__ |= load_path(self.path)
self.loaded = True
Expand All @@ -340,6 +342,27 @@ def reload(self):
self.loaded = False
return self.load()

def is_parent(self, other: Union[Self, File]) -> bool:
    """
    Checks if Directory or File is inside (or at any subdir) of self.

    Walks upwards from `other` until the root ("/") is reached, comparing
    each visited path with `self.path`.

    Parameters:
        other: the Directory or File whose ancestry is inspected.

    Returns:
        True if `self` is the root, if `other` has the same path as
        `self`, or if `self.path` is found while walking up from `other`;
        False otherwise.
    """
    # The root directory contains everything.
    if self.path == "/":
        return True

    target = other
    while target.path != "/":

        if self.path == target.path:
            return True

        # Inspect the *current* node, not the original argument: checking
        # `other` here would rebuild the very same Directory on every
        # iteration and never terminate when `other` is a File located
        # outside of `self`.
        if isinstance(target, File):
            # TODO: Implement parent logic on File (too much overhead)
            # NOTE(review): assumes Directory() accepts `parent_path`
            # as-is (it may carry a trailing "/") - confirm.
            target = Directory(target.parent_path)
        else:
            target = target.parent

    return False


CACHE["/"] = Directory("/")

Expand Down Expand Up @@ -444,7 +467,7 @@ def __repr__(self) -> str:
def content(self) -> List[Union[Directory, File]]:
"""
Lists Database content. The `paths` will be loaded if this property is
called or if explicty using `load()`. To add specific Directory inside
called or if explicitly using `load()`. To add specific Directory inside
content, `load()` the directory and call `content` again.
"""
if not self.__content__:
Expand Down
97 changes: 97 additions & 0 deletions pysus/ftp/databases/ciha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from typing import List, Union, Optional

from pysus.ftp import Database, Directory, File
from pysus.ftp.utils import zfill_year, to_list, parse_UFs, UFs, MONTHS


class CIHA(Database):
    """
    Database definition for the CIHA files found under
    `/dissemin/publicos/CIHA/201101_/Dados`.

    File names are parsed positionally (see `format`): the first four
    characters are the group, the next two the UF, and the last four the
    two-digit year followed by the two-digit month.
    """

    name = "CIHA"
    # The trailing comma is required: without it the parentheses are plain
    # grouping and `paths` would be a single Directory instead of a tuple,
    # unlike the other databases' `paths`.
    paths = (Directory("/dissemin/publicos/CIHA/201101_/Dados"),)
    metadata = {
        "long_name": "Comunicação de Internação Hospitalar e Ambulatorial",
        "source": "http://ciha.datasus.gov.br/CIHA/index.php",
        "description": (
            "A CIHA foi criada para ampliar o processo de planejamento, programação, "
            "controle, avaliação e regulação da assistência à saúde permitindo um "
            "conhecimento mais abrangente e profundo dos perfis nosológico e "
            "epidemiológico da população brasileira, da capacidade instalada e do "
            "potencial de produção de serviços do conjunto de estabelecimentos de saúde "
            "do País. O sistema permite o acompanhamento das ações e serviços de saúde "
            "custeados por: planos privados de assistência à saúde; planos públicos; "
            "pagamento particular por pessoa física; pagamento particular por pessoa "
            "jurídica; programas e projetos federais (PRONON, PRONAS, PROADI); recursos "
            "próprios das secretarias municipais e estaduais de saúde; DPVAT; gratuidade "
            "e, a partir da publicação da Portaria GM/MS nº 2.905/2022, consórcios públicos. "
            "As informações registradas na CIHA servem como base para o processo de "
            "Certificação de Entidades Beneficentes de Assistência Social em Saúde (CEBAS) "
            "e para monitoramento dos programas PRONAS e PRONON."
        ),
    }
    groups = {
        "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial",
    }

    def describe(self, file: File):
        """
        Builds a description of a CIHA data file.

        Returns a dict with name, group, uf, month, year, size and
        last_update when `file` is a File with a .DBC or .DBF extension.
        Any other input is returned unchanged.
        """
        if not isinstance(file, File):
            return file

        if file.extension.upper() in [".DBC", ".DBF"]:
            group, _uf, year, month = self.format(file)

            # Fall back to the raw code when it is not a key of UFs.
            try:
                uf = UFs[_uf]
            except KeyError:
                uf = _uf

            description = {
                "name": str(file.basename),
                "group": self.groups[group],
                "uf": uf,
                "month": MONTHS[int(month)],
                "year": zfill_year(year),
                "size": file.info["size"],
                "last_update": file.info["modify"],
            }

            return description
        return file

    def format(self, file: File) -> tuple:
        """
        Splits a file name into `(group, uf, year, month)`.

        Group and UF are upper-cased slices of the name; the year is the
        two characters before the last two, passed through `zfill_year`;
        the month is the last two characters, returned as-is.
        """
        group, _uf = file.name[:4].upper(), file.name[4:6].upper()
        year, month = file.name[-4:-2], file.name[-2:]
        return group, _uf, zfill_year(year), month

    def get_files(
        self,
        uf: Optional[Union[List[str], str]] = None,
        year: Optional[Union[list, str, int]] = None,
        month: Optional[Union[list, str, int]] = None,
        group: Union[List[str], str] = "CIHA",
    ) -> List[File]:
        """
        Lists the .DBC/.DBF files of the database, optionally filtered.

        Parameters:
            uf: UF or list of UFs, parsed with `parse_UFs`.
            year: year or list of years; only the last two digits of each
                value are used.
            month: month or list of months, compared as two-digit strings.
            group: group or list of groups (case-insensitive).

        Returns:
            The files matching every filter that was provided.

        Raises:
            ValueError: if any requested group is not a key of `groups`.
        """
        files = [
            f for f in self.files if f.extension.upper() in [".DBC", ".DBF"]
        ]

        groups = [gr.upper() for gr in to_list(group)]

        if not all(gr in list(self.groups) for gr in groups):
            raise ValueError(
                "Unknown CIHA Group(s): "
                f"{set(groups).difference(list(self.groups))}"
            )

        files = [f for f in files if self.format(f)[0] in groups]

        if uf:
            ufs = parse_UFs(uf)
            files = [f for f in files if self.format(f)[1] in ufs]

        # The integer 0 is falsy, so "0"/"00" are checked explicitly to
        # keep that year selectable.
        if year or str(year) in ["0", "00"]:
            years = [zfill_year(str(y)[-2:]) for y in to_list(year)]
            files = [f for f in files if self.format(f)[2] in years]

        if month:
            months = [str(m)[-2:].zfill(2) for m in to_list(month)]
            files = [f for f in files if self.format(f)[3] in months]

        return files
20 changes: 15 additions & 5 deletions pysus/ftp/databases/cnes.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def load(
group in self.groups for group in [gr.upper() for gr in groups]
):
raise ValueError(
f"Unknown CNES group(s): {set(groups).difference(self.groups)}"
"Unknown CNES group(s): "
f"{set(groups).difference(self.groups)}"
)

for group in groups:
Expand All @@ -68,25 +69,34 @@ def load(
self.__loaded__.add(directory.name)
return self

def describe(self, file: File):
def describe(self, file: File) -> dict:
if not isinstance(file, File):
return file
return {}

if file.name == "GMufAAmm":
# Leftover
return {}

if file.extension.upper() in [".DBC", ".DBF"]:
group, _uf, year, month = self.format(file)

try:
uf = UFs[_uf]
except KeyError:
uf = _uf

description = {
"name": str(file.basename),
"group": self.groups[group],
"uf": UFs[_uf],
"uf": uf,
"month": MONTHS[int(month)],
"year": zfill_year(year),
"size": file.info["size"],
"last_update": file.info["modify"],
}

return description
return file
return {}

def format(self, file: File) -> tuple:
group, _uf = file.name[:2].upper(), file.name[2:4].upper()
Expand Down
94 changes: 94 additions & 0 deletions pysus/ftp/databases/pni.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import List, Union, Optional, Literal

from pysus.ftp import Database, Directory, File
from pysus.ftp.utils import zfill_year, to_list, parse_UFs, UFs


class PNI(Database):
name = "PNI"
paths = (
Directory("/dissemin/publicos/PNI/DADOS"),
)
metadata = {
"long_name": "Sistema de Informações do Programa Nacional de Imunizações",
"source": (
"https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/",
"https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/",
),
"description": (
"O SI-PNI é um sistema desenvolvido para possibilitar aos gestores "
"envolvidos no Programa Nacional de Imunização, a avaliação dinâmica "
"do risco quanto à ocorrência de surtos ou epidemias, a partir do "
"registro dos imunobiológicos aplicados e do quantitativo populacional "
"vacinado, agregados por faixa etária, período de tempo e área geográfica. "
"Possibilita também o controle do estoque de imunobiológicos necessário "
"aos administradores que têm a incumbência de programar sua aquisição e "
"distribuição. Controla as indicações de aplicação de vacinas de "
"imunobiológicos especiais e seus eventos adversos, dentro dos Centros "
"de Referências em imunobiológicos especiais."
),
}
groups = {
"CPNI": "Centro de Parto Normal Intra-Hospitalar",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's curious that PNI has a dataset for "Parto Normal Intra-Hospitalar"; it does not seem related...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By any chance, do you know the difference between CPNI and DPNI? Both file prefixes are found on the FTP server.

"DPNI": "Departamento de Imunização e Doenças Imunopreveníveis",
}

def describe(self, file: File) -> dict:
    """
    Builds a description of a PNI data file.

    Returns a dict with name, group, uf, year, size and last_update for
    files with a .DBC or .DBF extension, and an empty dict otherwise.
    """
    # Only data files are described.
    if file.extension.upper() not in [".DBC", ".DBF"]:
        return {}

    group, uf_code, year = self.format(file)

    # Fall back to the raw code when it is not a key of UFs.
    try:
        state = UFs[uf_code]
    except KeyError:
        state = uf_code

    return {
        "name": file.basename,
        "group": self.groups[group],
        "uf": state,
        "year": zfill_year(year),
        "size": file.info["size"],
        "last_update": file.info["modify"],
    }

def format(self, file: File) -> tuple:
    """
    Splits a file name into `(group, uf, year)`.

    The name must be exactly 8 characters long: the first four are the
    group, the next two the UF, and the last two the year, which is
    passed through `zfill_year`.

    Raises:
        ValueError: if the name is not 8 characters long.
    """
    name = file.name

    if len(name) != 8:
        raise ValueError(f"Can't format {name}")

    return name[:4], name[4:6], zfill_year(name[-2:])

def get_files(
    self,
    group: Union[list, Literal["CPNI", "DPNI"]],
    uf: Optional[Union[List[str], str]] = None,
    year: Optional[Union[list, str, int]] = None,
) -> List[File]:
    """
    Lists the .DBC/.DBF files of the database, optionally filtered.

    Parameters:
        group: group or list of groups (case-insensitive); each must be a
            key of `groups`.
        uf: UF or list of UFs, parsed with `parse_UFs`.
        year: year or list of years; only the last two digits of each
            value are used.

    Returns:
        The files matching every filter that was provided.

    Raises:
        ValueError: if any requested group is not a key of `groups`, or
            (from `format`) if a candidate file name cannot be parsed.
    """
    files = [
        f for f in self.files if f.extension.upper() in [".DBC", ".DBF"]
    ]

    groups = [gr.upper() for gr in to_list(group)]

    if not all(gr in list(self.groups) for gr in groups):
        raise ValueError(
            "Unknown PNI Group(s): "
            f"{set(groups).difference(list(self.groups))}"
        )

    files = [f for f in files if self.format(f)[0] in groups]

    if uf:
        ufs = parse_UFs(uf)
        files = [f for f in files if self.format(f)[1] in ufs]

    # The integer 0 is falsy, so "0"/"00" are checked explicitly to keep
    # that year selectable.
    if year or str(year) in ["0", "00"]:
        years = [zfill_year(str(y)[-2:]) for y in to_list(year)]
        files = [f for f in files if self.format(f)[2] in years]

    return files
18 changes: 12 additions & 6 deletions pysus/ftp/databases/sia.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@

class SIA(Database):
name = "SIA"
paths = [
paths = (
Directory("/dissemin/publicos/SIASUS/199407_200712/Dados"),
Directory("/dissemin/publicos/SIASUS/200801_/Dados"),
]
)
metadata = {
"long_name": "Sistema de Informações Ambulatoriais",
"source": "http://sia.datasus.gov.br/principal/index.php",
Expand Down Expand Up @@ -41,8 +41,8 @@ class SIA(Database):
"IMPBO": "", # TODO
"PA": "Produção Ambulatorial",
"PAM": "", # TODO
"PAR": "", # TODO
"PAS": "", # TODO
"PAR": "", # TODO
"PAS": "", # TODO
"PS": "RAAS Psicossocial",
"SAD": "RAAS de Atenção Domiciliar",
}
Expand All @@ -51,10 +51,15 @@ def describe(self, file: File) -> dict:
if file.extension.upper() == ".DBC":
group, _uf, year, month = self.format(file)

try:
uf = UFs[_uf]
except KeyError:
uf = _uf

description = {
"name": str(file.basename),
"group": self.groups[group],
"uf": UFs[_uf],
"uf": uf,
"month": MONTHS[int(month)],
"year": zfill_year(year),
"size": file.info["size"],
Expand Down Expand Up @@ -88,7 +93,8 @@ def get_files(

if not all(gr in list(self.groups) for gr in groups):
raise ValueError(
f"Unknown SIH Group(s): {set(groups).difference(list(self.groups))}"
"Unknown SIA Group(s): "
f"{set(groups).difference(list(self.groups))}"
)

files = list(filter(lambda f: self.format(f)[0] in groups, files))
Expand Down
Loading
Loading