Skip to content

Commit

Permalink
Check closed with API Entreprises
Browse files Browse the repository at this point in the history
  • Loading branch information
mlvernay committed Jan 22, 2025
1 parent 6ea27b5 commit faea902
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 0 deletions.
75 changes: 75 additions & 0 deletions erp/management/commands/api_entreprises_clean_closed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from datetime import timedelta

from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone

from erp.models import Activite, Erp
from erp.provider.entreprise import check_closed

# Names (Activite.nom) of the activities whose ERPs are excluded from the
# closure check. Every name listed here must match an existing Activite row,
# otherwise the command refuses to run.
# NOTE(review): presumably public services that the company search cannot
# reliably resolve — confirm the rationale with the author.
IGNORED_ACTIVITIES = ["Administration publique", "Mairie", "Gendarmerie", "Bureau de poste"]


class Command(BaseCommand):
    """Flag published ERPs that the entreprise provider reports as closed.

    The command walks the published ERPs that were never checked, or whose
    last check is older than ``--nb_days`` days, skipping the activities
    listed in ``IGNORED_ACTIVITIES``. Each ERP is looked up through
    ``check_closed``; ERPs reported as closed get ``permanently_closed``
    set, the others get their ``check_closed_at`` timestamp refreshed.

    Nothing is written to the database unless ``--write`` is given.
    """

    help = "Check for closed ERPs from the API Entreprises"

    def add_arguments(self, parser):
        """Declare the ``--start_pk``, ``--write`` and ``--nb_days`` options."""
        parser.add_argument(
            "--start_pk",
            type=int,
            required=False,
            default=0,
            help="Resume the check since this given ERP PK in our DB.",
        )
        parser.add_argument(
            "--write",
            default=False,
            action="store_true",
            help="Actually edit the database",
        )

        parser.add_argument(
            "--nb_days",
            type=int,
            required=False,
            default=60,
            help="Check the ERPs which have not been checked in the last nb_days.",
        )

    def _flag_erp_as_closed(self, existing_erp):
        """Mark ``existing_erp`` as permanently closed (no-op in dry-run mode)."""
        self.stdout.write(f"Flag permanently closed ERP: {existing_erp} - {existing_erp.get_absolute_uri()}")
        if not self.write:
            self.stdout.write("Dry run mode, no DB action, use --write to apply this change")
            return

        existing_erp.permanently_closed = True
        existing_erp.save()

    @staticmethod
    def _build_query(erp):
        """Build the free-text search query (name + address) for ``erp``.

        Uses "numero voie" when a street number is known, the ``lieu_dit``
        otherwise. Missing address parts are dropped instead of being
        rendered as the literal string "None".
        """
        street_parts = (erp.numero, erp.voie) if erp.numero else (erp.lieu_dit,)
        address_parts = (*street_parts, erp.code_postal, erp.commune)
        address = " ".join(str(part) for part in address_parts if part)
        return f"{erp.nom}, {address}"

    def handle(self, *args, **options):
        """Run the closure check over every eligible ERP, ordered by PK."""
        self.write = options["write"]
        self.start_pk = options.get("start_pk")

        # A missing activity most likely means a renamed/mistyped entry: stop
        # rather than silently checking ERPs that were meant to be ignored.
        ignored_activities = Activite.objects.filter(nom__in=IGNORED_ACTIVITIES)
        if ignored_activities.count() != len(IGNORED_ACTIVITIES):
            self.stdout.write(
                "Please check the IGNORED_ACTIVITIES list, at least one activity has not been found. Exit..."
            )
            return

        limit_date = timezone.now() - timedelta(days=options["nb_days"])
        qs = Erp.objects.published().filter(Q(check_closed_at=None) | Q(check_closed_at__lte=limit_date))
        qs = qs.exclude(activite__in=ignored_activities)
        if self.start_pk:
            qs = qs.filter(pk__gte=self.start_pk)
        # Ordering by PK is what makes --start_pk usable to resume a run.
        qs = qs.order_by("pk")

        for erp in qs.iterator():
            self.stdout.write(f"Checking ERP with PK {erp.pk}")

            # The lookup needs the INSEE code of the linked commune; without
            # it the ERP cannot be checked, so skip it instead of crashing
            # the whole run on an AttributeError.
            if erp.commune_ext is None:
                self.stdout.write(f"Skipping ERP with PK {erp.pk}: no linked commune")
                continue

            if check_closed(self._build_query(erp), erp.commune_ext.code_insee):
                self._flag_erp_as_closed(erp)
                continue

            if self.write:
                erp.check_closed_at = timezone.now()
                erp.save(update_fields=("check_closed_at",))
22 changes: 22 additions & 0 deletions erp/provider/entreprise.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,25 @@ def search(terms, code_insee, activities):
return []
except requests.exceptions.RequestException as err:
raise RuntimeError(f"entreprise api error: {err}")


def check_closed(term, code_insee):
    """Tell whether the establishment matching ``term`` is reported as closed.

    Queries ``BASE_URL_ENTERPRISE_API`` with the free-text ``term`` restricted
    to the commune identified by ``code_insee``.

    Returns ``True`` only when the search yields exactly one result and that
    result's ``siege`` carries a non-null ``date_fermeture``. Any other
    outcome returns ``False``: zero or several matches, a network error, a
    timeout, or an unparsable response. The check is best-effort and never
    raises for request failures.
    """
    payload = {
        "per_page": MAX_PER_PAGE,
        "page": 1,
        "q": term,
        "code_insee": code_insee,
        "categorie_entreprise": "PME,ETI",
    }
    try:
        # The request itself must live inside the try block: connection
        # errors and timeouts are raised by requests.get(), not by .json().
        res = requests.get(BASE_URL_ENTERPRISE_API, payload, timeout=5)
        results = res.json().get("results") or []
    except requests.exceptions.RequestException:
        # Covers timeouts, connection errors and JSON decoding errors
        # (requests' JSONDecodeError derives from RequestException).
        return False

    # Only trust an unambiguous match: with several candidates we cannot know
    # which one corresponds to the ERP being checked.
    if len(results) != 1:
        return False

    # "siege" may be absent or explicitly null in the payload.
    siege = results[0].get("siege") or {}
    return siege.get("date_fermeture") is not None

0 comments on commit faea902

Please sign in to comment.