zestedesavoir · Jan-Ha-He · Jan 27, 2025 · Feb 22, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -14,6 +14,7 @@ base.db
 /zds/_version.py
 /geodata
 /errors/css
+/antispam-data
 
 /tutoriels-private-test
 /tutoriels-public-test

diff --git a/doc/source/back-end-code/antispam.rst b/doc/source/back-end-code/antispam.rst
@@ -0,0 +1,84 @@
+.. _module-antispam:
+
+=======================
+Module Anti-Spam de ZDS
+=======================
+
+Le module ``zds.antispam`` fournit un système de détection de contenu indésirable.
+
+Structure du module
+===================
+::
+
+    zds/antispam/
+    ├── __init__.py
+    ├── apps.py
+    ├── management/
+    ├── receivers.py         # Signaux
+    ├── spam_detector.py     # Détection principale
+    ├── spam_fields.py       # Champs surveillés
+    ├── spam_model_manager.py # Gestion des modèles d'entraînement
+    └── tests/               # Tests unitaires
+
+Fonctionnalités principales
+===========================
+- Détection de spam dans différents types de contenu
+- Entraînement de modèles spécifiques par type de contenu
+- Système d'alertes automatisées
+
+Composants clés
+===============
+
+SpamDetector (spam_detector.py)
+-------------------------------
+.. autoclass:: zds.antispam.spam_detector.SpamDetector
+   :members:
+   :undoc-members:
+
+   Principales méthodes:
+
+   - ``check_text(text, content_type)`` → bool
+   - ``send_alert(instance, field_name)`` → None
+
+SpamModelManager (spam_model_manager.py)
+----------------------------------------
+.. autoclass:: zds.antispam.spam_model_manager.SpamModelManager
+   :members:
+   :undoc-members:
+
+   Fonctionnalités:
+
+   - Entraînement des modèles (``train(content_type)``)
+   - Sauvegarde/chargement des modèles
+
+Utilisation typique
+===================
+
+Détection simple de spam dans la biographie d'un profil:
+
+.. code-block:: python
+
+   from zds.antispam.spam_detector import SpamDetector
+
+   detector = SpamDetector()
+   if detector.check_text(profile.biography, "PROFILE"):
+       detector.send_alert(profile, "biography")
+
+Entraînement d'un modèle:
+
+.. code-block:: python
+
+   from zds.antispam.spam_model_manager import SpamModelManager
+
+   manager = SpamModelManager()
+   manager.train("PROFILE")
+
+Intégration avec les signaux
+============================
+Le module écoute automatiquement, via ``receivers.py``, le signal ``post_save`` de Django et analyse les champs des modèles déclarés dans ``spam_fields.py``.
+
+Tests
+=====
+Pour lancer les tests:
+
+.. code-block:: bash
+
+   python manage.py test zds.antispam.tests
diff --git a/doc/source/back-end-code/arborescence-back.rst b/doc/source/back-end-code/arborescence-back.rst
@@ -14,6 +14,8 @@ On retrouve un dossier pour chaque module du site :
     zds/
     ├── article/ # module des articles
     │   └── ...
+    ├── antispam/ # module d'antispam
+    │   └── ...
     ├── featured/ # module des mises en avant
     │   └── ...
     ├── forum/ # module des forums

diff --git a/requirements.txt b/requirements.txt
@@ -17,6 +17,7 @@ lxml==5.3.0
 Pillow==10.4.0
 pymemcache==4.0.0
 requests==2.32.3
+scikit-learn==1.6.1
 typesense==0.21.0
 ua-parser==0.18.0
 

diff --git a/zds/antispam/__init__.py b/zds/antispam/__init__.py
diff --git a/zds/antispam/apps.py b/zds/antispam/apps.py
@@ -0,0 +1,8 @@
+from django.apps import AppConfig
+
+
+class AntispamConfig(AppConfig):
+    name = "zds.antispam"
+
+    def ready(self):
+        from . import receivers  # noqa
diff --git a/zds/antispam/management/__init__.py b/zds/antispam/management/__init__.py
diff --git a/zds/antispam/management/commands/__init__.py b/zds/antispam/management/commands/__init__.py
diff --git a/zds/antispam/management/commands/antispam_train.py b/zds/antispam/management/commands/antispam_train.py
@@ -0,0 +1,40 @@
+from django.core.management.base import BaseCommand
+
+from zds.antispam.spam_fields import spam_fields
+from zds.antispam.spam_model_manager import SpamModelManager
+
+
+class Command(BaseCommand):
+    def __init__(self):
+        # Dynamically extract available models from spam_fields
+        available_models = {field["scope"] for field in spam_fields}
+        self.help = (
+            "Retrain the spam filter model(s) and save them to a file.\n"
+            f"The available models are: {', '.join(available_models)}.\n"
+            "Use the --model option to specify a model to train, or omit it to train all models."
+        )
+        super().__init__()
+
+    def add_arguments(self, parser):
+        # Dynamically extract available models from spam_fields
+        available_models = {field["scope"] for field in spam_fields}
+        parser.add_argument(
+            "--model",
+            type=str,
+            choices=available_models,
+            help=f"Specify the model to train ({', '.join(available_models)}). If omitted, all models will be trained.",
+        )
+
+    def handle(self, *args, **options):
+        model_manager = SpamModelManager()
+
+        if options["model"]:
+            self.stdout.write(f"Starting retraining of the {options['model']} spam filter model...")
+            model_manager.train(options["model"])
+            self.stdout.write(f"Retraining of the {options['model']} model completed successfully.")
+        else:
+            self.stdout.write("Starting retraining of all spam filter models...")
+            # Dynamically train all models based on spam_fields
+            for model in {field["scope"] for field in spam_fields}:
+                model_manager.train(model)
+            self.stdout.write("Retraining of all models completed successfully.")
diff --git a/zds/antispam/receivers.py b/zds/antispam/receivers.py
@@ -0,0 +1,20 @@
+from django.db.models.signals import post_save
+from django.dispatch import receiver
+
+from zds.antispam.spam_detector import SpamDetector
+from zds.antispam.spam_fields import spam_fields
+
+
+@receiver(post_save)
+def analyze_record(sender, instance, **kwargs):
+    """
+    Signal handler to detect spam in configured fields.
+    """
+    for field_config in spam_fields:
+        if isinstance(instance, field_config["model"]):
+            detector = SpamDetector()
+            for field in field_config["fields"]:
+                field_value = getattr(instance, field, None)
+                if field_value and detector.check_text(field_value, field_config["scope"]):
+                    detector.send_alert(instance, field)
+            break
diff --git a/zds/antispam/spam_detector.py b/zds/antispam/spam_detector.py
@@ -0,0 +1,88 @@
+import logging
+from datetime import datetime
+
+from django.contrib.auth.models import User
+from django.utils.translation import gettext_lazy as _
+
+from zds.antispam.spam_fields import spam_fields
+from zds.antispam.spam_model_manager import SpamModelManager
+from zds.utils.models import Alert
+
+
+class SpamDetector:
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self.model_manager = SpamModelManager()
+
+    def check_text(self, text, content_type):
+        """
+        Check if a given text is spam for the specified content type.
+        """
+        if not text:
+            self.logger.warning(f"Skipped spam check: Empty text for content type '{content_type}'.")
+            return False
+
+        try:
+            prediction = self.model_manager.predict(content_type, [text])[0]
+            if prediction == 0:  # 0 indicates spam
+                self.logger.info(
+                    f"✘ Spam detected for content type '{content_type}'. Text: '{text[:30]}...' (Length: {len(text)})"
+                )
+                return True
+            else:
+                self.logger.info(
+                    f"✔️ No spam detected for content type '{content_type}'. Text: '{text[:30]}...' (Length: {len(text)})"
+                )
+                return False
+        except Exception as e:
+            self.logger.error(f"Error during spam detection for content type '{content_type}': {e}")
+            return False
+
+    def send_alert(self, instance, field_name):
+        """
+        Create an alert for a spam-suspect field with detailed context.
+        """
+        try:
+            # Find the spam field configuration for the instance
+            field_config = next(
+                (
+                    config
+                    for config in spam_fields
+                    if isinstance(instance, config["model"]) and field_name in config["fields"]
+                ),
+                None,
+            )
+            if not field_config:
+                self.logger.error(f"No spam field configuration found for {type(instance).__name__}.{field_name}")
+                return
+
+            # Extract scope and instance info
+            scope = field_config["scope"]
+            instance_info = field_config["get_instance_info"](instance)
+
+            # Map scope to the correct Alert model field
+            scope_to_alert_kwargs = {
+                "PROFILE": "profile",
+                "FORUM": "comment",
+                "CONTENT": "content",
+            }
+
+            if scope not in scope_to_alert_kwargs:
+                self.logger.error(f"Unsupported scope '{scope}' for alert creation.")
+                return
+
+            alert_kwargs = {
+                "author": User.objects.get(username="antispam"),
+                "scope": scope,
+                "text": _(f"Potential spam detected in {instance_info}, field '{field_name}'."),
+                "pubdate": datetime.now(),
+                scope_to_alert_kwargs[scope]: instance,
+            }
+
+            # Create the alert
+            Alert.objects.create(**alert_kwargs)
+            self.logger.info(f"Spam-Alert for {instance_info}, field '{field_name}' created.")
+        except User.DoesNotExist:
+            self.logger.error("The 'antispam' user does not exist. Please create this user.")
+        except Exception as e:
+            self.logger.error(f"Failed to create spam alert: {e}")
diff --git a/zds/antispam/spam_fields.py b/zds/antispam/spam_fields.py
@@ -0,0 +1,17 @@
+from zds.forum.models import Comment
+from zds.member.models import Profile
+
+spam_fields = [
+    {
+        "scope": "PROFILE",
+        "model": Profile,
+        "fields": ["biography", "sign"],
+        "get_instance_info": str,
+    },
+    {
+        "scope": "FORUM",
+        "model": Comment,
+        "fields": ["text"],
+        "get_instance_info": lambda instance: str(instance.author.username),
+    },
+]