# Spaces: sahra02 / marsa-moderation-api (Sleeping)
# File size: 12,424 Bytes
# 054a1ae

"""

Modèles de modération textuelle pré-entraînés

Pour détecter contenu NSFW, armes, toxicité sans entraînement

"""

import re
from typing import Dict

from transformers import pipeline


# ============================================================================
# OPTION 1: Détecteur de toxicité multilingue (RECOMMANDÉ)
# ============================================================================

class ToxicityDetector:
    """Toxicity / hate-speech detector backed by a pre-trained classifier.

    Model: facebook/roberta-hate-speech-dynabench-r4-target

    Loading is best effort: if the pipeline cannot be created, the error is
    printed, ``classifier`` is set to ``None`` and ``predict`` then reports
    every text as non-toxic with the label ``'unknown'``.

    NOTE(review): the original description calls this detector multilingual;
    confirm the model's language coverage on its model card.
    """

    def __init__(self):
        """Load the text-classification pipeline, falling back to ``None`` on failure."""
        print("🔧 Chargement du détecteur de toxicité...")
        self.model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
        try:
            self.classifier = pipeline(
                "text-classification",
                model=self.model_name
            )
            print(f"✓ Modèle chargé: {self.model_name}")
        except Exception as e:
            # Deliberate best effort: a missing model must not prevent the
            # caller from being constructed.
            print(f"⚠️  Erreur: {e}")
            self.classifier = None

    def predict(self, text: str, threshold: float = 0.5) -> Dict:
        """Classify ``text`` and tell whether it is toxic.

        Args:
            text: Text to classify.
            threshold: Minimum score the top label must reach for the text
                to be reported as toxic.

        Returns:
            A dict with:
                'is_toxic': bool, True when the top label is 'hate' or
                    'offensive' and its score is at least ``threshold``,
                'confidence': float, score of the top label (0.0 when no
                    model is loaded),
                'label': str, top label ('unknown' when no model is loaded).
        """
        if self.classifier is None:
            return {'is_toxic': False, 'confidence': 0.0, 'label': 'unknown'}

        # truncation=True: text longer than the model's maximum sequence
        # length is cut by the tokenizer instead of making the model raise.
        result = self.classifier(text, truncation=True)[0]

        # NOTE(review): 'offensive' is accepted alongside 'hate'; confirm the
        # label set this model actually emits.
        is_toxic = result['label'] in ('hate', 'offensive') and result['score'] >= threshold

        return {
            'is_toxic': is_toxic,
            'confidence': result['score'],
            'label': result['label']
        }


# ============================================================================
# OPTION 2: Détecteur de contenu NSFW textuel
# ============================================================================

class NSFWTextDetector:
    """NSFW text detector backed by a pre-trained classifier.

    Model: michellejieli/NSFW_text_classifier

    Loading is best effort: if the pipeline cannot be created, the error is
    printed, ``classifier`` is set to ``None`` and ``predict`` then reports
    every text as safe with the label ``'unknown'``.
    """

    def __init__(self):
        """Load the text-classification pipeline, falling back to ``None`` on failure."""
        print("🔧 Chargement du détecteur NSFW textuel...")
        self.model_name = "michellejieli/NSFW_text_classifier"
        try:
            self.classifier = pipeline(
                "text-classification",
                model=self.model_name
            )
            print(f"✓ Modèle chargé: {self.model_name}")
        except Exception as e:
            # Deliberate best effort: a missing model must not prevent the
            # caller from being constructed.
            print(f"⚠️  Erreur: {e}")
            self.classifier = None

    def predict(self, text: str, threshold: float = 0.7) -> Dict:
        """Classify ``text`` and tell whether it contains NSFW content.

        Args:
            text: Text to classify.
            threshold: Minimum score the 'NSFW' label must reach for the
                text to be reported as NSFW.

        Returns:
            A dict with:
                'is_nsfw': bool, True when the top label is 'NSFW' and its
                    score is at least ``threshold``,
                'confidence': float, score of the top label (0.0 when no
                    model is loaded),
                'label': str, top label ('unknown' when no model is loaded).
        """
        if self.classifier is None:
            return {'is_nsfw': False, 'confidence': 0.0, 'label': 'unknown'}

        # truncation=True: text longer than the model's maximum sequence
        # length is cut by the tokenizer instead of making the model raise.
        top = self.classifier(text, truncation=True)[0]

        is_nsfw = top['label'] == 'NSFW' and top['score'] >= threshold

        return {
            'is_nsfw': is_nsfw,
            'confidence': top['score'],
            'label': top['label']
        }


# ============================================================================
# OPTION 3: Détection par mots-clés + règles (SIMPLE ET EFFICACE)
# ============================================================================

class KeywordBasedModerator:
    """Rule-based moderator that rejects text containing sensitive keywords.

    Three keyword lists (weapons, adult content, spam) are plain instance
    attributes so they can be adapted after construction; they are re-read
    on every ``predict`` call.
    """

    # Keywords shorter than this must match a whole word (a trailing plural
    # "s"/"x" is tolerated). Short entries such as 'nu' or 'arme' would
    # otherwise fire inside unrelated words like "avenue" or "alarme".
    # Longer keywords only need to start at a word boundary, so inflected
    # forms ("pistolets", "pornographie" for 'porno') are still caught.
    _PREFIX_MATCH_MIN_LEN = 5

    # Start of the rejection reason for each reported category.
    _REASON_PREFIX = {
        'weapon': "Mention d'arme détectée",
        'nsfw': "Contenu adulte détecté",
        'spam': "Contenu spam détecté",
    }

    def __init__(self):
        """Set up the default keyword lists (adapt them to your own needs)."""
        self.weapon_keywords = [
            # Firearms
            'pistolet', 'revolver', 'fusil', 'arme', 'gun', 'rifle',
            'calibre', 'munition', 'cartouche', 'glock', 'ak47', 'beretta',
            'arme à feu', 'arme de guerre', 'firearm',

            # Bladed weapons
            'couteau', 'poignard', 'machette', 'sabre', 'épée', 'dague',
            'knife', 'sword', 'blade', 'lame'
        ]

        self.nsfw_keywords = [
            # Adult content
            'sexe', 'xxx', 'porn', 'porno', 'pornographique', 'hentai',
            'adulte', 'érotique', 'nue', 'nu', 'nudité', 'sexy',
            'sex', 'nude', 'explicit', 'nsfw', 'erotic',
            'escort', 'prostitution', 'massage érotique',

            # Explicit content
            'orgasme', 'viagra', 'cialis', 'sexuel', 'sexuelle'
        ]

        self.spam_keywords = [
            'cliquez ici', 'argent facile', 'devenez riche',
            'miracle', 'gratuit!!!', '100% garanti',
            'click here', 'make money fast', 'limited offer'
        ]

    @classmethod
    def _keyword_in_text(cls, needle: str, text: str) -> bool:
        """Tell whether the lowercased keyword ``needle`` occurs in ``text`` as a word."""
        # The keyword must never start in the middle of another word.
        pattern = r"(?<!\w)" + re.escape(needle)
        if len(needle) < cls._PREFIX_MATCH_MIN_LEN:
            # Short keyword: whole word only, optional plural ending.
            pattern += r"(?:s|x)?(?!\w)"
        return re.search(pattern, text) is not None

    def _find_keywords(self, text: str, keywords) -> list:
        """Return the entries of ``keywords`` present in ``text``, in list order."""
        return [kw for kw in keywords if self._keyword_in_text(kw.lower(), text)]

    def predict(self, title: str, description: str) -> Dict:
        """Screen a title and description for sensitive keywords.

        Matching is case-insensitive and word-aware (see
        ``_PREFIX_MATCH_MIN_LEN``), so a keyword embedded in an unrelated
        word does not trigger a rejection.

        Args:
            title: Title of the text to moderate.
            description: Body of the text to moderate.

        Returns:
            A dict with:
                'approved': bool, True when no keyword was found,
                'reason': str, 'Texte approuvé' or a rejection message that
                    lists up to three keywords of the reported category,
                'detected_keywords': list of every matched keyword
                    (weapons, then adult content, then spam),
                'category': 'weapon', 'nsfw', 'spam' or 'safe'; when several
                    categories match, the last one checked is reported,
                'confidence': 1.0 when at least one keyword matched, else 0.0.
        """
        text = f"{title} {description}".lower()

        # Checked in this order; the order defines which category is reported
        # when several match.
        hits_by_category = [
            ('weapon', self._find_keywords(text, self.weapon_keywords)),
            ('nsfw', self._find_keywords(text, self.nsfw_keywords)),
            ('spam', self._find_keywords(text, self.spam_keywords)),
        ]

        detected_keywords = []
        category = 'safe'
        category_hits = []
        for name, hits in hits_by_category:
            detected_keywords.extend(hits)
            if hits:
                category = name
                category_hits = hits

        approved = not detected_keywords

        reason = 'Texte approuvé'
        if not approved:
            # Quote the keywords of the reported category so the message and
            # the category never contradict each other.
            keywords_str = ', '.join(category_hits[:3])  # Top 3
            reason = f"{self._REASON_PREFIX[category]} - Texte: {keywords_str}"

        return {
            'approved': approved,
            'reason': reason,
            'detected_keywords': detected_keywords,
            'category': category,
            'confidence': 1.0 if detected_keywords else 0.0
        }


# ============================================================================
# OPTION 4: Approche hybride (MEILLEURE SOLUTION)
# ============================================================================

class HybridTextModerator:
    """Two-stage moderator: keyword screening first, then optional ML detectors.

    Attributes:
        keyword_detector: The ``KeywordBasedModerator`` used for stage one.
        ml_detectors: Detectors whose model loaded successfully, in the
            order they are consulted.
        toxicity_detector / nsfw_detector: The constructed detectors, or
            ``None`` when ML is disabled or construction failed.
    """

    def __init__(self, use_ml_model: bool = True):
        """Build the keyword moderator and, optionally, the ML detectors.

        Args:
            use_ml_model: When False, no model is loaded and only the
                keyword stage is used.
        """
        # Keyword detector (fast)
        self.keyword_detector = KeywordBasedModerator()

        # ML detectors (slower, finer-grained)
        self.ml_detectors = []

        # Always defined, so callers can inspect them even when ML is
        # disabled or a detector could not be built.
        self.toxicity_detector = None
        self.nsfw_detector = None

        if use_ml_model:
            self.toxicity_detector = self._try_load(ToxicityDetector)
            self.nsfw_detector = self._try_load(NSFWTextDetector)

            # Only detectors that actually hold a model take part in predict().
            for detector in (self.toxicity_detector, self.nsfw_detector):
                if detector is not None and detector.classifier is not None:
                    self.ml_detectors.append(detector)

    @staticmethod
    def _try_load(detector_cls):
        """Instantiate ``detector_cls``; print the error and return None on failure."""
        try:
            return detector_cls()
        except Exception as e:
            # Best effort: the keyword stage keeps working without this model,
            # but the failure is reported instead of being silently dropped.
            print(f"⚠️  Erreur: {e}")
            return None

    def predict(self, title: str, description: str) -> Dict:
        """Moderate a title and description with keywords, then with ML.

        Strategy:
        1. Run the keyword moderator; reject immediately on any match.
        2. Otherwise run each loaded ML detector on the combined text and
           reject on the first positive result.
        3. Approve when nothing fired.

        Args:
            title: Title of the text to moderate.
            description: Body of the text to moderate.

        Returns:
            A dict with:
                'decision': 'rejected' or 'approved',
                'confidence': 1.0 for keyword rejections and approvals,
                    otherwise the ML detector's score,
                'reason': human-readable explanation,
                'method': 'keywords', 'ml_toxicity', 'ml_nsfw' or 'hybrid',
                'details': the raw result of the stage that rejected the
                    text, or an empty dict on approval.
        """
        text = f"{title} {description}"

        # STEP 1: keyword check (fast)
        keyword_result = self.keyword_detector.predict(title, description)

        if not keyword_result['approved']:
            # Immediate rejection when a keyword matched
            return {
                'decision': 'rejected',
                'confidence': 1.0,
                'reason': keyword_result['reason'],
                'method': 'keywords',
                'details': keyword_result
            }

        # STEP 2: ML check (finer-grained, slower); no-op when the list is empty
        for detector in self.ml_detectors:
            if isinstance(detector, ToxicityDetector):
                ml_result = detector.predict(text)
                if ml_result['is_toxic']:
                    return {
                        'decision': 'rejected',
                        'confidence': ml_result['confidence'],
                        'reason': f"Contenu toxique détecté ({ml_result['label']})",
                        'method': 'ml_toxicity',
                        'details': ml_result
                    }

            elif isinstance(detector, NSFWTextDetector):
                ml_result = detector.predict(text)
                if ml_result['is_nsfw']:
                    return {
                        'decision': 'rejected',
                        'confidence': ml_result['confidence'],
                        'reason': "Contenu NSFW détecté",
                        'method': 'ml_nsfw',
                        'details': ml_result
                    }

        # STEP 3: nothing fired
        return {
            'decision': 'approved',
            'confidence': 1.0,
            'reason': 'Texte approuvé',
            'method': 'hybrid',
            'details': {}
        }


# ============================================================================
# COMPARAISON DES OPTIONS
# ============================================================================

def compare_moderators():
    """Run the keyword and hybrid moderators on sample texts and print the verdicts.

    The hybrid moderator is built with ``use_ml_model=False`` so the demo
    does not load any model. A short list of recommendations is printed at
    the end.
    """
    banner = "=" * 70
    rule = "-" * 70

    print(banner)
    print("COMPARAISON DES MODÉRATEURS TEXTUELS")
    print(banner)

    # Sample (title, description) pairs
    samples = [
        ("iPhone 13 Pro", "Téléphone en excellent état"),
        ("Vente de pistolet", "Arme de défense calibre 9mm"),
        ("Massage à domicile", "Service xxx pour adultes"),
        ("Belle villa Dakar", "Maison spacieuse avec piscine"),
        ("Argent facile", "Cliquez ici pour devenir riche!!!"),
    ]

    # Run 1: keywords only
    print("\n📋 MODÉRATEUR PAR MOTS-CLÉS (Rapide)")
    print(rule)
    by_keywords = KeywordBasedModerator()
    for heading, body in samples:
        verdict = by_keywords.predict(heading, body)
        mark = "❌" if not verdict['approved'] else "✅"
        print(f"{mark} {heading}: {verdict['reason']}")

    # Run 2: hybrid, with ML switched off for the demo
    print("\n🔀 MODÉRATEUR HYBRIDE (Recommandé)")
    print(rule)
    hybrid = HybridTextModerator(use_ml_model=False)
    for heading, body in samples:
        verdict = hybrid.predict(heading, body)
        mark = "❌" if verdict['decision'] != 'approved' else "✅"
        print(f"{mark} {heading}: {verdict['reason']}")

    print("\n" + banner)
    print("RECOMMANDATIONS")
    print(banner)
    print("""

    1. COMMENCER avec KeywordBasedModerator (simple, rapide, efficace)

    2. AJOUTER vos propres mots-clés selon votre contexte

    3. PASSER à HybridTextModerator si besoin de plus de finesse

    4. FINE-TUNER DistilBERT si vous avez beaucoup de données labelisées

    """)


# Script entry point: run the demo comparison only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    compare_moderators()