Spaces:

sahra02
/

marsa-moderation-api

Sleeping

App Files Files Community

marsa-moderation-api / text_moderation_system.py

sahra02

Upload 6 files

054a1ae verified 10 days ago

raw

history blame contribute delete

12.4 kB

	"""
	Modèles de modération textuelle pré-entraînés
	Pour détecter contenu NSFW, armes, toxicité sans entraînement
	"""

	from transformers import pipeline
	from typing import Dict


	# ============================================================================
	# OPTION 1: Détecteur de toxicité multilingue (RECOMMANDÉ)
	# ============================================================================

	class ToxicityDetector:
	    """
	    Hate-speech / toxicity detector backed by a Hugging Face
	    ``text-classification`` pipeline.

	    Model: facebook/roberta-hate-speech-dynabench-r4-target

	    NOTE(review): earlier notes described this detector as multilingual;
	    confirm the language coverage against the model card.
	    """

	    # Labels treated as toxic once their score reaches the threshold.
	    _TOXIC_LABELS = frozenset({'hate', 'offensive'})

	    def __init__(self):
	        """
	        Load the classification pipeline.

	        Loading is best-effort: on any failure the error is printed and
	        ``self.classifier`` is set to ``None``, in which case ``predict``
	        returns its neutral fallback instead of raising.
	        """
	        print("🔧 Chargement du détecteur de toxicité...")
	        try:
	            self.model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
	            self.classifier = pipeline(
	                "text-classification",
	                model=self.model_name
	            )
	            print(f"✓ Modèle chargé: {self.model_name}")
	        except Exception as e:
	            print(f"⚠️ Erreur: {e}")
	            self.classifier = None

	    def predict(self, text: str, threshold: float = 0.5) -> Dict:
	        """
	        Classify ``text`` and report whether it is toxic.

	        Args:
	            text: Text to classify.
	            threshold: Minimum score a toxic label needs to be flagged.

	        Returns:
	            {
	                'is_toxic': bool,     # toxic label with score >= threshold
	                'confidence': float,  # score of the first returned prediction
	                'label': str          # label of the first returned prediction
	            }
	            When no model is loaded the result is
	            ``{'is_toxic': False, 'confidence': 0.0, 'label': 'unknown'}``.
	        """
	        if self.classifier is None:
	            return {'is_toxic': False, 'confidence': 0.0, 'label': 'unknown'}

	        # Ask the tokenizer to truncate so that inputs longer than the
	        # model's maximum sequence length are cut instead of failing at
	        # inference time.
	        result = self.classifier(text, truncation=True)[0]

	        label = result['label']
	        score = result['score']
	        is_toxic = label in self._TOXIC_LABELS and score >= threshold

	        return {
	            'is_toxic': is_toxic,
	            'confidence': score,
	            'label': label
	        }


	# ============================================================================
	# OPTION 2: Détecteur de contenu NSFW textuel
	# ============================================================================

	class NSFWTextDetector:
	    """
	    NSFW text detector backed by a Hugging Face ``text-classification``
	    pipeline.

	    Model: michellejieli/NSFW_text_classifier
	    """

	    def __init__(self):
	        """
	        Load the classification pipeline.

	        Loading is best-effort: on any failure the error is printed and
	        ``self.classifier`` is set to ``None``, in which case ``predict``
	        returns its neutral fallback instead of raising.
	        """
	        print("🔧 Chargement du détecteur NSFW textuel...")
	        try:
	            self.model_name = "michellejieli/NSFW_text_classifier"
	            self.classifier = pipeline(
	                "text-classification",
	                model=self.model_name
	            )
	            print(f"✓ Modèle chargé: {self.model_name}")
	        except Exception as e:
	            print(f"⚠️ Erreur: {e}")
	            self.classifier = None

	    def predict(self, text: str, threshold: float = 0.7) -> Dict:
	        """
	        Classify ``text`` and report whether it is NSFW.

	        Args:
	            text: Text to classify.
	            threshold: Minimum score the 'NSFW' label needs to be flagged.

	        Returns:
	            {
	                'is_nsfw': bool,      # label is 'NSFW' with score >= threshold
	                'confidence': float,  # score of the first returned prediction
	                'label': str          # label of the first returned prediction
	            }
	            When no model is loaded the result is
	            ``{'is_nsfw': False, 'confidence': 0.0, 'label': 'unknown'}``.
	        """
	        if self.classifier is None:
	            return {'is_nsfw': False, 'confidence': 0.0, 'label': 'unknown'}

	        # Ask the tokenizer to truncate so that inputs longer than the
	        # model's maximum sequence length are cut instead of failing at
	        # inference time.
	        result = self.classifier(text, truncation=True)[0]

	        label = result['label']
	        score = result['score']
	        is_nsfw = label == 'NSFW' and score >= threshold

	        return {
	            'is_nsfw': is_nsfw,
	            'confidence': score,
	            'label': label
	        }


	# ============================================================================
	# OPTION 3: Détection par mots-clés + règles (SIMPLE ET EFFICACE)
	# ============================================================================

	class KeywordBasedModerator:
	    """
	    Rule-based moderator that flags listings containing sensitive keywords.

	    Three keyword lists are checked: weapons, adult (NSFW) content and
	    spam. The lists are plain instance attributes and may be edited after
	    construction; they are read again on every ``predict`` call.
	    """

	    # Class-level default so instances created without ``__init__`` still
	    # use whole-word matching.
	    whole_words: bool = True

	    def __init__(self, whole_words: bool = True):
	        """
	        Args:
	            whole_words: When True (default) a keyword only matches as a
	                whole word, optionally followed by a plural ``s``/``x``,
	                so 'nu' no longer fires on "menu" nor 'nue' on "avenue".
	                Pass False for plain substring matching.
	        """
	        self.whole_words = whole_words

	        # Sensitive keywords (adapt to your own needs)
	        self.weapon_keywords = [
	            # Firearms
	            'pistolet', 'revolver', 'fusil', 'arme', 'gun', 'rifle',
	            'calibre', 'munition', 'cartouche', 'glock', 'ak47', 'beretta',
	            'arme à feu', 'arme de guerre', 'firearm',

	            # Bladed weapons
	            'couteau', 'poignard', 'machette', 'sabre', 'épée', 'dague',
	            'knife', 'sword', 'blade', 'lame'
	        ]

	        self.nsfw_keywords = [
	            # Adult content
	            'sexe', 'xxx', 'porn', 'porno', 'pornographique', 'hentai',
	            'adulte', 'érotique', 'nue', 'nu', 'nudité', 'sexy',
	            'sex', 'nude', 'explicit', 'nsfw', 'erotic',
	            'escort', 'prostitution', 'massage érotique',

	            # Explicit content
	            'orgasme', 'viagra', 'cialis', 'sexuel', 'sexuelle'
	        ]

	        self.spam_keywords = [
	            'cliquez ici', 'argent facile', 'devenez riche',
	            'miracle', 'gratuit!!!', '100% garanti',
	            'click here', 'make money fast', 'limited offer'
	        ]

	    def _keyword_in_text(self, keyword: str, text: str) -> bool:
	        """Return True when ``keyword`` occurs in the already-lowercased ``text``."""
	        import re  # local import keeps this class self-contained

	        needle = keyword.lower()
	        if not self.whole_words:
	            # Legacy behaviour: plain substring test.
	            return needle in text
	        if not needle:
	            # An empty keyword would otherwise match every text.
	            return False

	        # Tolerate a plural suffix only after a keyword ending in a letter
	        # or digit ('pistolets', 'couteaux'), not after e.g. 'gratuit!!!'.
	        plural = '[sx]?' if needle[-1].isalnum() else ''
	        # The lookarounds forbid a word character on either side, so the
	        # keyword cannot match in the middle of a longer word.
	        pattern = rf'(?<!\w){re.escape(needle)}{plural}(?!\w)'
	        return re.search(pattern, text) is not None

	    def predict(self, title: str, description: str) -> Dict:
	        """
	        Detect inappropriate content in a listing by keyword lookup.

	        ``title`` and ``description`` are joined with a space and
	        lowercased before matching.

	        Returns:
	            {
	                'approved': bool,               # True when nothing matched
	                'reason': str,                  # message quoting up to 3 keywords
	                'detected_keywords': List[str], # all matches: weapon, nsfw, spam order
	                'category': str,                # 'weapon', 'nsfw', 'spam' or 'safe'
	                'confidence': float             # 1.0 if anything matched, else 0.0
	            }

	        When several categories match, ``category`` is the last matching
	        one in the order weapon -> nsfw -> spam, and ``reason`` quotes
	        keywords from that same category so the two always agree.
	        """
	        text = f"{title} {description}".lower()

	        # (category name, keyword list, message prefix), checked in order.
	        checks = (
	            ('weapon', self.weapon_keywords, "Mention d'arme détectée"),
	            ('nsfw', self.nsfw_keywords, "Contenu adulte détecté"),
	            ('spam', self.spam_keywords, "Contenu spam détecté"),
	        )

	        detected_keywords = []
	        category = 'safe'
	        reason = 'Texte approuvé'

	        for name, keywords, message in checks:
	            hits = [kw for kw in keywords if self._keyword_in_text(kw, text)]
	            if not hits:
	                continue
	            detected_keywords.extend(hits)
	            category = name
	            # Quote at most three keywords, all from the reported category.
	            reason = f"{message} - Texte: {', '.join(hits[:3])}"

	        return {
	            'approved': not detected_keywords,
	            'reason': reason,
	            'detected_keywords': detected_keywords,
	            'category': category,
	            'confidence': 1.0 if detected_keywords else 0.0
	        }


	# ============================================================================
	# OPTION 4: Approche hybride (MEILLEURE SOLUTION)
	# ============================================================================

	class HybridTextModerator:
	    """
	    Two-stage moderator: keyword rules first, then optional ML detectors.
	    """

	    def __init__(self, use_ml_model: bool = True):
	        """
	        Args:
	            use_ml_model: When True, try to load the toxicity and NSFW
	                detectors. A detector is only used if it was built and
	                its ``classifier`` is truthy.
	        """
	        # Keyword detector: always available, checked first.
	        self.keyword_detector = KeywordBasedModerator()

	        # ML detectors that loaded successfully, in evaluation order.
	        self.ml_detectors = []

	        # Always defined, so the attributes exist even when ML is disabled
	        # or a detector could not be built.
	        self.toxicity_detector = None
	        self.nsfw_detector = None

	        if use_ml_model:
	            self.toxicity_detector = self._load_detector(ToxicityDetector)
	            self.nsfw_detector = self._load_detector(NSFWTextDetector)
	            for detector in (self.toxicity_detector, self.nsfw_detector):
	                if detector is not None and detector.classifier:
	                    self.ml_detectors.append(detector)

	    @staticmethod
	    def _load_detector(detector_cls):
	        """Build ``detector_cls()``; on failure report the error and return None."""
	        try:
	            return detector_cls()
	        except Exception as e:
	            # Best-effort: a detector that cannot be built is skipped, but
	            # the failure is reported instead of being silently dropped.
	            print(f"⚠️ Erreur: {e}")
	            return None

	    def predict(self, title: str, description: str) -> Dict:
	        """
	        Hybrid moderation: keywords + ML.

	        Strategy:
	            1. Run the keyword detector; reject immediately on any match.
	            2. Otherwise run each loaded ML detector on
	               "<title> <description>" and reject on the first positive.
	            3. Otherwise approve.

	        Returns:
	            {
	                'decision': str,      # 'rejected' or 'approved'
	                'confidence': float,  # 1.0 for keyword hits and approvals,
	                                      # the detector's confidence for ML hits
	                'reason': str,
	                'method': str,        # 'keywords', 'ml_toxicity', 'ml_nsfw', 'hybrid'
	                'details': dict       # detector result; {} when approved
	            }
	        """
	        # STEP 1: keyword check (fast)
	        keyword_result = self.keyword_detector.predict(title, description)

	        if not keyword_result['approved']:
	            return {
	                'decision': 'rejected',
	                'confidence': 1.0,
	                'reason': keyword_result['reason'],
	                'method': 'keywords',
	                'details': keyword_result
	            }

	        # STEP 2: ML check (finer, slower)
	        text = f"{title} {description}"

	        for detector in self.ml_detectors:
	            if isinstance(detector, ToxicityDetector):
	                ml_result = detector.predict(text)
	                if ml_result['is_toxic']:
	                    return {
	                        'decision': 'rejected',
	                        'confidence': ml_result['confidence'],
	                        'reason': f"Contenu toxique détecté ({ml_result['label']})",
	                        'method': 'ml_toxicity',
	                        'details': ml_result
	                    }

	            elif isinstance(detector, NSFWTextDetector):
	                ml_result = detector.predict(text)
	                if ml_result['is_nsfw']:
	                    return {
	                        'decision': 'rejected',
	                        'confidence': ml_result['confidence'],
	                        'reason': "Contenu NSFW détecté",
	                        'method': 'ml_nsfw',
	                        'details': ml_result
	                    }

	        # STEP 3: nothing flagged
	        return {
	            'decision': 'approved',
	            'confidence': 1.0,
	            'reason': 'Texte approuvé',
	            'method': 'hybrid',
	            'details': {}
	        }


	# ============================================================================
	# COMPARAISON DES OPTIONS
	# ============================================================================

	def compare_moderators():
	    """
	    Run the keyword and hybrid moderators over a fixed set of sample
	    listings and print one verdict line per listing, followed by a short
	    list of recommendations.
	    """
	    heavy_rule = "=" * 70
	    light_rule = "-" * 70

	    print(heavy_rule)
	    print("COMPARAISON DES MODÉRATEURS TEXTUELS")
	    print(heavy_rule)

	    # Sample (title, description) listings
	    samples = (
	        ("iPhone 13 Pro", "Téléphone en excellent état"),
	        ("Vente de pistolet", "Arme de défense calibre 9mm"),
	        ("Massage à domicile", "Service xxx pour adultes"),
	        ("Belle villa Dakar", "Maison spacieuse avec piscine"),
	        ("Argent facile", "Cliquez ici pour devenir riche!!!"),
	    )

	    def report(heading, build_moderator, is_accepted):
	        # Print the section header, then one verdict line per sample.
	        print(heading)
	        print(light_rule)
	        moderator = build_moderator()
	        for listing_title, listing_desc in samples:
	            outcome = moderator.predict(listing_title, listing_desc)
	            mark = "✅" if is_accepted(outcome) else "❌"
	            print(f"{mark} {listing_title}: {outcome['reason']}")

	    # Section 1: keyword moderator
	    report(
	        "\n📋 MODÉRATEUR PAR MOTS-CLÉS (Rapide)",
	        KeywordBasedModerator,
	        lambda outcome: outcome['approved'],
	    )

	    # Section 2: hybrid moderator, with ML disabled for the demo
	    report(
	        "\n🔀 MODÉRATEUR HYBRIDE (Recommandé)",
	        lambda: HybridTextModerator(use_ml_model=False),
	        lambda outcome: outcome['decision'] == 'approved',
	    )

	    print("\n" + heavy_rule)
	    print("RECOMMANDATIONS")
	    print(heavy_rule)
	    print("""
	1. COMMENCER avec KeywordBasedModerator (simple, rapide, efficace)
	2. AJOUTER vos propres mots-clés selon votre contexte
	3. PASSER à HybridTextModerator si besoin de plus de finesse
	4. FINE-TUNER DistilBERT si vous avez beaucoup de données labelisées
	""")


	# Script entry point: run the moderator comparison demo when this file is
	# executed directly (the guard is false when the module is imported).
	if __name__ == "__main__":
	compare_moderators()