import os
import re

from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from dotenv import load_dotenv
import googleapiclient.discovery
import googleapiclient.errors
# Final upgrade: a RoBERTa model trained on Twitter data for maximum slang/context accuracy
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
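# Assumed dependencies, inferred from the imports above: flask, flask-cors,
# python-dotenv, google-api-python-client, and transformers with an inference
# backend such as torch installed.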
# --- Setup ---
app = Flask(__name__)
CORS(app)
load_dotenv()

API_KEY = os.getenv("YOUTUBE_API_KEY")
if not API_KEY:
    raise ValueError("YOUTUBE_API_KEY not found in .env file. Please create a .env file and add your key.")
# --- State-of-the-Art AI Model Loading (Twitter Emotion) ---
# This model is one of the best for understanding slang and informal text.
# The model will be downloaded on the first run.
print("Loading Twitter-trained RoBERTa model for emotion classification...")
MODEL = "cardiffnlp/twitter-roberta-base-emotion-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
emotion_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None)
print("Model loaded successfully.")
# --- Serve Frontend ---
@app.route("/")  # route decorator restored; "/" is the conventional index route
def serve_index():
    return render_template("index.html")
# --- Helper Function ---
def extract_video_id(url):
    """Extracts the YouTube video ID from various common URL formats."""
    patterns = [
        r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})",
        r"(?:https?:\/\/)?(?:www\.)?youtu\.be\/([a-zA-Z0-9_-]{11})",
        r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/embed\/([a-zA-Z0-9_-]{11})",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
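# Examples (illustrative ID) — all three supported formats resolve the same way:
#   extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # -> "dQw4w9WgXcQ"
#   extract_video_id("https://youtu.be/dQw4w9WgXcQ")                 # -> "dQw4w9WgXcQ"
#   extract_video_id("https://www.youtube.com/embed/dQw4w9WgXcQ")    # -> "dQw4w9WgXcQ"
#   extract_video_id("not a url")                                    # -> None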
# --- API Endpoint ---
@app.route("/analyze", methods=["POST"])  # decorator restored; the exact path is an assumption
def analyze_comments():
    """
    API endpoint that receives a YouTube URL, fetches up to 700 comments,
    performs detailed emotion analysis, and returns a comprehensive JSON response.
    """
    data = request.get_json()
    if not data or "youtube_url" not in data:
        return jsonify({"error": "youtube_url is required"}), 400

    video_url = data["youtube_url"]
    video_id = extract_video_id(video_url)
    if not video_id:
        return jsonify({"error": "Invalid YouTube URL"}), 400
    try:
        youtube = googleapiclient.discovery.build(
            "youtube", "v3", developerKey=API_KEY
        )

        video_request = youtube.videos().list(part="snippet", id=video_id)
        video_response = video_request.execute()
        if not video_response.get("items"):
            return jsonify({"error": "Video not found."}), 404

        video_snippet = video_response["items"][0]["snippet"]
        video_details = {
            "title": video_snippet["title"],
            "thumbnail_url": video_snippet["thumbnails"]["medium"]["url"]
        }
        all_comments = []
        next_page_token = None
        for _ in range(7):  # Fetch up to 700 comments (7 pages x 100 per page)
            comment_request = youtube.commentThreads().list(
                part="snippet", videoId=video_id, maxResults=100,
                textFormat="plainText", pageToken=next_page_token
            )
            comment_response = comment_request.execute()

            for item in comment_response.get("items", []):
                comment_text = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                # Filter out very short, non-meaningful comments
                if len(comment_text.split()) > 2:
                    all_comments.append(comment_text)

            next_page_token = comment_response.get("nextPageToken")
            if not next_page_token:
                break
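        # Note: the API omits nextPageToken on the final page, so videos with
        # fewer than 700 top-level comments simply end the loop early.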
        if not all_comments:
            return jsonify({
                "error": "No meaningful comments found or comments are disabled for this video.",
                "video_details": video_details
            }), 200
        # This model outputs: anger, joy, optimism, sadness
        emotion_labels = ["anger", "joy", "optimism", "sadness"]
        emotions = {label: 0 for label in emotion_labels}
        comment_analysis = []

        batch_size = 16
        for i in range(0, len(all_comments), batch_size):
            batch = all_comments[i:i + batch_size]
            results = emotion_pipeline(batch, truncation=True) if batch else []
            for j, result in enumerate(results):
                comment_text = batch[j]
                top_emotion = result[0]
                label = top_emotion['label']
                score = top_emotion['score']
                comment_analysis.append({"text": comment_text, "label": label, "score": score})
                if label in emotions:
                    emotions[label] += 1
        comment_analysis.sort(key=lambda x: x["score"], reverse=True)

        top_comments_by_emotion = {label: [] for label in emotion_labels}
        for comment in comment_analysis:
            label = comment['label']
            if label in top_comments_by_emotion and len(top_comments_by_emotion[label]) < 2:
                top_comments_by_emotion[label].append(comment['text'])

        response_data = {
            "emotions": emotions,
            "video_details": video_details,
            "top_comments_by_emotion": top_comments_by_emotion,
            "total_comments_analyzed": len(all_comments)
        }
        return jsonify(response_data), 200
    except googleapiclient.errors.HttpError as e:
        try:
            error_details = e.error_details[0]
            if error_details["reason"] == "commentsDisabled":
                return jsonify({
                    "error": "Comments are disabled for this video.",
                    "video_details": video_details
                }), 200
        except (AttributeError, IndexError, KeyError, TypeError):
            # error_details may be missing, empty, or lack a "reason" key
            pass
        print(f"An API error occurred: {e}")
        return jsonify({"error": "Could not retrieve comments. Please check the video URL and API key."}), 400
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return jsonify({"error": "An internal server error occurred."}), 500
if __name__ == "__main__":
    app.run(port=5000, debug=True)
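# A quick smoke test against a local run (the /analyze path matches the route
# assumed above; the video URL is a placeholder):
#
#   curl -X POST http://localhost:5000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"youtube_url": "https://www.youtube.com/watch?v=<video-id>"}'
#
# A successful response contains "emotions", "video_details",
# "top_comments_by_emotion", and "total_comments_analyzed".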