import textwrap

import chromadb
import gradio as gr
from chromadb.config import Settings
from datasets import load_dataset
from transformers import pipeline
# Startup banner: the model downloads below can be slow, so progress is
# printed step by step.
print("=" * 70)
print("Loading Restaurant Review Advisor...")
print("=" * 70)

print("\n[1/3] Loading fine-tuned sentiment model...")

# Load the fine-tuned classifier. A failure here must not stop the UI from
# starting: the error is reported and `sentiment_analyzer` is left as None,
# which analyze_sentiment() checks before using the model.
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="Isap31/restaurant-sentiment-distilbert",
    )
    print("✅ Sentiment model loaded (94.93% accuracy)")
except Exception as e:
    print(f"Error loading sentiment model: {e}")
    sentiment_analyzer = None
print("\n[2/3] Setting up RAG knowledge base...")

# Build the retrieval store and load the generator. Any failure disables the
# RAG tab (rag_ready = False) instead of aborting startup.
try:
    # Fixed seed so the same 500 reviews are sampled on every start.
    dataset = load_dataset("fancyzhx/yelp_polarity", split="train")
    sampled_reviews = dataset.shuffle(seed=42).select(range(500))

    chroma_client = chromadb.Client(
        Settings(anonymized_telemetry=False, allow_reset=True)
    )

    # create_collection() raises when the name already exists (for example if
    # this module is executed again against a client that still holds the
    # collection), so fetch-or-create instead.
    collection = chroma_client.get_or_create_collection(name="yelp_reviews")

    # Skip very short reviews (< 50 characters). IDs keep the index from the
    # sampled set, so they stay stable even when some reviews are skipped.
    kept = [
        (idx, review)
        for idx, review in enumerate(sampled_reviews)
        if len(review["text"]) >= 50
    ]
    documents = [review["text"] for _, review in kept]
    metadatas = [
        {"sentiment": "positive" if review["label"] == 1 else "negative"}
        for _, review in kept
    ]
    ids = [f"review_{idx}" for idx, _ in kept]

    # Only populate an empty collection, so a reused collection is not fed
    # the same IDs a second time.
    if collection.count() == 0:
        collection.add(documents=documents, metadatas=metadatas, ids=ids)

    print(f"✅ RAG knowledge base ready with {len(documents)} reviews")

    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-small",
        max_length=150,
    )
    print("✅ RAG generation model loaded")

    rag_ready = True
except Exception as e:
    print(f"Error setting up RAG: {e}")
    rag_ready = False
    collection = None
    generator = None

print("\n[3/3] Setting up application...")
def analyze_sentiment(text):
    """Classify a restaurant review as positive or negative.

    Args:
        text: Review text from the UI. ``None``, empty and whitespace-only
            values are all treated as "no input".

    Returns:
        A display string. On success it is a sentiment line with the model's
        confidence, a blank line, and a one-sentence interpretation. Otherwise
        it is a short status message (no input, model not loaded, or the text
        of the exception raised during inference).
    """
    # Guard against None as well as blank strings; `.strip()` on None raises.
    if not text or not text.strip():
        return "⚪ No input provided"

    if sentiment_analyzer is None:
        # NOTE(review): the original glyph before this message was lost in a
        # mis-decoded copy of the file; ❌ is assumed here and in the error
        # message below.
        return "❌ Sentiment model not loaded"

    try:
        # truncation=True lets the tokenizer clip reviews longer than the
        # model's input limit instead of failing on them.
        result = sentiment_analyzer(text, truncation=True)[0]
        label = result['label']
        confidence = result['score']

        # Accept the label spellings a binary classifier may emit.
        if label.upper() in ('POSITIVE', 'LABEL_1', '1'):
            sentiment = f"🟢 POSITIVE ({confidence:.1%} confidence)"
            interpretation = "Great review! Customer is satisfied."
        else:
            sentiment = f"🔴 NEGATIVE ({confidence:.1%} confidence)"
            interpretation = "Negative feedback detected."

        return f"{sentiment}\n\n{interpretation}"
    except Exception as e:
        # Surface the failure in the UI rather than crashing the request.
        return f"❌ Error: {str(e)}"
def rag_query(question):
    """Answer a question from retrieved Yelp reviews (retrieve, then generate).

    The three closest reviews are fetched from the Chroma collection, placed
    in a prompt together with the question, and passed to the text2text
    generator.

    Args:
        question: Free-text question from the UI. ``None``, empty and
            whitespace-only values are treated as "no question".

    Returns:
        A display string: the generated answer followed by how many reviews
        it was based on and their positive/negative split, or a short status
        message (no question, RAG not loaded, nothing retrieved, or the text
        of the exception raised along the way).
    """
    # Guard against None as well as blank strings; `.strip()` on None raises.
    if not question or not question.strip():
        return "Please ask a question about restaurants."

    if not rag_ready:
        # NOTE(review): the original glyph before this message was lost in a
        # mis-decoded copy of the file; ❌ is assumed here and in the error
        # message below.
        return "❌ RAG system not loaded"

    # Cap on each review excerpt placed in the prompt. Full reviews can be
    # very long and three of them would push the question far down the
    # prompt. NOTE(review): 500 characters is a heuristic meant to keep the
    # prompt within the generator's input limit - confirm against the model.
    max_review_chars = 500

    try:
        results = collection.query(query_texts=[question], n_results=3)
        retrieved_docs = results['documents'][0]

        if not retrieved_docs:
            return "I couldn't find relevant reviews for that question."

        review_texts = []
        sentiments = []
        for doc, metadata in zip(retrieved_docs, results['metadatas'][0]):
            sentiment = metadata.get('sentiment', 'unknown')
            sentiments.append(sentiment)
            # shorten() collapses whitespace and cuts on a word boundary.
            excerpt = textwrap.shorten(
                doc, width=max_review_chars, placeholder=" ..."
            )
            review_texts.append(f"[{sentiment.upper()}] {excerpt}")

        context = "\n\n".join(review_texts)

        prompt = (
            "Based on these customer reviews, answer the question concisely.\n"
            "\n"
            "Reviews:\n"
            f"{context}\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Answer:"
        )

        answer = generator(prompt, max_length=150)[0]['generated_text']

        response = f"**Generated Answer:**\n{answer}\n\n"
        response += f"**Based on:** {len(retrieved_docs)} customer reviews "
        response += (
            f"({sentiments.count('positive')} positive, "
            f"{sentiments.count('negative')} negative)"
        )
        return response
    except Exception as e:
        # Surface the failure in the UI rather than crashing the request.
        return f"❌ Error: {str(e)}"
print("\nCreating Gradio interface...")

# Tab 1: single-review sentiment classification backed by analyze_sentiment().
demo = gr.Interface(
    fn=analyze_sentiment,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter restaurant review...",
        label="Review Text",
    ),
    outputs=gr.Textbox(label="Sentiment Analysis Result"),
    title="🍽️ Restaurant Review Advisor - Sentiment Analysis",
    # Built from plain string pieces so that no source indentation or stray
    # characters end up in the rendered description.
    description=(
        "**Component 1: Fine-Tuned DistilBERT Sentiment Model "
        "(94.93% accuracy)**\n"
        "\n"
        "Trained on 50,000 Yelp restaurant reviews. "
        "Enter a review to analyze its sentiment!"
    ),
    examples=[
        ["This restaurant exceeded all expectations! The service was impeccable and food was divine."],
        ["Worst dining experience ever. Cold food, rude staff, and overpriced."],
        ["The ambiance was nice, but the food was just okay."],
    ],
)
# Tab 2: question answering over the retrieved Yelp reviews via rag_query().
demo_rag = gr.Interface(
    fn=rag_query,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Ask a question about restaurants...",
        label="Question",
    ),
    outputs=gr.Textbox(label="RAG Answer"),
    # NOTE(review): the title's leading emoji was unrecoverable from the
    # mis-decoded source; 🔍 is a best guess - confirm.
    title="🔍 Restaurant Knowledge Query (RAG System)",
    # The knowledge base samples 500 reviews and drops the very short ones,
    # so it holds at most 500; the text says "up to 500" accordingly.
    description=(
        "**Component 2: RAG with Yelp Reviews "
        "(Retrieval + Augmentation + Generation)**\n"
        "\n"
        "Ask questions and get answers based on up to 500 real customer reviews!"
    ),
    examples=[
        ["What do customers say about food quality at restaurants?"],
        ["Are portions typically good at restaurants?"],
        ["What about customer service?"],
    ],
)
# Combine both demos into one tabbed application.
app = gr.TabbedInterface(
    [demo, demo_rag],
    ["Sentiment Analysis", "RAG Knowledge Query"],
    # NOTE(review): the title's leading emoji was unrecoverable from the
    # mis-decoded source; ⭐ is a best guess - confirm.
    title="⭐ Restaurant Review Advisor - INFO 452 Final Project",
)

print("✅ Gradio interface ready!")
print("=" * 70)
print("Launching application...")

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()