File size: 5,975 Bytes
2c4418d
 
 
 
 
 
2520be0
2c4418d
2520be0
2c4418d
2520be0
2c4418d
2520be0
 
 
 
 
 
 
 
 
2c4418d
2520be0
2c4418d
2520be0
 
 
9572d2f
2520be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5ad2fc
2520be0
2c4418d
 
 
 
2520be0
2c4418d
2520be0
 
2c4418d
2520be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c4418d
 
 
 
 
 
2520be0
 
2c4418d
2520be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c4418d
 
2520be0
2c4418d
 
2520be0
 
 
 
 
 
 
 
2c4418d
2520be0
2c4418d
2520be0
 
2c4418d
2520be0
 
 
 
 
 
 
 
 
 
9572d2f
2520be0
9572d2f
2520be0
9572d2f
2520be0
 
 
 
 
9572d2f
2520be0
2c4418d
2520be0
 
 
 
 
 
 
 
9572d2f
 
 
 
 
 
2520be0
 
 
 
9572d2f
2520be0
 
 
 
 
 
 
2c4418d
 
2520be0
9572d2f
2c4418d
 
2520be0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import chromadb
from chromadb.config import Settings

# Startup banner, printed once when the module is executed.
print("=" * 70)
print("Loading Restaurant Review Advisor...")
print("=" * 70)

print("\n[1/3] Loading fine-tuned sentiment model...")

# Load the sentiment classifier. Loading is best-effort: on any failure the
# error is printed and `sentiment_analyzer` is set to None so the rest of the
# module can still be set up and report the model as unavailable.
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="Isap31/restaurant-sentiment-distilbert",
    )
    # The check mark was stored as mojibake ("βœ…"); emit the real character.
    print("✅ Sentiment model loaded (94.93% accuracy)")
except Exception as e:
    print(f"Error loading sentiment model: {e}")
    sentiment_analyzer = None

print("\n[2/3] Setting up RAG knowledge base...")

# Build the retrieval collection and load the generation model. Setup is
# best-effort: any failure prints the error, sets `rag_ready` to False and
# resets `collection` / `generator` to None.
try:
    # Take a reproducible sample of 500 reviews (fixed shuffle seed).
    dataset = load_dataset("fancyzhx/yelp_polarity", split="train")
    sampled_reviews = dataset.shuffle(seed=42).select(range(500))

    # Create the ChromaDB client and the collection that stores the reviews.
    chroma_client = chromadb.Client(Settings(
        anonymized_telemetry=False,
        allow_reset=True
    ))

    collection = chroma_client.create_collection(name="yelp_reviews")

    # Parallel lists passed to `collection.add`: review text, its sentiment
    # metadata and a unique id derived from the position in the sample.
    documents = []
    metadatas = []
    ids = []

    for idx, review in enumerate(sampled_reviews):
        # Reviews shorter than 50 characters are skipped.
        if len(review['text']) >= 50:
            documents.append(review['text'])
            # Label 1 is stored as 'positive', every other label as 'negative'.
            metadatas.append({'sentiment': 'positive' if review['label'] == 1 else 'negative'})
            ids.append(f"review_{idx}")

    collection.add(documents=documents, metadatas=metadatas, ids=ids)

    # The check marks below were stored as mojibake ("βœ…"); emit the real
    # character so the startup log is readable.
    print(f"✅ RAG knowledge base ready with {len(documents)} reviews")

    # Text-to-text model used to turn retrieved reviews into an answer.
    generator = pipeline("text2text-generation", model="google/flan-t5-small", max_length=150)
    print("✅ RAG generation model loaded")

    rag_ready = True
except Exception as e:
    print(f"Error setting up RAG: {e}")
    rag_ready = False
    collection = None
    generator = None

print("\n[3/3] Setting up application...")

def analyze_sentiment(text):
    """Classify a restaurant review as positive or negative.

    Args:
        text: Review text entered by the user. ``None``, an empty string or
            whitespace-only input is treated as "no input".

    Returns:
        A display string. On success it is the sentiment with the model's
        confidence, a blank line and a one-line interpretation. Otherwise it
        is a short status message: no input, model not loaded, or the error
        raised during inference.
    """
    # Check for None before calling .strip() so a missing value is reported
    # as "no input" instead of raising AttributeError.
    if not text or not text.strip():
        return "⚪ No input provided"

    if sentiment_analyzer is None:
        return "❌ Sentiment model not loaded"

    try:
        # truncation=True asks the tokenizer to clip reviews that exceed the
        # model's maximum input length rather than failing on them.
        result = sentiment_analyzer(text, truncation=True)[0]
        label = result['label']
        confidence = result['score']

        # Accept the different spellings a classifier may use for the
        # positive class; anything else is reported as negative.
        if label.upper() in ('POSITIVE', 'LABEL_1', '1'):
            sentiment = f"🟢 POSITIVE ({confidence:.1%} confidence)"
            interpretation = "Great review! Customer is satisfied."
        else:
            sentiment = f"🔴 NEGATIVE ({confidence:.1%} confidence)"
            interpretation = "Negative feedback detected."

        return f"{sentiment}\n\n{interpretation}"
    except Exception as e:
        # Show the failure in the UI instead of propagating it.
        return f"❌ Error: {str(e)}"

def rag_query(question):
    """Answer a question from retrieved reviews (retrieve, then generate).

    Args:
        question: The user's question. ``None``, an empty string or
            whitespace-only input is treated as "no question".

    Returns:
        A display string. On success it is the generated answer followed by
        how many reviews were retrieved and their positive/negative split.
        Otherwise it is a short status message: no question, RAG not loaded,
        nothing retrieved, or the error raised while answering.
    """
    # Check for None before calling .strip() so a missing value is reported
    # as "no question" instead of raising AttributeError.
    if not question or not question.strip():
        return "Please ask a question about restaurants."

    if not rag_ready:
        return "❌ RAG system not loaded"

    try:
        # Retrieval: the three stored reviews closest to the question.
        results = collection.query(query_texts=[question], n_results=3)
        retrieved_docs = results['documents'][0]

        if not retrieved_docs:
            return "I couldn't find relevant reviews for that question."

        # Pair every retrieved review with its stored sentiment tag.
        tagged = [
            (metadata.get('sentiment', 'unknown'), doc)
            for doc, metadata in zip(retrieved_docs, results['metadatas'][0])
        ]
        sentiments = [sentiment for sentiment, _ in tagged]
        context = "\n\n".join(
            f"[{sentiment.upper()}] {doc}" for sentiment, doc in tagged
        )

        # Generation: the prompt text is unchanged; it is assembled from
        # adjacent literals so it can stay indented with the function body.
        prompt = (
            "Based on these customer reviews, answer the question concisely.\n"
            "\n"
            "Reviews:\n"
            f"{context}\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Answer:"
        )

        answer = generator(prompt, max_length=150)[0]['generated_text']

        # Format the response shown in the UI.
        response = f"**Generated Answer:**\n{answer}\n\n"
        response += f"**Based on:** {len(retrieved_docs)} customer reviews "
        response += f"({sentiments.count('positive')} positive, {sentiments.count('negative')} negative)"

        return response

    except Exception as e:
        # Show the failure in the UI instead of propagating it.
        return f"❌ Error: {str(e)}"

print("\nCreating Gradio interface...")

# Tab 1 - sentiment analysis. The pieces of the interface are built first and
# then handed to gr.Interface, which wires them to analyze_sentiment.
_review_input = gr.Textbox(
    lines=3,
    placeholder="Enter restaurant review...",
    label="Review Text",
)
_sentiment_output = gr.Textbox(label="Sentiment Analysis Result")

_sentiment_description = """
    **Component 1: Fine-Tuned DistilBERT Sentiment Model (94.93% accuracy)**
    
    Trained on 50,000 Yelp restaurant reviews. Enter a review to analyze its sentiment!
    """

# One clearly positive, one clearly negative and one mixed sample review.
_sentiment_examples = [
    ["This restaurant exceeded all expectations! The service was impeccable and food was divine."],
    ["Worst dining experience ever. Cold food, rude staff, and overpriced."],
    ["The ambiance was nice, but the food was just okay."],
]

demo = gr.Interface(
    fn=analyze_sentiment,
    inputs=_review_input,
    outputs=_sentiment_output,
    title="🍽️ Restaurant Review Advisor - Sentiment Analysis",
    description=_sentiment_description,
    examples=_sentiment_examples,
)

# Tab 2 - question answering over the indexed reviews, backed by rag_query.
demo_rag = gr.Interface(
    fn=rag_query,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Ask a question about restaurants...",
        label="Question"
    ),
    outputs=gr.Textbox(label="RAG Answer"),
    # The magnifying glass was stored as mojibake; use the real character.
    title="🔍 Restaurant Knowledge Query (RAG System)",
    # At most 500 reviews are sampled and short ones are dropped before
    # indexing, so the description says "up to 500" rather than "500+".
    description="""
    **Component 2: RAG with Yelp Reviews (Retrieval + Augmentation + Generation)**
    
    Ask questions and get answers based on up to 500 real customer reviews!
    """,
    examples=[
        ["What do customers say about food quality at restaurants?"],
        ["Are portions typically good at restaurants?"],
        ["What about customer service?"],
    ]
)

# Combine both interfaces into one app with a tab per component.
app = gr.TabbedInterface(
    [demo, demo_rag],
    ["Sentiment Analysis", "RAG Knowledge Query"],
    # The cup symbol was stored as mojibake; use the real character.
    title="☕ Restaurant Review Advisor - INFO 452 Final Project"
)

# The check mark was stored as mojibake ("βœ…"); emit the real character.
print("✅ Gradio interface ready!")
print("="*70)
print("Launching application...")

# Start the web server only when the file is run as a script, not on import.
if __name__ == "__main__":
    app.launch()