ACloudCenter committed
Commit b39fef1 · 1 Parent(s): 3bf9123

Modify main app to remove redundant chatbot issues

Files changed (1):
    app.py (+51 -86)
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-# Set tokenizers parallelism to avoid fork warning in Spaces
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 import gradio as gr
@@ -59,52 +58,30 @@ def transcribe_audio(audio_filepath):
     return transcript, transcript
 
 
-# Streaming Q&A function
 @spaces.GPU
 def transcript_qa(transcript, question, history):
     if not transcript:
-        yield history + [{"role": "assistant", "content": "Please transcribe audio first"}], ""
-        return
+        return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please transcribe audio first before asking questions."}]
 
     if not question:
-        yield history, ""
-        return
-
-    # Add user message to history
-    history = history + [{"role": "user", "content": question}]
-
-    # Add placeholder for assistant response
-    history = history + [{"role": "assistant", "content": ""}]
+        return history
 
     with torch.inference_mode(), model.llm.disable_adapter():
-        # For streaming, we'd need to use a different generation method
-        # Since model.generate doesn't support streaming, we'll generate full response
-        # but simulate streaming for better UX
         output_ids = model.generate(
-            prompts=[[{"role": "user", "content": f"When answering questions about the transcript, use markdown when appropriate, such as lists, bullet points, and code blocks: {question}\n\n{transcript}"}]],
-            max_new_tokens=2048,  # Reduced for faster responses
+            prompts=[[{"role": "user", "content": f"Answer this question about the transcript: {question}\n\nTranscript: {transcript}"}]],
+            max_new_tokens=512,
        )
 
         ans = model.tokenizer.ids_to_text(output_ids[0].cpu())
-        ans = ans.split("<|im_start|>assistant")[-1]  # get rid of the prompt
+        ans = ans.split("<|im_start|>assistant")[-1]
 
         if "<think>" in ans:
             if "</think>" in ans:
                 ans = ans.split("<think>")[-1]
-                _, ans = ans.split("</think>")  # get rid of the thinking
-        ans = ans.strip()
-
-    # Simulate streaming by yielding words progressively
-    words = ans.split()
-    current_response = ""
-    for i, word in enumerate(words):
-        current_response += word + " "
-        history[-1] = {"role": "assistant", "content": current_response.strip()}
-        yield history, ""
-        # Small delay to make streaming more visible
-        if i % 3 == 0:  # Try every 3 words for smoother streaming
-            import time
-            time.sleep(0.01)
+                _, ans = ans.split("</think>")
+        ans = ans.strip()
+
+    return history + [{"role": "user", "content": question}, {"role": "assistant", "content": ans}]
 
 def disable_transcribe():
     return gr.update(interactive=False)
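Note: transcript_qa is now a plain function instead of a generator; it returns the complete history in one step, in the messages format that gr.Chatbot(type="messages") renders. A minimal illustration of that shape (the values here are made up, not app output):

    # Illustrative only: one Q&A turn in the messages format that
    # transcript_qa now returns and gr.Chatbot(type="messages") expects.
    history = []
    history = history + [
        {"role": "user", "content": "Can you please summarize this?"},
        {"role": "assistant", "content": "The call covered three agenda items..."},
    ]
    assert history[-1]["role"] == "assistant"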
@@ -113,7 +90,7 @@ def enable_transcribe():
     return gr.update(interactive=True)
 
 def reset_chatbot():
-    return [], []  # Reset both chatbot display and state
+    return []
 
 # Build the Gradio interface
 with gr.Blocks(theme=theme) as demo:
@@ -179,12 +156,11 @@ with gr.Blocks(theme=theme) as demo:
     its transcript. This model is ready for commercial use.''')
 
     # State variables
-    transcript_state = gr.State()
-    chatbot_state = gr.State()
+    transcript_state = gr.State("")
 
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### Step1 - Audio Input")
+            gr.Markdown("### Step 1 - Audio Input")
             audio_input = gr.Audio(
                 sources=["microphone", "upload"],
                 type="filepath",
@@ -194,7 +170,7 @@ with gr.Blocks(theme=theme) as demo:
             transcribe_btn = gr.Button("Transcribe Audio", variant="primary", size="lg")
 
         with gr.Column(scale=1):
-            gr.Markdown("### Step2 - Transcript")
+            gr.Markdown("### Step 2 - Transcript")
             transcript_output = gr.Textbox(
                 label="",
                 lines=10,
@@ -202,71 +178,60 @@ with gr.Blocks(theme=theme) as demo:
                 max_lines=10
             )
 
-    gr.Markdown("### Step3 - Interactive Q&A")
+    gr.Markdown("### Step 3 - Interactive Q&A")
 
-    # Create a wrapper function for Chatbot
-    def qa_wrapper(message, history, transcript):
-        # Check if we have a transcript
-        if not transcript:
-            yield "Please transcribe audio first before asking questions."
+    chatbot = gr.Chatbot(type="messages", height=450)
+    msg = gr.Textbox(placeholder="Ask a question about the transcript...", label="")
+    with gr.Row():
+        submit_btn = gr.Button("Submit", variant="primary")
+        clear_btn = gr.Button("Clear Chat")
+
+    gr.Examples(
+        examples=[
+            "Can you please summarize this?",
+            "What were the key points discussed?",
+            "What was the main topic?",
+            "What is the TLDR version?"
+        ],
+        inputs=msg
+    )
 
-        # Convert Chatbot history format to our format
-        formatted_history = []
-        if history:
-            for msg in history:
-                if isinstance(msg, dict):
-                    formatted_history.append(msg)
-                elif isinstance(msg, tuple):
-                    # Handle tuple format (user, assistant)
-                    if msg[0]:
-                        formatted_history.append({"role": "user", "content": msg[0]})
-                    if len(msg) > 1 and msg[1]:
-                        formatted_history.append({"role": "assistant", "content": msg[1]})
-
-        # Process the Q&A with the current message
-        response_generator = transcript_qa(transcript, message, formatted_history)
-
-        # Stream the response
-        for response_history, _ in response_generator:
-            if response_history and response_history[-1]["role"] == "assistant":
-                yield response_history[-1]["content"]
+    # Event handlers
+    def submit_question(question, history, transcript):
+        if not question:
+            return "", history
+        new_history = transcript_qa(transcript, question, history)
+        return "", new_history
 
-    # Use ChatInterface for cleaner UI
-    chatbot = gr.Chatbot(
-        type="messages",
-        height=450,
-        label="",
-        render_markdown=True,
-        layout="panel"
+    msg.submit(
+        fn=submit_question,
+        inputs=[msg, chatbot, transcript_state],
+        outputs=[msg, chatbot]
     )
-    msg = gr.Textbox(
-        placeholder="Ask a question about the transcript...",
-        container=False
+
+    submit_btn.click(
+        fn=submit_question,
+        inputs=[msg, chatbot, transcript_state],
+        outputs=[msg, chatbot]
     )
-    examples = [
-        ["Can you please summarize this?", None],
-        ["What were the key points discussed?", None],
-        ["What was the main topic?", None],
-        ["What is the TLDR version so I can just leave this conference call early?", None]
-    ]
-    additional_inputs = [transcript_state]
-
-    # Event handlers - simplified since ChatInterface handles Q&A
+
+    clear_btn.click(
+        fn=lambda: [],
+        outputs=[chatbot]
+    )
+
     transcribe_btn.click(
         fn=disable_transcribe,
-        inputs=None,
         outputs=[transcribe_btn]
     ).then(
         fn=reset_chatbot,
-        inputs=None,
-        outputs=[chatbot, chatbot_state]  # Reset both the display and state
+        outputs=[chatbot]
     ).then(
         fn=transcribe_audio,
         inputs=[audio_input],
         outputs=[transcript_output, transcript_state]
     ).then(
         fn=enable_transcribe,
-        inputs=None,
         outputs=[transcribe_btn]
     )
 
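The transcribe_btn.click(...).then(...) chain at the end works because a Gradio event listener returns an object whose .then() registers a follow-up step, so the handlers run strictly in order: disable the button, reset the chat, transcribe, re-enable the button. The same pattern in a self-contained sketch (component names here are illustrative, not from the app):

    import gradio as gr

    with gr.Blocks() as demo:
        btn = gr.Button("Run")
        out = gr.Textbox()
        btn.click(
            fn=lambda: gr.update(interactive=False),  # 1. disable while working
            outputs=[btn],
        ).then(
            fn=lambda: "done",                        # 2. do the actual work
            outputs=[out],
        ).then(
            fn=lambda: gr.update(interactive=True),   # 3. re-enable
            outputs=[btn],
        )

    demo.launch()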