mohammadmostafameb committed
Commit 2c72784 · 1 Parent(s): 25af436
Files changed (2)
  1. app.py +43 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,43 @@
+ from transformers import BlipProcessor, BlipForQuestionAnswering
+ import torch
+ import gradio as gr
+ from PIL import Image
+
+ # Load the processor and model
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
+ model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
+
+ # Set device
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ def vqa_answer(image, question):
+     # Preprocess the inputs
+     inputs = processor(image, question, return_tensors="pt").to(device)
+
+     # Generate the answer
+     with torch.no_grad():
+         generated_ids = model.generate(**inputs)
+         answer = processor.decode(generated_ids[0], skip_special_tokens=True)
+
+     return answer
+
+ # Define the input components
+ image_input = gr.components.Image(type="pil", label="Upload an Image")
+ question_input = gr.components.Textbox(lines=1, placeholder="Enter your question here...", label="Question")
+
+ # Define the output component
+ answer_output = gr.components.Textbox(label="Answer")
+
+ # Create the interface
+ iface = gr.Interface(
+     fn=vqa_answer,
+     inputs=[image_input, question_input],
+     outputs=answer_output,
+     title="Visual Question Answering App",
+     description="Ask a question about the uploaded image.",
+     article="This app uses the BLIP model to answer questions about images."
+ )
+
+ # Launch the app
+ iface.launch(share=True)
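As a sanity check, vqa_answer can also be called directly, bypassing the Gradio UI. A minimal sketch, assuming it runs in the same interpreter session as app.py (so the imports and model above are in scope) and that test.jpg is a hypothetical local image, not part of this commit:

image = Image.open("test.jpg").convert("RGB")  # hypothetical test image; RGB matches what the processor expects
print(vqa_answer(image, "What is in the picture?"))  # prints the model's short free-text answer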
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ gradio
+ torch
+ pillow
+ accelerate
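To reproduce the Space locally, one would typically install the listed dependencies and then start the app. A minimal sketch of a hypothetical helper script (not part of this commit), assuming a standard Python 3 environment with the two committed files in the working directory:

# run_local.py -- hypothetical convenience script
import subprocess, sys

# Install the dependencies from requirements.txt, then launch the Gradio app defined in app.py.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
subprocess.check_call([sys.executable, "app.py"])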