Spaces:

mohammadmostafameb
/

new_simple_vqa

Sleeping

new_simple_vqa / app.py

app added

2c72784 8 months ago

1.44 kB

	from transformers import BlipProcessor, BlipForQuestionAnswering
	import torch
	import gradio as gr
	from PIL import Image

	# Choose where inference runs: CUDA when PyTorch reports it, CPU otherwise.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	# Load the BLIP VQA checkpoint: `processor` prepares the (image, question)
	# inputs and decodes output ids; `model` generates the answer ids.
	processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
	model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")

	# Move the model weights onto the chosen device.
	model.to(device)

	def vqa_answer(image, question):
	    """Answer a free-form question about an image using the loaded BLIP model.

	    Args:
	        image: The image to inspect, forwarded unchanged to the module-level
	            ``processor``. The UI's image component is configured with
	            ``type="pil"``. ``None`` (no upload) is handled gracefully.
	        question: The question text. ``None``, empty, or whitespace-only
	            values are handled gracefully.

	    Returns:
	        The decoded answer string (special tokens skipped), or a short
	        prompt asking the user for the missing input.
	    """
	    # A submission without an upload arrives as None (per Gradio's image
	    # component docs); the processor cannot handle that, so return a
	    # readable prompt instead of letting it raise.
	    if image is None:
	        return "Please upload an image."
	    if not question or not question.strip():
	        return "Please enter a question."

	    # Build model-ready tensors and place them on the model's device.
	    inputs = processor(image, question, return_tensors="pt").to(device)

	    # Inference only: no gradients are needed while generating.
	    with torch.no_grad():
	        generated_ids = model.generate(**inputs)

	    # Only the first generated sequence is decoded.
	    return processor.decode(generated_ids[0], skip_special_tokens=True)

	# Widgets shown to the user: an image upload (requested as type "pil"),
	# a one-line question box, and a text box for the model's answer.
	image_input = gr.components.Image(label="Upload an Image", type="pil")
	question_input = gr.components.Textbox(
	    label="Question",
	    placeholder="Enter your question here...",
	    lines=1,
	)
	answer_output = gr.components.Textbox(label="Answer")

	# Wire the widgets to vqa_answer: two inputs in, one answer out.
	iface = gr.Interface(
	    vqa_answer,
	    [image_input, question_input],
	    answer_output,
	    article="This app uses the BLIP model to answer questions about images.",
	    description="Ask a question about the uploaded image.",
	    title="Visual Question Answering App",
	)

	# Start serving the UI; share=True asks Gradio for a public share link.
	iface.launch(share=True)