"""Streamlit app: take a photo with the webcam and caption it with BLIP."""

import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

# Hugging Face checkpoint used for both the processor and the model.
MODEL_NAME = "Salesforce/blip-image-captioning-large"


@st.cache_resource
def _load_blip():
    """Load the BLIP processor and captioning model.

    Streamlit re-executes this script from top to bottom on every widget
    interaction (including each new photo). Wrapping the load in
    ``st.cache_resource`` keeps a single shared instance alive instead of
    re-reading the checkpoint on every rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    blip_processor = BlipProcessor.from_pretrained(MODEL_NAME)
    blip_model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)
    return blip_processor, blip_model


# Module-level names are kept so the rest of the script reads as before.
processor, model = _load_blip()

st.title("图像描述生成器")
st.write("使用摄像头拍照并生成图像的描述。")

# Ask the user for a webcam snapshot; None until a photo has been taken.
image_data = st.camera_input("请使用摄像头拍照")

if image_data is not None:
    # Decode the uploaded file-like object into a PIL image.
    image = Image.open(image_data)

    # Echo the captured photo back to the user.
    st.image(image, caption="拍摄的图像", use_column_width=True)

    # Run BLIP: preprocess -> generate token ids -> decode to text.
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    st.write(f"图像描述: {caption}")