Spaces:

TLH01
/

Individualssignment

Build error

App Files Files Community

TLH01 committed on May 1, 2025

Commit

4507ad7

verified ·

1 Parent(s): 7f66618

Update apptest.py

Browse files

Files changed (1) hide show

apptest.py +189 -60

apptest.py CHANGED Viewed

@@ -1,84 +1,213 @@
 import streamlit as st
 from PIL import Image
 import tempfile
 import numpy as np
-from transformers import pipeline, set_seed
-import soundfile as sf
-# --- 模型初始化（缓存优化）---
 @st.cache_resource
-def load_models():
-    caption_pipeline = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
-    story_pipeline = pipeline(
         "text-generation",
         model="pranavpsv/gpt2-genre-story-generator",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
-    tts_pipeline = pipeline(
         "text-to-speech",
-        model="speechbrain/tts-tacotron2-ljspeech",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
-    return caption_pipeline, story_pipeline, tts_pipeline
-# --- Stage 1: Image → Caption ---
-def generate_caption(image, pipeline):
-    caption = pipeline(image)[0]['generated_text']
-    return caption
-# --- Stage 2: Caption → Story (严格限制字数) ---
-def generate_story(caption, pipeline):
-    prompt = f"Generate a children's story in 50-100 words about: {caption}"
-    story = pipeline(
-        prompt,
-        max_length=150,  # Token数量（约对应100词）
-        min_length=80,   # 约对应50词
-        do_sample=True,
-        temperature=0.7,
-        top_k=50,
-        num_return_sequences=1
-    )[0]['generated_text']
-    # 移除重复提示并截断
-    story = story.replace(prompt, "").strip().split(".")[:5]  # 取前5个句子
-    return ".".join(story[:5]) + "."  # 确保以句号结尾
-# --- Stage 3: Story → Audio (兼容Spaces) ---
-def generate_audio(story_text, pipeline):
-    speech = pipeline(story_text)
-    audio_array = speech["audio"].squeeze().numpy()
-    sample_rate = speech["sampling_rate"]
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-        sf.write(f.name, audio_array, sample_rate)
-        return f.name
-# --- Streamlit UI ---
 def main():
-    st.title("📖 AI Storyteller for Kids")
-    caption_pipeline, story_pipeline, tts_pipeline = load_models()
-    uploaded_image = st.file_uploader("Upload a child-friendly image", type=["jpg", "jpeg", "png"])
-    if uploaded_image:
-        image = Image.open(uploaded_image)
-        st.image(image, use_column_width=True)
-        with st.spinner("🔍 Analyzing the image..."):
-            caption = generate_caption(image, caption_pipeline)
-            st.success(f"📝 Caption: {caption}")
-        with st.spinner("✨ Creating a magical story..."):
-            story = generate_story(caption, story_pipeline)
-            st.subheader("📚 Your Story")
-            st.write(story)
-            st.info(f"Word count: {len(story.split())}")  # 显示字数
-        with st.spinner("🔊 Generating audio..."):
-            audio_path = generate_audio(story, tts_pipeline)
-            st.audio(audio_path, format="audio/wav")
 if __name__ == "__main__":
-    import torch  # 延迟导入以避免Spaces预加载问题
     main()

+"""
+Magic Story Generator App for Hugging Face Spaces
+Creates custom children's stories from uploaded images
+"""
 import streamlit as st
 from PIL import Image
 import tempfile
 import numpy as np
+from transformers import pipeline
+import torch
+import os
+# ======================
+# UI Configuration
+# ======================
+def configure_ui():
+    """Sets up child-friendly interface with custom styling"""
+    st.set_page_config(
+        page_title="✨ Magic Story Generator",
+        page_icon="🧚",
+        layout="wide"
+    )
+    # Custom CSS for child-friendly design
+    st.markdown("""
+    <style>
+    .main {
+        background-color: #FFF5E6;
+        background-image: url('https://img.freepik.com/free-vector/hand-drawn-childish-pattern_23-2149073136.jpg');
+        background-size: 30%;
+        opacity: 0.9;
+    }
+    h1 {
+        color: #FF6B6B;
+        font-family: 'Comic Sans MS', cursive;
+        text-align: center;
+        text-shadow: 2px 2px 4px #FFD166;
+    }
+    .stButton>button {
+        background-color: #4ECDC4;
+        color: white;
+        border-radius: 20px;
+        padding: 10px 24px;
+        font-weight: bold;
+    }
+    .stFileUploader>div>div>div>div {
+        border: 2px dashed #FF9E7D;
+        border-radius: 15px;
+        background-color: #FFF0F5;
+    }
+    .story-box {
+        background-color: #FFF0F5;
+        padding: 20px;
+        border-radius: 15px;
+        border-left: 5px solid #FF6B6B;
+        font-family: 'Comic Sans MS', cursive;
+        font-size: 18px;
+        line-height: 1.6;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+# ======================
+# Stage 1: Image Captioning
+# ======================
 @st.cache_resource
+def load_image_captioner():
+    """Loads BLIP image captioning model with GPU support if available"""
+    return pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
+def generate_caption(_pipeline, image):
+    """Generates English description of uploaded image"""
+    try:
+        # Cap the caption at 50 newly generated tokens (no minimum length is enforced)
+        result = _pipeline(image, max_new_tokens=50)
+        return result[0]['generated_text']
+    except Exception as e:
+        st.error(f"Caption generation failed: {str(e)}")
+        return None
+# ======================
+# Stage 2: Story Generation
+# ======================
+@st.cache_resource
+def load_story_generator():
+    """Loads fine-tuned GPT-2 story generator"""
+    return pipeline(
         "text-generation",
         model="pranavpsv/gpt2-genre-story-generator",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
+def generate_story(_pipeline, keywords):
+    """Creates a children's story (60-100 words) based on image caption"""
+    prompt = f"""Generate a children's story (60-100 words) based on: {keywords}
+    Requirements:
+    - Use simple English (Grade 2 level)
+    - Include magical/fantasy elements
+    - Have positive moral lesson
+    - Happy ending
+    - Exactly 3 paragraphs
+    Story:"""
+    try:
+        story = _pipeline(
+            prompt,
+            max_length=250,  # Controls token count (~100 words)
+            temperature=0.7, # Balance creativity vs coherence
+            do_sample=True,
+            top_k=50
+        )[0]['generated_text']
+        # Clean up output by removing prompt
+        return story.replace(prompt, "").strip()
+    except Exception as e:
+        st.error(f"Story generation failed: {str(e)}")
+        return None
+# ======================
+# Stage 3: Text-to-Speech
+# ======================
+@st.cache_resource
+def load_tts():
+    """Loads the facebook/mms-tts-eng text-to-speech model"""
+    return pipeline(
         "text-to-speech",
+        model="facebook/mms-tts-eng",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
+def text_to_speech(_pipeline, text):
+    """Converts generated story to speech audio"""
+    try:
+        audio = _pipeline(text)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            import soundfile as sf
+            sf.write(f.name, audio["audio"].squeeze().numpy(), audio["sampling_rate"])
+            return f.name
+    except Exception as e:
+        st.error(f"Audio generation failed: {str(e)}")
+        return None
+# ======================
+# Main Application
+# ======================
 def main():
+    # Configure UI first
+    configure_ui()
+    # App header
+    st.title("🧚 Magic Story Generator")
+    st.markdown("""
+    <div style="text-align:center; color:#FF8E72; font-family: 'Comic Sans MS'; font-size: 20px;">
+    Upload a child's photo and AI will create a custom fairy tale with audio!
+    </div>
+    """, unsafe_allow_html=True)
+    # File upload section
+    uploaded_file = st.file_uploader(
+        "Choose a photo of children's activity",
+        type=["jpg", "jpeg", "png"],
+        help="Examples: playing, reading, drawing etc."
+    )
+    if not uploaded_file:
+        st.info("👆 Please upload an image to begin")
+        return
+    # Display uploaded image
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Your uploaded photo", use_column_width=True)
+    # Load all models (shows loading animation)
+    with st.spinner("🪄 Preparing magic tools..."):
+        caption_pipe = load_image_captioner()
+        story_pipe = load_story_generator()
+        tts_pipe = load_tts()
+    # --- Stage 1: Image Captioning ---
+    with st.spinner("🔍 Analyzing the image..."):
+        caption = generate_caption(caption_pipe, image)
+        if caption:
+            st.success(f"📝 AI sees: {caption}")
+    # --- Stage 2: Story Generation ---
+    if caption:
+        with st.spinner("✍️ Writing your story..."):
+            story = generate_story(story_pipe, caption)
+            if story:
+                st.subheader("📖 Your Custom Story")
+                st.markdown(f"""
+                <div class="story-box">
+                {story}
+                </div>
+                """, unsafe_allow_html=True)
+                # --- Stage 3: Text-to-Speech ---
+                with st.spinner("🔊 Creating audio version..."):
+                    audio_path = text_to_speech(tts_pipe, story)
+                    if audio_path:
+                        st.audio(audio_path, format="audio/wav")
+                        st.success("Audio ready! Click play above to listen")
+                        st.balloons()  # Celebration animation
 if __name__ == "__main__":
+    # Set Hugging Face cache location
+    os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"
     main()