Spaces:

baidu
/

ERNIE-4.5-VL-28B-A3B-Thinking

Running

App Files Files Community

LokeZhou committed on Nov 10

Commit

cf5d8cf

1 Parent(s): 9c6c306

ERNIE-4.5-VL-28B-A3B-Thinking demo

Browse files

Files changed (1) hide show

app.py +224 -4

app.py CHANGED Viewed

@@ -1,7 +1,227 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoProcessor,TextStreamer,TextIteratorStreamer
+from PIL import Image
+import base64
+import io
+import re
+from typing import Generator, List, Tuple, Optional
+import threading
# Number of most recent conversation turns shown in the chat window.
MAX_HISTORY = 5

# Hugging Face Hub repository used for both the model weights and the processor.
model_path = "baidu/ERNIE-4.5-VL-28B-A3B-Thinking"

# Load the weights in bfloat16 and let the loader choose device placement.
# trust_remote_code=True is required because the repository ships custom code.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

processor = AutoProcessor.from_pretrained(
    model_path,
    trust_remote_code=True,
)
processor.eval()

# Hook provided by the repository's remote code; presumably it wires the
# processor's image preprocessing into the model -- confirm against the model card.
model.add_image_preprocess(processor)
def encode_image(image: Image.Image) -> str:
    """Serialize a PIL image as a base64-encoded PNG string.

    Args:
        image: The image to encode, or ``None``.

    Returns:
        The base64 text (UTF-8 decoded) of the PNG bytes, or an empty string
        when ``image`` is ``None``.
    """
    if image is not None:
        png_bytes = io.BytesIO()
        image.save(png_bytes, format="PNG")
        encoded = base64.b64encode(png_bytes.getvalue())
        return encoded.decode("utf-8")
    return ""
def extract_text_from_html(html: str) -> str:
    """Return the plain message text stored in a chat-history HTML string.

    ``<img ...>`` tags and then any remaining ``<...>`` tags are removed. If the
    result starts with the ``"user: "`` or ``"assistant: "`` role label, that
    label is dropped. Surrounding whitespace is trimmed in every case.

    Args:
        html: A history entry such as ``'user: hi<br><img src="...">'``.

    Returns:
        The message text without markup or role label.
    """
    text = re.sub(r'<img.*?>', '', html)
    text = re.sub(r'<.*?>', '', text)
    for role_label in ("user: ", "assistant: "):
        if text.startswith(role_label):
            # Slice by the label's actual length. A hard-coded offset of 8 for
            # the 11-character "assistant: " label used to leave "t: " behind.
            return text[len(role_label):].strip()
    return text.strip()
def process_chat(
    message: str,
    image: Optional[Image.Image],
    chat_history: List[Tuple[str, str, Optional[str]]],
    max_new_tokens: int,
    temperature: float
) -> Generator[List[Tuple[str, str]], None, None]:
    """Process one chat submission and stream the model's reply.

    Args:
        message: Text typed by the user for this turn.
        image: Optional PIL image attached to this turn.
        chat_history: Earlier turns as ``(user_html, assistant_text, image_b64)``
            tuples; it is not mutated.
        max_new_tokens: Passed to ``model.generate`` as ``max_new_tokens``.
        temperature: Passed to ``model.generate`` as ``temperature``.

    Yields:
        After every streamed chunk, the last ``MAX_HISTORY`` turns as
        ``(user_html, assistant_text)`` pairs, the final pair holding the
        reply generated so far.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been closed.
    """
    assistant_label = "assistant: "

    current_image_b64 = encode_image(image) if image else None
    image_html = ""
    if current_image_b64:
        image_html = f'<br><img src="data:image/png;base64,{current_image_b64}" style="max-width:300px; border-radius:4px;">'
    user_text = message
    user_message_html = f"user: {user_text}{image_html}"
    temp_history = chat_history + [(user_message_html, "", current_image_b64)]

    # Rebuild the model-facing conversation from the stored display history.
    model_messages = []
    for user_html, assistant_text, hist_image_b64 in temp_history[:-1]:
        user_text_clean = extract_text_from_html(user_html)
        user_content = [{"type": "text", "text": user_text_clean}]
        if hist_image_b64:
            user_content.insert(0, {"type": "image_url", "image_url": {"url": hist_image_b64}})
        model_messages.append({"role": "user", "content": user_content})
        # Stored replies carry the display-only "assistant: " label; strip it so
        # the model is not shown the label as part of its own earlier answer.
        if assistant_text.startswith(assistant_label):
            assistant_text = assistant_text[len(assistant_label):]
        assistant_content = [{"type": "text", "text": assistant_text}]
        model_messages.append({"role": "bot", "content": assistant_content})

    current_user_content = [{"type": "text", "text": user_text}]
    if current_image_b64:
        current_user_content.insert(0, {"type": "image_url", "image_url": {"url": current_image_b64}})
    model_messages.append({"role": "user", "content": current_user_content})

    text = processor.tokenizer.apply_chat_template(
        model_messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
    )
    image_inputs, video_inputs = processor.process_vision_info(model_messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    device = next(model.parameters()).device
    inputs = inputs.to(device)

    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    # NOTE(review): `temperature` only influences sampling; whether sampling is
    # enabled depends on the model's generation config -- confirm.
    generation_kwargs = {
        **inputs,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "use_cache": False
    }

    generation_errors = []

    def _run_generation():
        """Run generation in the worker thread, unblocking the reader on failure."""
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # boundary of the worker thread
            generation_errors.append(exc)
            # Without an explicit end-of-stream signal the loop below would
            # wait on the streamer forever after a failed generation.
            streamer.end()

    thread = threading.Thread(target=_run_generation)
    thread.start()

    generated_text = ""
    for new_token in streamer:
        generated_text += new_token
        temp_history[-1] = (user_message_html, f"{assistant_label}{generated_text}", current_image_b64)
        display_history = [(h[0], h[1]) for h in temp_history[-MAX_HISTORY:]]
        yield display_history
    thread.join()

    if generation_errors:
        # Surface the worker failure to the caller instead of ending silently.
        raise generation_errors[0]
def chat_interface(
    message: str,
    image: Optional[Image.Image],
    chat_history: List[Tuple[str, str, Optional[str]]],
    max_new_tokens: int,
    temperature: float
) -> Generator[tuple, None, None]:
    """Stream a chat turn and keep the stored history in step with the display.

    Args:
        message: Text typed by the user for this turn.
        image: Optional PIL image attached to this turn.
        chat_history: Stored turns as ``(user_html, assistant_text, image_b64)``.
        max_new_tokens: Forwarded to ``process_chat``.
        temperature: Forwarded to ``process_chat``.

    Yields:
        ``("", None, updated_full_history, updated_display_history)`` for each
        update produced by ``process_chat``. The first two values are the
        empty text and ``None`` image that the handler emits alongside the
        rebuilt stored history and the display history.
    """
    # The uploaded image does not change while streaming, so encode it once
    # rather than re-running the PNG/base64 encoding for every streamed chunk.
    img_b64 = encode_image(image) if image else None

    # Index stored turns by their displayed pair; setdefault keeps the first
    # occurrence, matching a first-match linear search over chat_history.
    stored_turns = {}
    for stored in chat_history:
        stored_turns.setdefault((stored[0], stored[1]), stored)

    for updated_display_history in process_chat(message, image, chat_history, max_new_tokens, temperature):
        last_index = len(updated_display_history) - 1
        updated_full_history = []
        for i, display_item in enumerate(updated_display_history):
            full_item = stored_turns.get((display_item[0], display_item[1]))
            if full_item is not None:
                updated_full_history.append(full_item)
            elif i == last_index:
                # The in-progress turn is not stored yet; attach this turn's image.
                updated_full_history.append((display_item[0], display_item[1], img_b64))
            else:
                updated_full_history.append((display_item[0], display_item[1], None))
        yield "", None, updated_full_history, updated_display_history
with gr.Blocks(title="ERNIE-4.5-VL-28B-A3B-Thinking", theme=gr.themes.Soft()) as demo:
    # Stored conversation: a list of (user_html, assistant_text, image_b64) tuples.
    full_chat_history = gr.State([])

    with gr.Row():
        with gr.Column(scale=3):
            chat_display = gr.Chatbot(
                label="chat_bot",
                height=500,
                bubble_full_width=False
            )
        with gr.Column(scale=1):
            gr.Markdown("generation kwargs")
            max_new_tokens = gr.Slider(
                minimum=64, maximum=2048, value=512, step=64,
                label="max_new_token"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="temperature"
            )
            # "stop" is the documented Button variant for destructive actions;
            # "destructive" is not one of the variants Gradio defines.
            clear_btn = gr.Button("clear", variant="stop")

    with gr.Row():
        text_input = gr.Textbox(
            label="input text",
            placeholder="input text messages...",
            lines=2
        )
        image_input = gr.Image(
            label="input image",
            placeholder="upload image...",
            type="pil",
            height=100
        )
        submit_btn = gr.Button("submit", variant="primary")

    # The submit button and pressing Enter in the textbox trigger the same
    # handler with the same inputs and outputs, so the wiring is defined once.
    _chat_event = dict(
        fn=chat_interface,
        inputs=[text_input, image_input, full_chat_history, max_new_tokens, temperature],
        outputs=[text_input, image_input, full_chat_history, chat_display],
    )
    submit_btn.click(**_chat_event)
    text_input.submit(**_chat_event)

    def clear_chat():
        """Return empty values for the stored history and the chat display."""
        return [], []

    clear_btn.click(
        fn=clear_chat,
        inputs=[],
        outputs=[full_chat_history, chat_display]
    )
if __name__ == "__main__":
    # Serve on all network interfaces at port 8100, without a public share link.
    demo.launch(
        server_name="0.0.0.0",
        server_port=8100,
        share=False,
    )