|
|
import gradio as gr |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig |
|
|
import torch |
|
|
|
|
|
|
|
|
# Quantization settings: 4-bit NF4 weights with double quantization.
#
# The compute dtype is chosen per device. bfloat16 is only hardware-accelerated
# on GPUs with CUDA compute capability 8.0 (Ampere) or newer; the T4 this app
# advertises is capability 7.5, so float16 is used on such pre-Ampere devices.
# Everywhere else (newer GPUs, or no CUDA device at all) the original bfloat16
# setting is kept.
_pre_ampere_gpu = (
    torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8
)
_compute_dtype = torch.float16 if _pre_ampere_gpu else torch.bfloat16

nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=_compute_dtype,
)

# Checkpoint identifier passed to both from_pretrained() calls in load_model().
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
|
|
|
|
|
|
|
|
def load_model():
    """Load the tokenizer and the quantized, compiled model for MODEL_NAME.

    The model is loaded with the module-level ``nf4_config`` quantization
    settings and ``device_map="auto"``, then wrapped with ``torch.compile``.
    Progress messages are printed before loading and after compiling.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    print(f"Loading model {MODEL_NAME}...")

    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # Keyword arguments are collected first so the loading call reads as a
    # single line: checkpoint name plus options.
    load_options = {
        "quantization_config": nf4_config,
        "device_map": "auto",
    }
    base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)

    compiled_model = torch.compile(base_model)
    print("Model loaded and compiled!")

    return loaded_tokenizer, compiled_model
|
|
|
|
|
# Load once at import time: generate_text_from_file reads `tokenizer` and
# `model` as module-level globals on every call.
tokenizer, model = load_model()
|
|
|
|
|
# Context size assumed only when the model config does not expose
# `max_position_embeddings`.
_FALLBACK_CONTEXT_TOKENS = 4096


def _read_uploaded_text(file_obj):
    """Return the text of an uploaded file, whatever form it arrives in.

    Accepts raw bytes, a filesystem path (``str`` / ``os.PathLike``), an
    object whose ``name`` attribute is the path of an existing file, or any
    object with a ``read()`` method. Bytes are decoded as UTF-8; undecodable
    sequences are replaced instead of raising ``UnicodeDecodeError``.
    """
    import os  # function-scope import: the module's import block stays untouched

    if isinstance(file_obj, (bytes, bytearray)):
        raw = bytes(file_obj)
    elif isinstance(file_obj, (str, os.PathLike)):
        with open(file_obj, "rb") as handle:
            raw = handle.read()
    else:
        path = getattr(file_obj, "name", None)
        if isinstance(path, str) and os.path.isfile(path):
            # Re-open by path so the result does not depend on where the
            # wrapper's own read position currently sits.
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = file_obj.read()

    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", errors="replace")


def _prompt_token_budget(max_new_tokens):
    """Return the longest prompt (in tokens) that still leaves room for output.

    ``tokenizer.model_max_length`` alone is not a usable bound: for
    checkpoints that declare no limit it is a very large sentinel value. The
    model's ``max_position_embeddings`` is therefore used as the context
    window when available, and ``max_new_tokens`` is reserved out of it.
    """
    context_window = getattr(
        getattr(model, "config", None), "max_position_embeddings", None
    )
    if not isinstance(context_window, int) or context_window <= 0:
        context_window = _FALLBACK_CONTEXT_TOKENS
    context_window = min(context_window, int(tokenizer.model_max_length))
    return max(1, context_window - max_new_tokens)


def generate_text_from_file(file_obj, prompt_text, max_length=200):
    """Generate text from an uploaded file's content and a user instruction.

    Args:
        file_obj: The upload handed over by the Gradio ``File`` input. It may
            be ``None`` (nothing uploaded), a path, raw bytes, or a file-like
            object; see ``_read_uploaded_text``.
        prompt_text: Instruction appended after the file content.
        max_length: Maximum number of new tokens to generate.

    Returns:
        The decoded continuation only (prompt tokens are stripped), or the
        message ``"Please upload a file."`` when ``file_obj`` is ``None``.
    """
    if file_obj is None:
        return "Please upload a file."

    file_content = _read_uploaded_text(file_obj)

    # NOTE(review): the prompt is sent as plain text, without the tokenizer's
    # chat template; confirm that is intended for an instruct checkpoint.
    full_prompt = f"The following is content from a file:\n\n{file_content}\n\nBased on this, and the following instruction:\n\n{prompt_text}"

    max_new_tokens = int(max_length)

    # NOTE(review): truncation trims the end of the prompt, which is where the
    # instruction sits; very long files can therefore push the instruction out.
    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=_prompt_token_budget(max_new_tokens),
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
        use_cache=True,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
|
|
|
|
|
|
|
|
# Input components, listed in the positional order of
# generate_text_from_file's first two parameters (file, then prompt).
_file_input = gr.File(label="Upload Input File (.txt, .md, etc.)")
_prompt_input = gr.Textbox(
    label="Your Prompt",
    placeholder="e.g., Summarize the main points or answer this question about the file.",
)

# Wire the components to the generation function; output is a plain textbox.
iface = gr.Interface(
    fn=generate_text_from_file,
    inputs=[_file_input, _prompt_input],
    outputs="textbox",
    title="Instant LLM Text Generation from Files on Hugging Face Free Space",
    description="Upload a text file and provide a prompt to get instant, accurate text generation. Optimized for Hugging Face's free T4 GPU.",
)

# Start serving the UI.
iface.launch()