from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# import spaces
import functools
# @spaces.CPU
@functools.lru_cache(maxsize=1)
def llm_gpu():
    # Download the single GGUF shard by its repo path; lru_cache ensures the
    # model is downloaded and loaded only once per process.
    model_path = hf_hub_download(
        repo_id="Inventors-Hub/SwarmChat-models",
        repo_type="model",
        filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
    )
    llm = Llama(
        model_path=model_path,
        n_ctx=512,        # down from 4096
        low_vram=True,    # llama.cpp low-VRAM mode
        f16_kv=True,      # half-precision KV cache
        use_mmap=True,    # memory-map the model file instead of reading it in
        use_mlock=False,  # don't pin model pages in RAM
    )
    return llm
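
# Should the model ever need to be reloaded (e.g. after the GGUF file changes
# on disk), the lru_cache can be reset explicitly before the next call:
#   llm_gpu.cache_clear()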
# Translate text to English with EuroLLM via a ChatML-style prompt.
def translate_text(text):
    # ChatML prompt; the control tokens stay flush-left so no stray
    # indentation leaks into the prompt text.
    input_prompt = f"""
<|im_start|>system
<|im_end|>
<|im_start|>user
Translate the following text to English:
Text: {text}
English:
<|im_end|>
<|im_start|>assistant
"""
    llm = llm_gpu()
    output = llm(input_prompt, max_tokens=1024, temperature=0)  # greedy decoding
    translated_text = output.get("choices", [{}])[0].get("text", "").strip()
    return translated_text
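
# Minimal usage sketch (hypothetical sample input; the first call is slow
# because it downloads and loads the model):
if __name__ == "__main__":
    sample = "Guten Morgen, wie geht es dir heute?"
    print(translate_text(sample))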