Update app.py
app.py CHANGED

@@ -1,4 +1,66 @@
 import streamlit as st
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+import torch, os
+from langchain.llms import HuggingFacePipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 
-
-
+model_name_or_path = "meta-llama/Llama-2-13b-chat-hf"
+
+# Count the number of GPUs available
+gpu_count = torch.cuda.device_count()
+
+# Determine the device to use based on GPU availability and count
+# If more than one GPU is available, use 'auto' to allow the library to choose
+# If only one GPU is available, use 'cuda:0' to specify the first GPU
+# If no GPU is available, use the CPU
+if torch.cuda.is_available() and gpu_count > 1:
+    device = 'auto'
+elif torch.cuda.is_available():
+    device = 'cuda:0'
+else:
+    device = 'cpu'
+
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
+                                             # quantization_config=bnb_config,
+                                             torch_dtype=torch.float16,
+                                             device_map='auto')
+print(model.hf_device_map)
+
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_length=2500,
+    return_full_text=True,
+    do_sample=True,
+    repetition_penalty=1.15,
+    num_return_sequences=1,
+    pad_token_id=2,
+    model_kwargs={"temperature": 0.3,
+                  "top_p": 0.95,
+                  "top_k": 40,
+                  "max_new_tokens": 2500},
+)
+llm = HuggingFacePipeline(pipeline=pipe)
+template = """Prompt: {query}
+Answer: """
+
+prompt_template = PromptTemplate(
+    input_variables=["query"],
+    template=template
+)
+# Instantiate the chain
+llm_chain = LLMChain(prompt=prompt_template, llm=llm)
+
+st.title('Test Multi GPU')
+
+md = st.text_area('Type in your markdown string (without outer quotes)')
+
+st.button("Enter", type="primary")
+if st.button("Say hello"):
+    resp = llm_chain.invoke(md)['text']
+    st.write(resp)
+else:
+    st.write("Goodbye")
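
For reference, a minimal usage sketch (hypothetical, not part of the commit) of querying the same chain outside the Streamlit UI. It assumes app.py sits in the working directory and that enough GPU memory is available to load the 13B checkpoint; importing the module builds the tokenizer, model, pipeline, and chain exactly as above.

# Hypothetical snippet, not in the commit: reuse the module-level chain from app.py.
from app import llm_chain

# LLMChain here has a single input variable ("query"), so a bare string is accepted,
# mirroring the llm_chain.invoke(md)['text'] call in the app itself.
result = llm_chain.invoke("What does device_map='auto' change about model loading?")
print(result["text"])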