Spaces:

Jayem-11
/

LLama2_gguf

Sleeping

App Files Files Community

Jayem-11 committed on Feb 1, 2024

Commit

58b29d7

verified ·

1 Parent(s): 11b602d

Upload 4 files

Browse files

Files changed (4) hide show

Dockerfile +20 -0
main.py +29 -0
qa.py +64 -0
requirements.txt +7 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# you will also find guides on how best to write your Dockerfile
+# Base image: the official Python 3.9 image.
+FROM python:3.9
+# Install the Python dependencies from /code before the application source is copied in.
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Create a user with UID 1000 (with a home directory) and switch to it for the following instructions.
+RUN useradd -m -u 1000 user
+USER user
+# Point HOME at that user's home directory and put its local bin directory first on PATH.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Copy the build context into the user's app directory, owned by that user.
+WORKDIR $HOME/app
+COPY --chown=user . $HOME/app
+# Serve the `app` object from main.py with uvicorn on all interfaces, port 7860.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/ping")
+async def ping():
+    return "Hello, I am alive"
+@app.post("/qa")
+async def section(request: Request):
+    data = await request.json()
+    from qa import query
+    answer = query(data["Question"])
+    return {"Answer": answer}

qa.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from langchain.document_loaders import DirectoryLoader, TextLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import CTransformers
+from langchain import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain.vectorstores import FAISS
+import time
+loader = DirectoryLoader("./infotext", glob="*.txt", loader_cls=TextLoader)
+# interpret information in the documents
+documents = loader.load()
+splitter = RecursiveCharacterTextSplitter()
+texts = splitter.split_documents(documents)
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2",
+    model_kwargs={'device': 'cpu'})
+# create and save the local database
+db = FAISS.from_documents(texts, embeddings)
+db.save_local("faiss")
+# prepare the template we will use when prompting the AI
+template = """Use the following pieces of information to answer the user's question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+Context: {context}
+Question: {question}
+Only return the helpful answer below and nothing else.
+Helpful answer:
+"""
+# load the language model
+config = {'max_new_tokens': 256, 'temperature': 0.01}
+llm = CTransformers(model="TheBloke/Llama-2-13B-chat-GGML",
+                    model_file="llama-2-13b-chat.ggmlv3.q2_K.bin",
+                    model_type="llama",config=config)
+# load the interpreted information from the local database
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2",
+    model_kwargs={'device': 'cpu'})
+db = FAISS.load_local("faiss", embeddings)
+# prepare a version of the llm pre-loaded with the local content
+retriever = db.as_retriever(search_kwargs={'k': 2})
+prompt = PromptTemplate(
+    template=template,
+    input_variables=['context', 'question'])
+def query(question):
+    model = RetrievalQA.from_chain_type(llm=llm,
+                                     chain_type='stuff',
+                                     retriever=retriever,
+                                     return_source_documents=True,
+                                     chain_type_kwargs={'prompt': prompt})
+    time_start = time.time()
+    output = model({'query': question})
+    response = output["result"]
+    time_elapsed = time.time() - time_start
+    return [response, time_elapsed]

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Web framework imported by main.py, and the server launched by the Dockerfile CMD.
+fastapi
+uvicorn
+# Imported by qa.py for loading, splitting, embedding, retrieval and prompting.
+langchain
+# Presumably the backend for langchain's FAISS vector store - confirm.
+faiss-cpu
+# NOTE(review): not imported directly by main.py or qa.py; presumably required by the embedding stack - confirm.
+transformers
+# Presumably the backend for langchain's CTransformers LLM wrapper - confirm.
+ctransformers>=0.2.24
+# Presumably the backend for HuggingFaceEmbeddings - confirm.
+sentence-transformers