# Chat

## Imports and Settings

In [None]:
import os
import sys
from dotenv import load_dotenv, find_dotenv

# Pull environment variables from the nearest .env file; it should provide
# an "OPENAI_API_KEY" entry.
_ = load_dotenv(find_dotenv())

# Model selection. 'openai' is active; 'ollama3.2.1b' is the alternative value.
llm_name = 'openai'

# Chroma collection name and the folder name of the existing on-disk database.
collection_name = 'MLbooks'
existing_dbname = 'chroma_20241124_132314'

# The database folder sits inside 'docs', a sibling of the current working
# directory (i.e. <parent of cwd>/docs/<existing_dbname>).
abscurdir = os.path.abspath(os.curdir)
docsdir = os.path.join(os.path.dirname(abscurdir), 'docs')
persist_directory = os.path.join(docsdir, existing_dbname)

## Embeddings, Vectorstore and LLM

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

import chromadb
from langchain_community.vectorstores import Chroma

# Fail fast when the database folder is missing: PersistentClient would
# otherwise initialise a new, empty store at that path, and every retrieval
# further down would silently come back with no context.
if not os.path.isdir(persist_directory):
    raise FileNotFoundError(
        f"Chroma database directory not found: {persist_directory}"
    )

# Embedding function used to encode queries against the stored vectors.
# NOTE(review): presumably the same model that was used when the collection
# was built -- confirm, since mismatched embeddings give meaningless matches.
lc_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Open the persisted Chroma database from disk.
client = chromadb.PersistentClient(
    path=persist_directory
    )

# LangChain wrapper around the named collection inside that database.
vectorstore = Chroma(
    client=client,
    collection_name=collection_name,
    embedding_function=lc_embeddings,
)

In [None]:
# Instantiate the language model selected by `llm_name`.
# Imports stay inside the branches so only the chosen backend's package
# needs to be installed.
print(f"instantiating llm model: {llm_name}")
if llm_name == 'ollama3.2.1b':
    # `langchain.llms` is a deprecated re-export; import from the community
    # package, which this notebook already depends on.
    from langchain_community.llms import Ollama
    llm = Ollama(model="llama3.2:1b", temperature=0)
elif llm_name == 'openai':
    from langchain_openai import ChatOpenAI
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
else:
    # Without this branch an unrecognised name would leave `llm` undefined
    # and only fail later, with a confusing NameError in another cell.
    raise ValueError(
        f"Unsupported llm_name {llm_name!r}; expected 'ollama3.2.1b' or 'openai'"
    )

## QA Chain

In [None]:
# Build prompt
# The template receives the retrieved documents as {context} and the user
# query as {question}.
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Run chain
from langchain.chains import RetrievalQA
question = 'How can machine learning models help a business?'
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

# Calling the chain object directly (`qa_chain({...})`) is deprecated;
# `invoke` is the supported entry point and returns the same output dict.
result = qa_chain.invoke({"query": question})
result["result"]

### Memory

In [None]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer handed to the conversational chain below. The history
# is stored under the "chat_history" key, with return_messages enabled.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

### ConversationalRetrievalChain

In [None]:
from langchain.chains import ConversationalRetrievalChain
# Retriever over the vector store: plain similarity search with k=3
# ("mmr" is the alternative search_type that was tried here).
retriever = vectorstore.as_retriever(search_kwargs={"k": 3}, search_type="similarity")

# Conversational chain: combines the LLM, the retriever and the chat memory,
# using the 'stuff' chain type.
crc = ConversationalRetrievalChain.from_llm(
    llm,
    memory=memory,
    retriever=retriever,
    chain_type='stuff',
)

In [None]:
# First turn of the conversation.
question = 'How does multiclass classification work?'
# Use `invoke` rather than the deprecated direct call on the chain object.
result = crc.invoke({"question": question})
result['answer']

In [None]:
# Follow-up turn: relies on the chat history held in the chain's memory.
question = "Look at the context that was earlier provided."
# Use `invoke` rather than the deprecated direct call on the chain object.
result = crc.invoke({"question": question})
result['answer']