{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question Answering\n",
    "\n",
    "## Imports and Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "\n",
    "# Read the local .env file; OPENAI_API_KEY (if used) is picked up from the\n",
    "# environment by langchain_openai automatically.\n",
    "_ = load_dotenv(find_dotenv())\n",
    "\n",
    "# The persisted Chroma DB lives under ../docs/<db_name> relative to this notebook.\n",
    "abscurdir = os.path.abspath(os.curdir)\n",
    "docsdir = os.path.join(os.path.dirname(abscurdir), 'docs')\n",
    "existing_dbname = 'chroma_20241124_132314'\n",
    "persist_directory = os.path.join(docsdir, existing_dbname)\n",
    "collection_name = 'MLbooks'\n",
    "\n",
    "# Which LLM backend to instantiate below; switch by uncommenting.\n",
    "llm_name = 'ollama3.2.1b'\n",
    "# llm_name = 'openai'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
    "# from langchain_huggingface import HuggingFaceEmbeddings\n",
    "\n",
    "# Must match the embedding model used when the Chroma collection was built.\n",
    "lc_embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vector Store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import chromadb\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Open the existing on-disk Chroma DB (read-only usage here).\n",
    "client = chromadb.PersistentClient(\n",
    "    path=persist_directory\n",
    ")\n",
    "\n",
    "vectorstore = Chroma(\n",
    "    client=client,\n",
    "    collection_name=collection_name,\n",
    "    embedding_function=lc_embeddings,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"instantiating llm model: {llm_name}\")\n",
    "if llm_name == 'ollama3.2.1b':\n",
    "    # langchain_community is the current import path (langchain.llms is legacy).\n",
    "    from langchain_community.llms import Ollama\n",
    "    llm = Ollama(model=\"llama3.2:1b\", temperature=0)\n",
    "elif llm_name == 'openai':\n",
    "    from langchain_openai import ChatOpenAI\n",
    "    llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
    "else:\n",
    "    # Fail fast instead of leaving `llm` undefined for the cells below.\n",
    "    raise ValueError(f\"unknown llm_name: {llm_name!r}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RetrievalQA Chain\n",
    "\n",
    "### Stuff Method\n",
    "\n",
    "#### Legacy Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain import hub\n",
    "from langchain.chains import RetrievalQA  # legacy API\n",
    "\n",
    "# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt\n",
    "prompt = hub.pull(\"rlm/rag-prompt\")\n",
    "print(prompt)\n",
    "\n",
    "qa_chain = RetrievalQA.from_llm(\n",
    "    llm,\n",
    "    retriever=vectorstore.as_retriever(),\n",
    "    prompt=prompt\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'How can machine learning models help a business?'\n",
    "# RetrievalQA expects a dict keyed by \"query\"; .invoke replaces the\n",
    "# deprecated __call__ style.\n",
    "result = qa_chain.invoke({\"query\": question})\n",
    "print(result[\"result\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'How does multi-class classification work?'\n",
    "result = qa_chain.invoke({\"query\": question})\n",
    "result[\"result\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### After Migration\n",
    "##### LCEL Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain import hub\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "\n",
    "# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt\n",
    "prompt = hub.pull(\"rlm/rag-prompt\")\n",
    "\n",
    "def format_docs(docs):\n",
    "    \"\"\"Join retrieved Documents into a single context string for the prompt.\"\"\"\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
    "\n",
    "\n",
    "qa_chain = (\n",
    "    {\n",
    "        \"context\": vectorstore.as_retriever() | format_docs,\n",
    "        \"question\": RunnablePassthrough(),\n",
    "    }\n",
    "    | prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'How can machine learning models help a business?'\n",
    "result = qa_chain.invoke(question)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'How does multi-class classification work?'\n",
    "result = qa_chain.invoke(question)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### With helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain import hub\n",
    "from langchain.chains import create_retrieval_chain\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
    "\n",
    "# See full prompt at https://smith.langchain.com/hub/langchain-ai/retrieval-qa-chat\n",
    "retrieval_qa_chat_prompt = hub.pull(\"langchain-ai/retrieval-qa-chat\")\n",
    "\n",
    "combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)\n",
    "rag_chain = create_retrieval_chain(\n",
    "    vectorstore.as_retriever(),\n",
    "    combine_docs_chain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'How can machine learning models help a business?'\n",
    "# create_retrieval_chain uses \"input\" as its query key and returns\n",
    "# both the answer and the retrieved context documents.\n",
    "result = rag_chain.invoke({\"input\": question})\n",
    "print(result['answer'])\n",
    "print(result['context'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Map-Reduce Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Legacy\n",
    "qa_chain_mr = RetrievalQA.from_chain_type(\n",
    "    llm,\n",
    "    retriever=vectorstore.as_retriever(),\n",
    "    chain_type=\"map_reduce\"\n",
    ")\n",
    "question = 'How can machine learning models help a business?'\n",
    "result = qa_chain_mr.invoke({\"query\": question})\n",
    "result[\"result\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# v0.3 migration guide:\n",
    "# https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Refine Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Legacy\n",
    "qa_chain_refine = RetrievalQA.from_chain_type(\n",
    "    llm,\n",
    "    retriever=vectorstore.as_retriever(),\n",
    "    chain_type=\"refine\"\n",
    ")\n",
    "question = 'How can machine learning models help a business?'\n",
    "result = qa_chain_refine.invoke({\"query\": question})\n",
    "result[\"result\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# v0.3 migration guide:\n",
    "# https://python.langchain.com/docs/versions/migrating_chains/refine_docs_chain/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Map-Rerank Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Legacy\n",
    "# BUGFIX: this cell previously used chain_type=\"refine\" (copy-paste error);\n",
    "# the map-rerank strategy requires chain_type=\"map_rerank\".\n",
    "qa_chain_rerank = RetrievalQA.from_chain_type(\n",
    "    llm,\n",
    "    retriever=vectorstore.as_retriever(),\n",
    "    chain_type=\"map_rerank\"\n",
    ")\n",
    "question = 'How can machine learning models help a business?'\n",
    "result = qa_chain_rerank.invoke({\"query\": question})\n",
    "result[\"result\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# v0.3 migration guide:\n",
    "# https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain_311",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}