Files
prompt-engineering-for-deve…/content/LangChain for LLM Application Development/5.基于文档的问答 Question and Answer.ipynb
2023-07-11 22:24:16 +08:00

1 line
30 KiB
Plaintext

{"cells": [{"cell_type": "markdown", "id": "f200ba9a", "metadata": {}, "source": ["# \u7b2c\u4e94\u7ae0 \u57fa\u4e8e\u6587\u6863\u7684\u95ee\u7b54", "\n", " - [\u4e00\u3001\u5bfc\u5165embedding\u6a21\u578b\u548c\u5411\u91cf\u5b58\u50a8\u7ec4\u4ef6](#\u4e00\u3001\u5bfc\u5165embedding\u6a21\u578b\u548c\u5411\u91cf\u5b58\u50a8\u7ec4\u4ef6)\n", " - [1.1 \u521b\u5efa\u5411\u91cf\u5b58\u50a8](#1.1-\u521b\u5efa\u5411\u91cf\u5b58\u50a8)\n", " - [1.2 \u4f7f\u7528\u8bed\u8a00\u6a21\u578b\u4e0e\u6587\u6863\u7ed3\u5408\u4f7f\u7528](#1.2-\u4f7f\u7528\u8bed\u8a00\u6a21\u578b\u4e0e\u6587\u6863\u7ed3\u5408\u4f7f\u7528)\n", " - [\u4e8c\u3001 \u5982\u4f55\u56de\u7b54\u6211\u4eec\u6587\u6863\u7684\u76f8\u5173\u95ee\u9898](#\u4e8c\u3001-\u5982\u4f55\u56de\u7b54\u6211\u4eec\u6587\u6863\u7684\u76f8\u5173\u95ee\u9898)\n", " - [1.3 \u4e0d\u540c\u7c7b\u578b\u7684chain\u94fe](#1.3-\u4e0d\u540c\u7c7b\u578b\u7684chain\u94fe)\n"]}, {"cell_type": "markdown", "id": "52824b89-532a-4e54-87e9-1410813cd39e", "metadata": {}, "source": ["\n", "\u672c\u7ae0\u5185\u5bb9\u4e3b\u8981\u5229\u7528langchain\u6784\u5efa\u5411\u91cf\u6570\u636e\u5e93\uff0c\u53ef\u4ee5\u57fa\u4e8e\u6587\u6863\u6216\u9488\u5bf9\u6587\u6863\u5185\u5bb9\u56de\u7b54\u95ee\u9898\uff0c\u56e0\u6b64\uff0c\u7ed9\u5b9a\u4ecePDF\u6587\u4ef6\u3001\u7f51\u9875\u6216\u67d0\u4e9b\u516c\u53f8\u7684\u5185\u90e8\u6587\u6863\u96c6\u5408\u4e2d\u63d0\u53d6\u7684\u6587\u672c\uff0c\u4f7f\u7528llm\u56de\u7b54\u6709\u5173\u8fd9\u4e9b\u6587\u6863\u5185\u5bb9\u7684\u95ee\u9898"]}, {"cell_type": "markdown", "id": "4aac484b", "metadata": {"height": 30}, "source": ["\n", "\n", "\u5b89\u88c5langchain\uff0c\u8bbe\u7f6echatGPT\u7684OPENAI_API_KEY\n", "\n", "* \u5b89\u88c5langchain\n", "\n", "```\n", "pip install langchain\n", "```\n", "* \u5b89\u88c5docarray\n", "\n", "```\n", "pip install docarray\n", "```\n", "* \u8bbe\u7f6eAPI-KEY\u73af\u5883\u53d8\u91cf\n", "\n", "```\n", "export OPENAI_API_KEY='api-key'\n", "\n", "```"]}, {"cell_type": "code", 
"execution_count": 2, "id": "b7ed03ed-1322-49e3-b2a2-33e94fb592ef", "metadata": {"height": 81, "tags": []}, "outputs": [], "source": ["import os\n", "\n", "from dotenv import load_dotenv, find_dotenv\n", "_ = load_dotenv(find_dotenv()) #\u8bfb\u53d6\u73af\u5883\u53d8\u91cf"]}, {"cell_type": "code", "execution_count": 52, "id": "af8c3c96", "metadata": {}, "outputs": [{"data": {"text/plain": ["'\\n\\n\u4eba\u5de5\u667a\u80fd\u662f\u4e00\u9879\u6781\u5177\u524d\u666f\u7684\u6280\u672f\uff0c\u5b83\u7684\u53d1\u5c55\u6b63\u5728\u6539\u53d8\u4eba\u7c7b\u7684\u751f\u6d3b\u65b9\u5f0f\uff0c\u5e26\u6765\u4e86\u65e0\u6570\u7684\u4fbf\u5229\uff0c\u4e5f\u88ab\u8ba4\u4e3a\u662f\u672a\u6765\u53d1\u5c55\u7684\u91cd\u8981\u6807\u5fd7\u3002\u4eba\u5de5\u667a\u80fd\u7684\u53d1\u5c55\u8ba9\u8bb8\u591a\u590d\u6742\u7684\u4efb\u52a1\u53d8\u5f97\u66f4\u52a0\u5bb9\u6613\uff0c\u66f4\u9ad8\u6548\u7684\u5b8c\u6210\uff0c\u8282\u7701\u4e86\u5927\u91cf\u7684\u65f6\u95f4\u548c\u7cbe\u529b\uff0c\u4e3a\u4eba\u7c7b\u53d1\u5c55\u5e26\u6765\u4e86\u6781\u5927\u7684\u5e2e\u52a9\u3002'"]}, "execution_count": 52, "metadata": {}, "output_type": "execute_result"}], "source": ["from langchain.llms import OpenAI\n", "\n", "llm = OpenAI(model_name=\"text-davinci-003\",max_tokens=1024)\n", "llm(\"\u600e\u4e48\u8bc4\u4ef7\u4eba\u5de5\u667a\u80fd\")"]}, {"cell_type": "markdown", "id": "8cb7a7ec", "metadata": {"height": 30}, "source": ["## \u4e00\u3001\u5bfc\u5165embedding\u6a21\u578b\u548c\u5411\u91cf\u5b58\u50a8\u7ec4\u4ef6\n", "\u4f7f\u7528DocArray\u5185\u5b58\u641c\u7d22\u5411\u91cf\u5b58\u50a8\uff0c\u4f5c\u4e3a\u4e00\u4e2a\u5185\u5b58\u5411\u91cf\u5b58\u50a8\uff0c\u4e0d\u9700\u8981\u8fde\u63a5\u5916\u90e8\u6570\u636e\u5e93"]}, {"cell_type": "code", "execution_count": 3, "id": "974acf8e-8f88-42de-88f8-40a82cb58e8b", "metadata": {"height": 98}, "outputs": [], "source": ["from langchain.chains import RetrievalQA #\u68c0\u7d22QA\u94fe\uff0c\u5728\u6587\u6863\u4e0a\u8fdb\u884c\u68c0\u7d22\n", "from 
langchain.chat_models import ChatOpenAI #openai\u6a21\u578b\n", "from langchain.document_loaders import CSVLoader #\u6587\u6863\u52a0\u8f7d\u5668\uff0c\u91c7\u7528csv\u683c\u5f0f\u5b58\u50a8\n", "from langchain.vectorstores import DocArrayInMemorySearch #\u5411\u91cf\u5b58\u50a8\n", "from IPython.display import display, Markdown #\u5728jupyter\u663e\u793a\u4fe1\u606f\u7684\u5de5\u5177"]}, {"cell_type": "code", "execution_count": 4, "id": "7249846e", "metadata": {"height": 75}, "outputs": [], "source": ["#\u8bfb\u53d6\u6587\u4ef6\n", "file = 'OutdoorClothingCatalog_1000.csv'\n", "loader = CSVLoader(file_path=file)"]}, {"cell_type": "code", "execution_count": 24, "id": "7724f00e", "metadata": {"height": 30}, "outputs": [{"data": {"text/html": ["<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>NaN</td>\n", " <td>name</td>\n", " <td>description</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.0</td>\n", " <td>Women's Campside Oxfords</td>\n", " <td>This ultracomfortable lace-to-toe Oxford boast...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1.0</td>\n", " <td>Recycled Waterhog Dog Mat, Chevron Weave</td>\n", " <td>Protect your floors from spills and splashing ...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2.0</td>\n", " <td>Infant and Toddler Girls' Coastal Chill Swimsu...</td>\n", " <td>She'll love the bright colors, ruffles and exc...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>3.0</td>\n", " <td>Refresh Swimwear, V-Neck Tankini Contrasts</td>\n", " <td>Whether you're going 
for a swim or heading out...</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>996</th>\n", " <td>995.0</td>\n", " <td>Men's Classic Denim, Standard Fit</td>\n", " <td>Crafted from premium denim that will last wash...</td>\n", " </tr>\n", " <tr>\n", " <th>997</th>\n", " <td>996.0</td>\n", " <td>CozyPrint Sweater Fleece Pullover</td>\n", " <td>The ultimate sweater fleece - made from superi...</td>\n", " </tr>\n", " <tr>\n", " <th>998</th>\n", " <td>997.0</td>\n", " <td>Women's NRS Endurance Spray Paddling Pants</td>\n", " <td>These comfortable and affordable splash paddli...</td>\n", " </tr>\n", " <tr>\n", " <th>999</th>\n", " <td>998.0</td>\n", " <td>Women's Stop Flies Hoodie</td>\n", " <td>This great-looking hoodie uses No Fly Zone Tec...</td>\n", " </tr>\n", " <tr>\n", " <th>1000</th>\n", " <td>999.0</td>\n", " <td>Modern Utility Bag</td>\n", " <td>This US-made crossbody bag is built with the s...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1001 rows \u00d7 3 columns</p>\n", "</div>"], "text/plain": [" 0 1 \n", "0 NaN name \\\n", "1 0.0 Women's Campside Oxfords \n", "2 1.0 Recycled Waterhog Dog Mat, Chevron Weave \n", "3 2.0 Infant and Toddler Girls' Coastal Chill Swimsu... \n", "4 3.0 Refresh Swimwear, V-Neck Tankini Contrasts \n", "... ... ... \n", "996 995.0 Men's Classic Denim, Standard Fit \n", "997 996.0 CozyPrint Sweater Fleece Pullover \n", "998 997.0 Women's NRS Endurance Spray Paddling Pants \n", "999 998.0 Women's Stop Flies Hoodie \n", "1000 999.0 Modern Utility Bag \n", "\n", " 2 \n", "0 description \n", "1 This ultracomfortable lace-to-toe Oxford boast... \n", "2 Protect your floors from spills and splashing ... \n", "3 She'll love the bright colors, ruffles and exc... \n", "4 Whether you're going for a swim or heading out... \n", "... ... \n", "996 Crafted from premium denim that will last wash... 
\n", "997 The ultimate sweater fleece - made from superi... \n", "998 These comfortable and affordable splash paddli... \n", "999 This great-looking hoodie uses No Fly Zone Tec... \n", "1000 This US-made crossbody bag is built with the s... \n", "\n", "[1001 rows x 3 columns]"]}, "execution_count": 24, "metadata": {}, "output_type": "execute_result"}], "source": ["#\u67e5\u770b\u6570\u636e\n", "import pandas as pd\n", "data = pd.read_csv(file,header=None)\n", "data"]}, {"cell_type": "markdown", "id": "3bd6422c", "metadata": {}, "source": ["\u63d0\u4f9b\u4e86\u4e00\u4e2a\u6237\u5916\u670d\u88c5\u7684CSV\u6587\u4ef6\uff0c\u6211\u4eec\u5c06\u4f7f\u7528\u5b83\u4e0e\u8bed\u8a00\u6a21\u578b\u7ed3\u5408\u4f7f\u7528"]}, {"cell_type": "markdown", "id": "2963fc63", "metadata": {}, "source": ["### 1.1 \u521b\u5efa\u5411\u91cf\u5b58\u50a8\n", "\u5c06\u5bfc\u5165\u4e00\u4e2a\u7d22\u5f15\uff0c\u5373\u5411\u91cf\u5b58\u50a8\u7d22\u5f15\u521b\u5efa\u5668"]}, {"cell_type": "code", "execution_count": 25, "id": "5bfaba30", "metadata": {"height": 30}, "outputs": [], "source": ["from langchain.indexes import VectorstoreIndexCreator #\u5bfc\u5165\u5411\u91cf\u5b58\u50a8\u7d22\u5f15\u521b\u5efa\u5668"]}, {"cell_type": "code", "execution_count": null, "id": "9e200726", "metadata": {"height": 64}, "outputs": [], "source": ["'''\n", "\u5c06\u6307\u5b9a\u5411\u91cf\u5b58\u50a8\u7c7b,\u521b\u5efa\u5b8c\u6210\u540e\uff0c\u6211\u4eec\u5c06\u4ece\u52a0\u8f7d\u5668\u4e2d\u8c03\u7528,\u901a\u8fc7\u6587\u6863\u8bb0\u8f7d\u5668\u5217\u8868\u52a0\u8f7d\n", "'''\n", "\n", "index = VectorstoreIndexCreator(\n", " vectorstore_cls=DocArrayInMemorySearch\n", ").from_loaders([loader])"]}, {"cell_type": "code", "execution_count": 9, "id": "34562d81", "metadata": {"height": 47}, "outputs": [], "source": ["query =\"Please list all your shirts with sun protection \\\n", "in a table in markdown and summarize each one.\""]}, {"cell_type": "code", "execution_count": 21, "id": "cfd0cc37", "metadata": {"height": 
30}, "outputs": [], "source": ["response = index.query(query)#\u4f7f\u7528\u7d22\u5f15\u67e5\u8be2\u521b\u5efa\u4e00\u4e2a\u54cd\u5e94\uff0c\u5e76\u4f20\u5165\u8fd9\u4e2a\u67e5\u8be2"]}, {"cell_type": "code", "execution_count": 23, "id": "ae21f1ff", "metadata": {"height": 30, "scrolled": true}, "outputs": [{"data": {"text/markdown": ["\n", "\n", "| Name | Description |\n", "| --- | --- |\n", "| Men's Tropical Plaid Short-Sleeve Shirt | UPF 50+ rated, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets |\n", "| Men's Plaid Tropic Shirt, Short-Sleeve | UPF 50+ rated, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets |\n", "| Men's TropicVibe Shirt, Short-Sleeve | UPF 50+ rated, 71% Nylon, 29% Polyester, 100% Polyester knit mesh, machine wash and dry, front and back cape venting, two front bellows pockets |\n", "| Sun Shield Shirt by | UPF 50+ rated, 78% nylon, 22% Lycra Xtra Life fiber, handwash, line dry, wicks moisture, fits comfortably over swimsuit, abrasion resistant |\n", "\n", "All four shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. 
The Men's Tropical Plaid Short-Sleeve Shirt is made of 100% polyester and is wrinkle-resistant"], "text/plain": ["<IPython.core.display.Markdown object>"]}, "metadata": {}, "output_type": "display_data"}], "source": ["display(Markdown(response))#\u67e5\u770b\u67e5\u8be2\u8fd4\u56de\u7684\u5185\u5bb9"]}, {"cell_type": "markdown", "id": "eb74cc79", "metadata": {}, "source": ["\u5f97\u5230\u4e86\u4e00\u4e2aMarkdown\u8868\u683c\uff0c\u5176\u4e2d\u5305\u542b\u6240\u6709\u5e26\u6709\u9632\u6652\u8863\u7684\u886c\u886b\u7684\u540d\u79f0\u548c\u63cf\u8ff0\uff0c\u8fd8\u5f97\u5230\u4e86\u4e00\u4e2a\u8bed\u8a00\u6a21\u578b\u63d0\u4f9b\u7684\u4e0d\u9519\u7684\u5c0f\u603b\u7ed3"]}, {"cell_type": "markdown", "id": "dd34e50e", "metadata": {}, "source": ["### 1.2 \u4f7f\u7528\u8bed\u8a00\u6a21\u578b\u4e0e\u6587\u6863\u7ed3\u5408\u4f7f\u7528\n", "\u60f3\u8981\u4f7f\u7528\u8bed\u8a00\u6a21\u578b\u5e76\u5c06\u5176\u4e0e\u6211\u4eec\u7684\u8bb8\u591a\u6587\u6863\u7ed3\u5408\u4f7f\u7528\uff0c\u4f46\u662f\u8bed\u8a00\u6a21\u578b\u4e00\u6b21\u53ea\u80fd\u68c0\u67e5\u51e0\u5343\u4e2a\u5355\u8bcd\uff0c\u5982\u679c\u6211\u4eec\u6709\u975e\u5e38\u5927\u7684\u6587\u6863\uff0c\u5982\u4f55\u8ba9\u8bed\u8a00\u6a21\u578b\u56de\u7b54\u5173\u4e8e\u5176\u4e2d\u6240\u6709\u5185\u5bb9\u7684\u95ee\u9898\u5462\uff1f\u901a\u8fc7embedding\u548c\u5411\u91cf\u5b58\u50a8\u5b9e\u73b0\n", "* embedding \n", "\u6587\u672c\u7247\u6bb5\u521b\u5efa\u6570\u503c\u8868\u793a\u6587\u672c\u8bed\u4e49\uff0c\u76f8\u4f3c\u5185\u5bb9\u7684\u6587\u672c\u7247\u6bb5\u5c06\u5177\u6709\u76f8\u4f3c\u7684\u5411\u91cf\uff0c\u8fd9\u4f7f\u6211\u4eec\u53ef\u4ee5\u5728\u5411\u91cf\u7a7a\u95f4\u4e2d\u6bd4\u8f83\u6587\u672c\u7247\u6bb5\n", "* \u5411\u91cf\u6570\u636e\u5e93 \n", 
"\u5411\u91cf\u6570\u636e\u5e93\u662f\u5b58\u50a8\u6211\u4eec\u5728\u4e0a\u4e00\u6b65\u4e2d\u521b\u5efa\u7684\u8fd9\u4e9b\u5411\u91cf\u8868\u793a\u7684\u4e00\u79cd\u65b9\u5f0f\uff0c\u6211\u4eec\u521b\u5efa\u8fd9\u4e2a\u5411\u91cf\u6570\u636e\u5e93\u7684\u65b9\u5f0f\u662f\u7528\u6765\u81ea\u4f20\u5165\u6587\u6863\u7684\u6587\u672c\u5757\u586b\u5145\u5b83\u3002\n", "\u5f53\u6211\u4eec\u83b7\u5f97\u4e00\u4e2a\u5927\u7684\u4f20\u5165\u6587\u6863\u65f6\uff0c\u6211\u4eec\u9996\u5148\u5c06\u5176\u5206\u6210\u8f83\u5c0f\u7684\u5757\uff0c\u56e0\u4e3a\u6211\u4eec\u53ef\u80fd\u65e0\u6cd5\u5c06\u6574\u4e2a\u6587\u6863\u4f20\u9012\u7ed9\u8bed\u8a00\u6a21\u578b\uff0c\u56e0\u6b64\u91c7\u7528\u5206\u5757embedding\u7684\u65b9\u5f0f\u50a8\u5b58\u5230\u5411\u91cf\u6570\u636e\u5e93\u4e2d\u3002\u8fd9\u5c31\u662f\u521b\u5efa\u7d22\u5f15\u7684\u8fc7\u7a0b\u3002\n", "\n", "\u901a\u8fc7\u8fd0\u884c\u65f6\u4f7f\u7528\u7d22\u5f15\u6765\u67e5\u627e\u4e0e\u4f20\u5165\u67e5\u8be2\u6700\u76f8\u5173\u7684\u6587\u672c\u7247\u6bb5\uff0c\u7136\u540e\u6211\u4eec\u5c06\u5176\u4e0e\u5411\u91cf\u6570\u636e\u5e93\u4e2d\u7684\u6240\u6709\u5411\u91cf\u8fdb\u884c\u6bd4\u8f83\uff0c\u5e76\u9009\u62e9\u6700\u76f8\u4f3c\u7684n\u4e2a\uff0c\u8fd4\u56de\u8bed\u8a00\u6a21\u578b\u5f97\u5230\u6700\u7ec8\u7b54\u6848"]}, {"cell_type": "code", "execution_count": 26, "id": "631396c6", "metadata": {"height": 30}, "outputs": [], "source": ["#\u521b\u5efa\u4e00\u4e2a\u6587\u6863\u52a0\u8f7d\u5668\uff0c\u901a\u8fc7csv\u683c\u5f0f\u52a0\u8f7d\n", "loader = CSVLoader(file_path=file)\n", "docs = loader.load()"]}, {"cell_type": "code", "execution_count": 27, "id": "4a977f44", "metadata": {"height": 30}, "outputs": [{"data": {"text/plain": ["Document(page_content=\": 0\\nname: Women's Campside Oxfords\\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \\n\\nSize & Fit: Order regular shoe size. 
For half sizes not offered, order up to next whole size. \\n\\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \\n\\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT\u00ae antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \\n\\nQuestions? Please contact us for any inquiries.\", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})"]}, "execution_count": 27, "metadata": {}, "output_type": "execute_result"}], "source": ["docs[0]#\u67e5\u770b\u5355\u4e2a\u6587\u6863\uff0c\u6211\u4eec\u53ef\u4ee5\u770b\u5230\u6bcf\u4e2a\u6587\u6863\u5bf9\u5e94\u4e8eCSV\u4e2d\u7684\u4e00\u4e2a\u5757"]}, {"cell_type": "code", "execution_count": 31, "id": "e875693a", "metadata": {"height": 47}, "outputs": [], "source": ["'''\n", "\u56e0\u4e3a\u8fd9\u4e9b\u6587\u6863\u5df2\u7ecf\u975e\u5e38\u5c0f\u4e86\uff0c\u6240\u4ee5\u6211\u4eec\u5b9e\u9645\u4e0a\u4e0d\u9700\u8981\u5728\u8fd9\u91cc\u8fdb\u884c\u4efb\u4f55\u5206\u5757,\u53ef\u4ee5\u76f4\u63a5\u8fdb\u884cembedding\n", "'''\n", "\n", "from langchain.embeddings import OpenAIEmbeddings #\u8981\u521b\u5efa\u53ef\u4ee5\u76f4\u63a5\u8fdb\u884cembedding\uff0c\u6211\u4eec\u5c06\u4f7f\u7528OpenAI\u7684\u53ef\u4ee5\u76f4\u63a5\u8fdb\u884cembedding\u7c7b\n", "embeddings = OpenAIEmbeddings() #\u521d\u59cb\u5316"]}, {"cell_type": "code", "execution_count": 32, "id": "779bec75", "metadata": {"height": 30}, "outputs": [], "source": ["embed = embeddings.embed_query(\"Hi my name is Harrison\")#\u8ba9\u6211\u4eec\u4f7f\u7528embedding\u4e0a\u7684\u67e5\u8be2\u65b9\u6cd5\u4e3a\u7279\u5b9a\u6587\u672c\u521b\u5efaembedding"]}, {"cell_type": "code", "execution_count": 33, "id": "699aaaf9", "metadata": {"height": 30}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["1536\n"]}], 
"source": ["print(len(embed))#\u67e5\u770b\u8fd9\u4e2aembedding\uff0c\u6211\u4eec\u53ef\u4ee5\u770b\u5230\u6709\u8d85\u8fc7\u4e00\u5343\u4e2a\u4e0d\u540c\u7684\u5143\u7d20"]}, {"cell_type": "code", "execution_count": 34, "id": "9d00d346", "metadata": {"height": 30}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["[-0.021933607757091522, 0.006697045173496008, -0.01819835603237152, -0.039113257080316544, -0.014060650952160358]\n"]}], "source": ["print(embed[:5])#\u6bcf\u4e2a\u5143\u7d20\u90fd\u662f\u4e0d\u540c\u7684\u6570\u5b57\u503c\uff0c\u7ec4\u5408\u8d77\u6765\uff0c\u8fd9\u5c31\u521b\u5efa\u4e86\u8fd9\u6bb5\u6587\u672c\u7684\u603b\u4f53\u6570\u503c\u8868\u793a"]}, {"cell_type": "code", "execution_count": 35, "id": "27ad0bb0", "metadata": {"height": 81}, "outputs": [], "source": ["'''\n", "\u4e3a\u521a\u624d\u7684\u6587\u672c\u521b\u5efaembedding\uff0c\u51c6\u5907\u5c06\u5b83\u4eec\u5b58\u50a8\u5728\u5411\u91cf\u5b58\u50a8\u4e2d\uff0c\u4f7f\u7528\u5411\u91cf\u5b58\u50a8\u4e0a\u7684from documents\u65b9\u6cd5\u6765\u5b9e\u73b0\u3002\n", "\u8be5\u65b9\u6cd5\u63a5\u53d7\u6587\u6863\u5217\u8868\u3001\u5d4c\u5165\u5bf9\u8c61\uff0c\u7136\u540e\u6211\u4eec\u5c06\u521b\u5efa\u4e00\u4e2a\u603b\u4f53\u5411\u91cf\u5b58\u50a8\n", "'''\n", "db = DocArrayInMemorySearch.from_documents(\n", " docs, \n", " embeddings\n", ")"]}, {"cell_type": "code", "execution_count": 36, "id": "0329bfd5", "metadata": {"height": 30}, "outputs": [], "source": ["query = \"Please suggest a shirt with sunblocking\""]}, {"cell_type": "code", "execution_count": 37, "id": "7909c6b7", "metadata": {"height": 30}, "outputs": [], "source": ["docs = 
db.similarity_search(query)#\u4f7f\u7528\u8fd9\u4e2a\u5411\u91cf\u5b58\u50a8\u6765\u67e5\u627e\u4e0e\u4f20\u5165\u67e5\u8be2\u7c7b\u4f3c\u7684\u6587\u672c\uff0c\u5982\u679c\u6211\u4eec\u5728\u5411\u91cf\u5b58\u50a8\u4e2d\u4f7f\u7528\u76f8\u4f3c\u6027\u641c\u7d22\u65b9\u6cd5\u5e76\u4f20\u5165\u4e00\u4e2a\u67e5\u8be2\uff0c\u6211\u4eec\u5c06\u5f97\u5230\u4e00\u4e2a\u6587\u6863\u5217\u8868"]}, {"cell_type": "code", "execution_count": 38, "id": "43321853", "metadata": {"height": 30}, "outputs": [{"data": {"text/plain": ["4"]}, "execution_count": 38, "metadata": {}, "output_type": "execute_result"}], "source": ["len(docs)# \u6211\u4eec\u53ef\u4ee5\u770b\u5230\u5b83\u8fd4\u56de\u4e86\u56db\u4e2a\u6587\u6863"]}, {"cell_type": "code", "execution_count": 39, "id": "6eba90b5", "metadata": {"height": 30}, "outputs": [{"data": {"text/plain": ["Document(page_content=': 255\\nname: Sun Shield Shirt by\\ndescription: \"Block the sun, not the fun \u2013 our high-performance sun shirt is guaranteed to protect from harmful UV rays. \\n\\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\\n\\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated \u2013 the highest rated sun protection possible. Handwash, line dry.\\n\\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\\n\\nSun Protection That Won\\'t Wear Off\\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\\'s harmful rays. 
This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 255})"]}, "execution_count": 39, "metadata": {}, "output_type": "execute_result"}], "source": ["docs[0] #\uff0c\u5982\u679c\u6211\u4eec\u770b\u7b2c\u4e00\u4e2a\u6587\u6863\uff0c\u6211\u4eec\u53ef\u4ee5\u770b\u5230\u5b83\u786e\u5b9e\u662f\u4e00\u4ef6\u5173\u4e8e\u9632\u6652\u7684\u886c\u886b"]}, {"cell_type": "markdown", "id": "fe41b36f", "metadata": {}, "source": ["## \u4e8c\u3001 \u5982\u4f55\u56de\u7b54\u6211\u4eec\u6587\u6863\u7684\u76f8\u5173\u95ee\u9898\n", "\u9996\u5148\uff0c\u6211\u4eec\u9700\u8981\u4ece\u8fd9\u4e2a\u5411\u91cf\u5b58\u50a8\u4e2d\u521b\u5efa\u4e00\u4e2a\u68c0\u7d22\u5668\uff0c\u68c0\u7d22\u5668\u662f\u4e00\u4e2a\u901a\u7528\u63a5\u53e3\uff0c\u53ef\u4ee5\u7531\u4efb\u4f55\u63a5\u53d7\u67e5\u8be2\u5e76\u8fd4\u56de\u6587\u6863\u7684\u65b9\u6cd5\u652f\u6301\u3002\u63a5\u4e0b\u6765\uff0c\u56e0\u4e3a\u6211\u4eec\u60f3\u8981\u8fdb\u884c\u6587\u672c\u751f\u6210\u5e76\u8fd4\u56de\u81ea\u7136\u8bed\u8a00\u54cd\u5e94\n"]}, {"cell_type": "code", "execution_count": 40, "id": "c0c3596e", "metadata": {"height": 30}, "outputs": [], "source": ["retriever = db.as_retriever() #\u521b\u5efa\u68c0\u7d22\u5668\u901a\u7528\u63a5\u53e3"]}, {"cell_type": "code", "execution_count": 55, "id": "0625f5e8", "metadata": {"height": 47}, "outputs": [], "source": ["llm = ChatOpenAI(temperature = 0.0,max_tokens=1024) #\u5bfc\u5165\u8bed\u8a00\u6a21\u578b\n"]}, {"cell_type": "code", "execution_count": 43, "id": "a573f58a", "metadata": {"height": 47}, "outputs": [], "source": ["qdocs = \"\".join([docs[i].page_content for i in range(len(docs))]) # \u5c06\u5408\u5e76\u6587\u6863\u4e2d\u7684\u6240\u6709\u9875\u9762\u5185\u5bb9\u5230\u4e00\u4e2a\u53d8\u91cf\u4e2d\n"]}, {"cell_type": "code", "execution_count": null, "id": "14682d95", "metadata": {"height": 64}, "outputs": [], "source": ["response = llm.call_as_llm(f\"{qdocs} Question: 
Please list all your \\\n", "shirts with sun protection in a table in markdown and summarize each one.\") #\u5217\u51fa\u6240\u6709\u5177\u6709\u9632\u6652\u529f\u80fd\u7684\u886c\u886b\u5e76\u5728Markdown\u8868\u683c\u4e2d\u603b\u7ed3\u6bcf\u4e2a\u886c\u886b\u7684\u8bed\u8a00\u6a21\u578b\n"]}, {"cell_type": "code", "execution_count": 28, "id": "8bba545b", "metadata": {"height": 30}, "outputs": [{"data": {"text/markdown": ["| Name | Description |\n", "| --- | --- |\n", "| Sun Shield Shirt | High-performance sun shirt with UPF 50+ sun protection, moisture-wicking, and abrasion-resistant fabric. Recommended by The Skin Cancer Foundation. |\n", "| Men's Plaid Tropic Shirt | Ultracomfortable shirt with UPF 50+ sun protection, wrinkle-free fabric, and front/back cape venting. Made with 52% polyester and 48% nylon. |\n", "| Men's TropicVibe Shirt | Men's sun-protection shirt with built-in UPF 50+ and front/back cape venting. Made with 71% nylon and 29% polyester. |\n", "| Men's Tropical Plaid Short-Sleeve Shirt | Lightest hot-weather shirt with UPF 50+ sun protection, front/back cape venting, and two front bellows pockets. Made with 100% polyester and is wrinkle-resistant. |\n", "\n", "All of these shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. They are made with high-performance fabrics that are moisture-wicking, wrinkle-resistant, and abrasion-resistant. The Men's Plaid Tropic Shirt and Men's Tropical Plaid Short-Sleeve Shirt both have front/back cape venting for added breathability. 
The Sun Shield Shirt is recommended by The Skin Cancer Foundation as an effective UV protectant."], "text/plain": ["<IPython.core.display.Markdown object>"]}, "metadata": {}, "output_type": "display_data"}], "source": ["display(Markdown(response))"]}, {"cell_type": "markdown", "id": "12f042e7", "metadata": {}, "source": ["\u5728\u6b64\u5904\u6253\u5370\u54cd\u5e94\uff0c\u6211\u4eec\u53ef\u4ee5\u770b\u5230\u6211\u4eec\u5f97\u5230\u4e86\u4e00\u4e2a\u8868\u683c\uff0c\u6b63\u5982\u6211\u4eec\u6240\u8981\u6c42\u7684\u90a3\u6837"]}, {"cell_type": "code", "execution_count": 56, "id": "32c94d22", "metadata": {"height": 115}, "outputs": [], "source": ["''' \n", "\u901a\u8fc7LangChain\u94fe\u5c01\u88c5\u8d77\u6765\n", "\u521b\u5efa\u4e00\u4e2a\u68c0\u7d22QA\u94fe\uff0c\u5bf9\u68c0\u7d22\u5230\u7684\u6587\u6863\u8fdb\u884c\u95ee\u9898\u56de\u7b54\uff0c\u8981\u521b\u5efa\u8fd9\u6837\u7684\u94fe\uff0c\u6211\u4eec\u5c06\u4f20\u5165\u51e0\u4e2a\u4e0d\u540c\u7684\u4e1c\u897f\n", "1\u3001\u8bed\u8a00\u6a21\u578b\uff0c\u5728\u6700\u540e\u8fdb\u884c\u6587\u672c\u751f\u6210\n", "2\u3001\u4f20\u5165\u94fe\u7c7b\u578b\uff0c\u8fd9\u91cc\u4f7f\u7528stuff\uff0c\u5c06\u6240\u6709\u6587\u6863\u585e\u5165\u4e0a\u4e0b\u6587\u5e76\u5bf9\u8bed\u8a00\u6a21\u578b\u8fdb\u884c\u4e00\u6b21\u8c03\u7528\n", "3\u3001\u4f20\u5165\u4e00\u4e2a\u68c0\u7d22\u5668\n", "'''\n", "\n", "\n", "qa_stuff = RetrievalQA.from_chain_type(\n", " llm=llm, \n", " chain_type=\"stuff\", \n", " retriever=retriever, \n", " verbose=True\n", ")"]}, {"cell_type": "code", "execution_count": 46, "id": "e4769316", "metadata": {"height": 47}, "outputs": [], "source": ["query = \"Please list all your shirts with sun protection in a table \\\n", "in markdown and summarize each one.\"#\u521b\u5efa\u4e00\u4e2a\u67e5\u8be2\u5e76\u5728\u6b64\u67e5\u8be2\u4e0a\u8fd0\u884c\u94fe"]}, {"cell_type": "code", "execution_count": null, "id": "1fc3c2f3", "metadata": {"height": 30}, "outputs": [], "source": ["response = qa_stuff.run(query)"]}, 
{"cell_type": "code", "execution_count": 58, "id": "fba1a5db", "metadata": {"height": 30}, "outputs": [{"data": {"text/markdown": ["\n", "\n", "| Name | Description |\n", "| --- | --- |\n", "| Men's Tropical Plaid Short-Sleeve Shirt | UPF 50+ rated, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets |\n", "| Men's Plaid Tropic Shirt, Short-Sleeve | UPF 50+ rated, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets |\n", "| Men's TropicVibe Shirt, Short-Sleeve | UPF 50+ rated, 71% Nylon, 29% Polyester, 100% Polyester knit mesh, machine wash and dry, front and back cape venting, two front bellows pockets |\n", "| Sun Shield Shirt by | UPF 50+ rated, 78% nylon, 22% Lycra Xtra Life fiber, handwash, line dry, wicks moisture, fits comfortably over swimsuit, abrasion resistant |\n", "\n", "All four shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. The Men's Tropical Plaid Short-Sleeve Shirt is made of 100% polyester and is wrinkle-resistant"], "text/plain": ["<IPython.core.display.Markdown object>"]}, "metadata": {}, "output_type": "display_data"}], "source": ["display(Markdown(response))#\u4f7f\u7528 display \u548c markdown \u663e\u793a\u5b83"]}, {"cell_type": "markdown", "id": "e28c5657", "metadata": {}, "source": ["\u8fd9\u4e24\u79cd\u65b9\u5f0f\u8fd4\u56de\u76f8\u540c\u7684\u7ed3\u679c"]}, {"cell_type": "markdown", "id": "44f1fa38", "metadata": {}, "source": ["### 1.3 \u4e0d\u540c\u7c7b\u578b\u7684chain\u94fe\n", "\u60f3\u5728\u8bb8\u591a\u4e0d\u540c\u7c7b\u578b\u7684\u5757\u4e0a\u6267\u884c\u76f8\u540c\u7c7b\u578b\u7684\u95ee\u7b54\uff0c\u8be5\u600e\u4e48\u529e\uff1f\u4e4b\u524d\u7684\u5b9e\u9a8c\u4e2d\u53ea\u8fd4\u56de\u4e864\u4e2a\u6587\u6863\uff0c\u5982\u679c\u6709\u591a\u4e2a\u6587\u6863\uff0c\u90a3\u4e48\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528\u51e0\u79cd\u4e0d\u540c\u7684\u65b9\u6cd5\n", "* Map Reduce \n", 
"\u5c06\u6240\u6709\u5757\u4e0e\u95ee\u9898\u4e00\u8d77\u4f20\u9012\u7ed9\u8bed\u8a00\u6a21\u578b\uff0c\u83b7\u53d6\u56de\u590d\uff0c\u4f7f\u7528\u53e6\u4e00\u4e2a\u8bed\u8a00\u6a21\u578b\u8c03\u7528\u5c06\u6240\u6709\u5355\u72ec\u7684\u56de\u590d\u603b\u7ed3\u6210\u6700\u7ec8\u7b54\u6848\uff0c\u5b83\u53ef\u4ee5\u5728\u4efb\u610f\u6570\u91cf\u7684\u6587\u6863\u4e0a\u8fd0\u884c\u3002\u53ef\u4ee5\u5e76\u884c\u5904\u7406\u5355\u4e2a\u95ee\u9898\uff0c\u540c\u65f6\u4e5f\u9700\u8981\u66f4\u591a\u7684\u8c03\u7528\u3002\u5b83\u5c06\u6240\u6709\u6587\u6863\u89c6\u4e3a\u72ec\u7acb\u7684\n", "* Refine \n", "\u7528\u4e8e\u5faa\u73af\u8bb8\u591a\u6587\u6863\uff0c\u5b9e\u9645\u4e0a\u662f\u8fed\u4ee3\u7684\uff0c\u5efa\u7acb\u5728\u5148\u524d\u6587\u6863\u7684\u7b54\u6848\u4e4b\u4e0a\uff0c\u975e\u5e38\u9002\u5408\u524d\u540e\u56e0\u679c\u4fe1\u606f\u5e76\u968f\u65f6\u95f4\u9010\u6b65\u6784\u5efa\u7b54\u6848\uff0c\u4f9d\u8d56\u4e8e\u5148\u524d\u8c03\u7528\u7684\u7ed3\u679c\u3002\u5b83\u901a\u5e38\u9700\u8981\u66f4\u957f\u7684\u65f6\u95f4\uff0c\u5e76\u4e14\u57fa\u672c\u4e0a\u9700\u8981\u4e0eMap Reduce\u4e00\u6837\u591a\u7684\u8c03\u7528\n", "* Map Re-rank \n", "\u5bf9\u6bcf\u4e2a\u6587\u6863\u8fdb\u884c\u5355\u4e2a\u8bed\u8a00\u6a21\u578b\u8c03\u7528\uff0c\u8981\u6c42\u5b83\u8fd4\u56de\u4e00\u4e2a\u5206\u6570\uff0c\u9009\u62e9\u6700\u9ad8\u5206\uff0c\u8fd9\u4f9d\u8d56\u4e8e\u8bed\u8a00\u6a21\u578b\u77e5\u9053\u5206\u6570\u5e94\u8be5\u662f\u4ec0\u4e48\uff0c\u9700\u8981\u544a\u8bc9\u5b83\uff0c\u5982\u679c\u5b83\u4e0e\u6587\u6863\u76f8\u5173\uff0c\u5219\u5e94\u8be5\u662f\u9ad8\u5206\uff0c\u5e76\u5728\u90a3\u91cc\u7cbe\u7ec6\u8c03\u6574\u8bf4\u660e\uff0c\u53ef\u4ee5\u6279\u91cf\u5904\u7406\u5b83\u4eec\u76f8\u5bf9\u8f83\u5feb\uff0c\u4f46\u662f\u66f4\u52a0\u6602\u8d35\n", "* Stuff \n", "\u5c06\u6240\u6709\u5185\u5bb9\u7ec4\u5408\u6210\u4e00\u4e2a\u6587\u6863"]}, {"cell_type": "code", "execution_count": null, "id": "41c9d68a-251a-41f1-a571-f6a13b3d7b40", "metadata": {}, "outputs": [], "source": 
[]}], "metadata": {"kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12"}, "toc": {"base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true}}, "nbformat": 4, "nbformat_minor": 5}