element.\n",
+ "\n",
+ "Technical specifications: ```{fact_sheet_chair}```\n",
+ "\"\"\"\n",
+ "\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n",
+ "\n",
+ "# 表格是以 HTML 格式呈现的,加载出来\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "display(HTML(response))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "latex_envs": {
+ "LaTeX_envs_menu_present": true,
+ "autoclose": false,
+ "autocomplete": true,
+ "bibliofile": "biblio.bib",
+ "cite_by": "apalike",
+ "current_citInitial": 1,
+ "eqLabelWithNumbers": true,
+ "eqNumInitial": 1,
+ "hotkeys": {
+ "equation": "Ctrl-E",
+ "itemize": "Ctrl-I"
+ },
+ "labels_anchors": false,
+ "latex_user_defs": false,
+ "report_style_numbering": false,
+ "user_envs_cfg": false
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": true
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/content/C1 Prompt Engineering for Developer/4. 文本概括 Summarizing.ipynb b/docs/content/C1 Prompt Engineering for Developer/4. 文本概括 Summarizing.ipynb
new file mode 100644
index 0000000..5b877cf
--- /dev/null
+++ b/docs/content/C1 Prompt Engineering for Developer/4. 文本概括 Summarizing.ipynb
@@ -0,0 +1 @@
+{"cells":[{"attachments":{},"cell_type":"markdown","id":"b58204ea","metadata":{},"source":["# 第四章 文本概括\n"]},{"attachments":{},"cell_type":"markdown","id":"12fa9ea4","metadata":{},"source":["当今世界上文本信息浩如烟海,我们很难拥有足够的时间去阅读所有想了解的东西。但欣喜的是,目前LLM在文本概括任务上展现了强大的水准,也已经有不少团队将概括功能实现在多种应用中。\n","\n","本章节将介绍如何使用编程的方式,调用API接口来实现“文本概括”功能。"]},{"attachments":{},"cell_type":"markdown","id":"9cca835b","metadata":{},"source":["## 一、单一文本概括"]},{"attachments":{},"cell_type":"markdown","id":"0c1e1b92","metadata":{},"source":["以商品评论的总结任务为例:对于电商平台来说,网站上往往存在着海量的商品评论,这些评论反映了所有客户的想法。如果我们拥有一个工具去概括这些海量、冗长的评论,便能够快速地浏览更多评论,洞悉客户的偏好,从而指导平台与商家提供更优质的服务。"]},{"attachments":{},"cell_type":"markdown","id":"aad5bd2a","metadata":{},"source":["**输入文本**"]},{"cell_type":"code","execution_count":2,"id":"43b5dd25","metadata":{},"outputs":[],"source":["prod_review = \"\"\"\n","这个熊猫公仔是我给女儿的生日礼物,她很喜欢,去哪都带着。\n","公仔很软,超级可爱,面部表情也很和善。但是相比于价钱来说,\n","它有点小,我感觉在别的地方用同样的价钱能买到更大的。\n","快递比预期提前了一天到货,所以在送给女儿之前,我自己玩了会。\n","\"\"\""]},{"attachments":{},"cell_type":"markdown","id":"662c9cd2","metadata":{},"source":["### 1.1 限制输出文本长度"]},{"attachments":{},"cell_type":"markdown","id":"a6d10814","metadata":{},"source":["我们尝试限制文本长度为最多30词。"]},{"cell_type":"code","execution_count":5,"id":"bf4b39f9","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["熊猫公仔软可爱,女儿喜欢,但有点小。快递提前一天到货。\n"]}],"source":["from tool import get_completion\n","\n","prompt = f\"\"\"\n","您的任务是从电子商务网站上生成一个产品评论的简短摘要。\n","\n","请对三个反引号之间的评论文本进行概括,最多30个词汇。\n","\n","评论: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"attachments":{},"cell_type":"markdown","id":"e9ab145e","metadata":{},"source":["### 1.2 设置关键角度侧重"]},{"attachments":{},"cell_type":"markdown","id":"f84d0123","metadata":{},"source":["有时,针对不同的业务,我们对文本的侧重会有所不同。例如对于商品评论文本,物流会更关心运输时效,商家更加关心价格与商品质量,平台更关心整体服务体验。\n","\n","我们可以通过增加Prompt提示,来体现对于某个特定角度的侧重。"]},{"attachments":{},"cell_type":"markdown","id":"d6f8509a","metadata":{},"source":["#### 1.2.1 侧重于快递服务"]},{"cell_type":"code","execution_count":7,"id":"80636c3e","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["快递提前到货,公仔可爱但有点小。\n"]}],"source":["prompt = f\"\"\"\n","您的任务是从电子商务网站上生成一个产品评论的简短摘要。\n","\n","请对三个反引号之间的评论文本进行概括,最多30个词汇,并且侧重在快递服务上。\n","\n","评论: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"attachments":{},"cell_type":"markdown","id":"76c97fea","metadata":{},"source":["可以看到,输出结果以“快递提前到货”开头,体现了对于快递效率的侧重。"]},{"attachments":{},"cell_type":"markdown","id":"83275907","metadata":{},"source":["#### 1.2.2 侧重于价格与质量"]},{"cell_type":"code","execution_count":8,"id":"728d6c57","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["可爱的熊猫公仔,质量好但有点小,价格稍高。快递提前到货。\n"]}],"source":["prompt = f\"\"\"\n","您的任务是从电子商务网站上生成一个产品评论的简短摘要。\n","\n","请对三个反引号之间的评论文本进行概括,最多30个词汇,并且侧重在产品价格和质量上。\n","\n","评论: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"attachments":{},"cell_type":"markdown","id":"972dbb1b","metadata":{},"source":["可以看到,输出结果以“可爱的熊猫公仔,质量好但有点小,价格稍高”开头,体现了对于产品价格与质量的侧重。"]},{"attachments":{},"cell_type":"markdown","id":"b3ed53d2","metadata":{},"source":["### 1.3 关键信息提取"]},{"attachments":{},"cell_type":"markdown","id":"ba6f5c25","metadata":{},"source":["在1.2节中,虽然我们通过添加关键角度侧重的 Prompt ,使得文本摘要更侧重于某一特定方面,但是可以发现,结果中也会保留一些其他信息,如偏重价格与质量角度的概括中仍保留了“快递提前到货”的信息。如果我们只想要提取某一角度的信息,并过滤掉其他所有信息,则可以要求 LLM 进行“文本提取( Extract )”而非“概括( Summarize 
)”"]},{"cell_type":"code","execution_count":9,"id":"c845ccab","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["产品运输相关的信息:快递提前一天到货。\n"]}],"source":["prompt = f\"\"\"\n","您的任务是从电子商务网站上的产品评论中提取相关信息。\n","\n","请从以下三个反引号之间的评论文本中提取产品运输相关的信息,最多30个词汇。\n","\n","评论: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"attachments":{},"cell_type":"markdown","id":"50498a2b","metadata":{},"source":["## 二、同时概括多条文本"]},{"attachments":{},"cell_type":"markdown","id":"a291541a","metadata":{},"source":["在实际的工作流中,我们往往有许许多多的评论文本,以下示例将多条用户评价放进列表,并利用 ```for``` 循环,使用文本概括(Summarize)提示词,将评价概括至小于 20 词,并按顺序打印。当然,在实际生产中,对于不同规模的评论文本,除了使用 ```for``` 循环以外,还可能需要考虑整合评论、分布式等方法提升运算效率。您可以搭建主控面板,来总结大量用户评论,来方便您或他人快速浏览,还可以点击查看原评论。这样您能高效掌握顾客的所有想法。"]},{"cell_type":"code","execution_count":3,"id":"ef606961","metadata":{},"outputs":[],"source":["review_1 = prod_review\n","\n","# 一盏落地灯的评论\n","review_2 = \"\"\"\n","我需要一盏漂亮的卧室灯,这款灯不仅具备额外的储物功能,价格也并不算太高。\n","收货速度非常快,仅用了两天的时间就送到了。\n","不过,在运输过程中,灯的拉线出了问题,幸好,公司很乐意寄送了一根全新的灯线。\n","新的灯线也很快就送到手了,只用了几天的时间。\n","装配非常容易。然而,之后我发现有一个零件丢失了,于是我联系了客服,他们迅速地给我寄来了缺失的零件!\n","对我来说,这是一家非常关心客户和产品的优秀公司。\n","\"\"\"\n","\n","# 一把电动牙刷的评论\n","review_3 = \"\"\"\n","我的牙科卫生员推荐了电动牙刷,所以我就买了这款。\n","到目前为止,电池续航表现相当不错。\n","初次充电后,我在第一周一直将充电器插着,为的是对电池进行条件养护。\n","过去的3周里,我每天早晚都使用它刷牙,但电池依然维持着原来的充电状态。\n","不过,牙刷头太小了。我见过比这个牙刷头还大的婴儿牙刷。\n","我希望牙刷头更大一些,带有不同长度的刷毛,\n","这样可以更好地清洁牙齿间的空隙,但这款牙刷做不到。\n","总的来说,如果你能以50美元左右的价格购买到这款牙刷,那是一个不错的交易。\n","制造商的替换刷头相当昂贵,但你可以购买价格更为合理的通用刷头。\n","这款牙刷让我感觉就像每天都去了一次牙医,我的牙齿感觉非常干净!\n","\"\"\"\n","\n","# 一台搅拌机的评论\n","review_4 = \"\"\"\n","在11月份期间,这个17件套装还在季节性促销中,售价约为49美元,打了五折左右。\n","可是由于某种原因(我们可以称之为价格上涨),到了12月的第二周,所有的价格都上涨了,\n","同样的套装价格涨到了70-89美元不等。而11件套装的价格也从之前的29美元上涨了约10美元。\n","看起来还算不错,但是如果你仔细看底座,刀片锁定的部分看起来没有前几年版本的那么漂亮。\n","然而,我打算非常小心地使用它\n","(例如,我会先在搅拌机中研磨豆类、冰块、大米等坚硬的食物,然后再将它们研磨成所需的粒度,\n","接着切换到打蛋器刀片以获得更细的面粉,如果我需要制作更细腻/少果肉的食物)。\n","在制作冰沙时,我会将要使用的水果和蔬菜切成细小块并冷冻\n","(如果使用菠菜,我会先轻微煮熟菠菜,然后冷冻,直到使用时准备食用。\n","如果要制作冰糕,我会使用一个小到中号的食物加工器),这样你就可以避免添加过多的冰块。\n","大约一年后,电机开始发出奇怪的声音。我打电话给客户服务,但保修期已经过期了,\n","所以我只好购买了另一台。值得注意的是,这类产品的整体质量在过去几年里有所下降\n",",所以他们在一定程度上依靠品牌认知和消费者忠诚来维持销售。在大约两天内,我收到了新的搅拌机。\n","\"\"\"\n","\n","reviews = [review_1, review_2, review_3, review_4]\n"]},{"cell_type":"code","execution_count":4,"id":"eb878522","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["评论1: 熊猫公仔是生日礼物,女儿喜欢,软可爱,面部表情和善。价钱有点小,快递提前一天到货。 \n","\n","评论2: 漂亮卧室灯,储物功能,快速送达,灯线问题,快速解决,容易装配,关心客户和产品。 \n","\n","评论3: 这款电动牙刷电池续航好,但牙刷头太小,价格合理,清洁效果好。 \n","\n","评论4: 该评论提到了一个17件套装的产品,在11月份有折扣销售,但在12月份价格上涨。评论者提到了产品的外观和使用方法,并提到了产品质量下降的问题。最后,评论者提到他们购买了另一台搅拌机。 \n","\n"]}],"source":["for i in range(len(reviews)):\n"," prompt = f\"\"\"\n"," 你的任务是从电子商务网站上的产品评论中提取相关信息。\n","\n"," 请对三个反引号之间的评论文本进行概括,最多20个词汇。\n","\n"," 评论文本: ```{reviews[i]}```\n"," \"\"\"\n"," response = get_completion(prompt)\n"," print(f\"评论{i+1}: \", response, \"\\n\")\n"]},{"cell_type":"markdown","id":"f118c0cc","metadata":{},"source":["## 三、英文版"]},{"cell_type":"markdown","id":"a08635df","metadata":{},"source":["**1.1 单一文本概括**"]},{"cell_type":"code","execution_count":12,"id":"e55327d5","metadata":{},"outputs":[],"source":["prod_review = \"\"\"\n","Got this panda plush toy for my daughter's birthday, \\\n","who loves it and takes it everywhere. It's soft and \\ \n","super cute, and its face has a friendly look. It's \\ \n","a bit small for what I paid though. I think there \\ \n","might be other options that are bigger for the \\ \n","same price. 
It arrived a day earlier than expected, \\ \n","so I got to play with it myself before I gave it \\ \n","to her.\n","\"\"\""]},{"cell_type":"code","execution_count":13,"id":"30c2ef51","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["This panda plush toy is loved by the reviewer's daughter, but they feel it is a bit small for the price.\n"]}],"source":["prompt = f\"\"\"\n","Your task is to generate a short summary of a product \\\n","review from an ecommerce site. \n","\n","Summarize the review below, delimited by triple \n","backticks, in at most 30 words. \n","\n","Review: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"cell_type":"markdown","id":"9bdcfc1b","metadata":{},"source":["**1.2 设置关键角度侧重**"]},{"cell_type":"markdown","id":"5dd0534f","metadata":{},"source":["1.2.1 侧重于快递服务"]},{"cell_type":"code","execution_count":14,"id":"b354cc3f","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["The customer is happy with the product but suggests offering larger options for the same price. They were pleased with the early delivery.\n"]}],"source":["prompt = f\"\"\"\n","Your task is to generate a short summary of a product \\\n","review from an ecommerce site to give feedback to the \\\n","Shipping deparmtment. \n","\n","Summarize the review below, delimited by triple \n","backticks, in at most 30 words, and focusing on any aspects \\\n","that mention shipping and delivery of the product. \n","\n","Review: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"cell_type":"markdown","id":"af6aaf3a","metadata":{},"source":["1.2.2 侧重于价格和质量"]},{"cell_type":"code","execution_count":15,"id":"1b5358fd","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["The customer loves the panda plush toy for its softness and cuteness, but feels it is overpriced compared to other options available.\n"]}],"source":["prompt = f\"\"\"\n","Your task is to generate a short summary of a product \\\n","review from an ecommerce site to give feedback to the \\\n","pricing deparmtment, responsible for determining the \\\n","price of the product. \n","\n","Summarize the review below, delimited by triple \n","backticks, in at most 30 words, and focusing on any aspects \\\n","that are relevant to the price and perceived value. \n","\n","Review: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"cell_type":"markdown","id":"0f582677","metadata":{},"source":["**1.3 关键信息提取**"]},{"cell_type":"code","execution_count":16,"id":"32c87014","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["The shipping department should take note that the product arrived a day earlier than expected.\n"]}],"source":["prompt = f\"\"\"\n","Your task is to extract relevant information from \\ \n","a product review from an ecommerce site to give \\\n","feedback to the Shipping department. \n","\n","From the review below, delimited by triple quotes \\\n","extract the information relevant to shipping and \\ \n","delivery. Limit to 30 words. 
\n","\n","Review: ```{prod_review}```\n","\"\"\"\n","\n","response = get_completion(prompt)\n","print(response)"]},{"cell_type":"markdown","id":"2043d100","metadata":{},"source":["**2.1 同时概括多条文本**"]},{"cell_type":"code","execution_count":17,"id":"cff48486","metadata":{},"outputs":[],"source":["review_1 = prod_review \n","\n","# review for a standing lamp\n","review_2 = \"\"\"\n","Needed a nice lamp for my bedroom, and this one \\\n","had additional storage and not too high of a price \\\n","point. Got it fast - arrived in 2 days. The string \\\n","to the lamp broke during the transit and the company \\\n","happily sent over a new one. Came within a few days \\\n","as well. It was easy to put together. Then I had a \\\n","missing part, so I contacted their support and they \\\n","very quickly got me the missing piece! Seems to me \\\n","to be a great company that cares about their customers \\\n","and products. \n","\"\"\"\n","\n","# review for an electric toothbrush\n","review_3 = \"\"\"\n","My dental hygienist recommended an electric toothbrush, \\\n","which is why I got this. The battery life seems to be \\\n","pretty impressive so far. After initial charging and \\\n","leaving the charger plugged in for the first week to \\\n","condition the battery, I've unplugged the charger and \\\n","been using it for twice daily brushing for the last \\\n","3 weeks all on the same charge. But the toothbrush head \\\n","is too small. I’ve seen baby toothbrushes bigger than \\\n","this one. I wish the head was bigger with different \\\n","length bristles to get between teeth better because \\\n","this one doesn’t. Overall if you can get this one \\\n","around the $50 mark, it's a good deal. The manufactuer's \\\n","replacements heads are pretty expensive, but you can \\\n","get generic ones that're more reasonably priced. This \\\n","toothbrush makes me feel like I've been to the dentist \\\n","every day. My teeth feel sparkly clean! \n","\"\"\"\n","\n","# review for a blender\n","review_4 = \"\"\"\n","So, they still had the 17 piece system on seasonal \\\n","sale for around $49 in the month of November, about \\\n","half off, but for some reason (call it price gouging) \\\n","around the second week of December the prices all went \\\n","up to about anywhere from between $70-$89 for the same \\\n","system. And the 11 piece system went up around $10 or \\\n","so in price also from the earlier sale price of $29. \\\n","So it looks okay, but if you look at the base, the part \\\n","where the blade locks into place doesn’t look as good \\\n","as in previous editions from a few years ago, but I \\\n","plan to be very gentle with it (example, I crush \\\n","very hard items like beans, ice, rice, etc. in the \\\n","blender first then pulverize them in the serving size \\\n","I want in the blender then switch to the whipping \\\n","blade for a finer flour, and use the cross cutting blade \\\n","first when making smoothies, then use the flat blade \\\n","if I need them finer/less pulpy). Special tip when making \\\n","smoothies, finely cut and freeze the fruits and \\\n","vegetables (if using spinach-lightly stew soften the \\\n","spinach then freeze until ready for use-and if making \\\n","sorbet, use a small to medium sized food processor) \\\n","that you plan to use that way you can avoid adding so \\\n","much ice if at all-when making your smoothie. \\\n","After about a year, the motor was making a funny noise. 
\\\n","I called customer service but the warranty expired \\\n","already, so I had to buy another one. FYI: The overall \\\n","quality has gone done in these types of products, so \\\n","they are kind of counting on brand recognition and \\\n","consumer loyalty to maintain sales. Got it in about \\\n","two days.\n","\"\"\"\n","\n","reviews = [review_1, review_2, review_3, review_4]"]},{"cell_type":"code","execution_count":18,"id":"3f61080b","metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["0 Soft and cute panda plush toy loved by daughter, but small for the price. Arrived early. \n","\n","1 Great lamp with storage, fast delivery, excellent customer service, and easy assembly. Highly recommended. \n","\n","2 Impressive battery life, but toothbrush head is too small. Good deal if bought around $50. \n","\n","3 The reviewer found the price increase after the sale disappointing and noticed a decrease in quality over time. \n","\n"]}],"source":["for i in range(len(reviews)):\n"," prompt = f\"\"\"\n"," Your task is to generate a short summary of a product \\\n"," review from an ecommerce site. \n","\n"," Summarize the review below, delimited by triple \\\n"," backticks in at most 20 words. \n","\n"," Review: ```{reviews[i]}```\n"," \"\"\"\n"," response = get_completion(prompt)\n"," print(i, response, \"\\n\")"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.11"},"latex_envs":{"LaTeX_envs_menu_present":true,"autoclose":false,"autocomplete":true,"bibliofile":"biblio.bib","cite_by":"apalike","current_citInitial":1,"eqLabelWithNumbers":true,"eqNumInitial":1,"hotkeys":{"equation":"Ctrl-E","itemize":"Ctrl-I"},"labels_anchors":false,"latex_user_defs":false,"report_style_numbering":false,"user_envs_cfg":false},"toc":{"base_numbering":1,"nav_menu":{},"number_sections":true,"sideBar":true,"skip_h1_title":false,"title_cell":"Table of Contents","title_sidebar":"Contents","toc_cell":false,"toc_position":{},"toc_section_display":true,"toc_window_display":true}},"nbformat":4,"nbformat_minor":5}
diff --git a/docs/content/C1 Prompt Engineering for Developer/5. 推断 Inferring.ipynb b/docs/content/C1 Prompt Engineering for Developer/5. 推断 Inferring.ipynb
new file mode 100644
index 0000000..f0c549f
--- /dev/null
+++ b/docs/content/C1 Prompt Engineering for Developer/5. 推断 Inferring.ipynb
@@ -0,0 +1,1013 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3630c235-f891-4874-bd0a-5277d4d6aa82",
+ "metadata": {},
+ "source": [
+ "# 第五章 推断\n",
+ "\n",
+ "在这节课中,你将从产品评论和新闻文章中推断情感和主题。\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5f3abbee",
+ "metadata": {},
+ "source": [
+ "\n",
+ "推断任务可以看作是模型接收文本作为输入,并执行某种分析的过程。其中涉及提取标签、提取实体、理解文本情感等等。如果你想要从一段文本中提取正面或负面情感,在传统的机器学习工作流程中,需要收集标签数据集、训练模型、确定如何在云端部署模型并进行推断。这样做可能效果还不错,但是执行全流程需要很多工作。而且对于每个任务,如情感分析、提取实体等等,都需要训练和部署单独的模型。\n",
+ "\n",
+ "LLM 的一个非常好的特点是,对于许多这样的任务,你只需要编写一个 Prompt 即可开始产出结果,而不需要进行大量的工作。这极大地加快了应用程序开发的速度。你还可以只使用一个模型和一个 API 来执行许多不同的任务,而不需要弄清楚如何训练和部署许多不同的模型。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "51d2fdfa-c99f-4750-8574-dba7712cd7f0",
+ "metadata": {},
+ "source": [
+ "## 一、情感推断\n",
+ "\n",
+ "### 1.1 情感倾向分析\n",
+ "\n",
+ "以电商平台关于一盏台灯的评论为例,可以对其传达的情感进行二分类(正向/负向)。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "bc6260f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lamp_review = \"\"\"\n",
+ "我需要一盏漂亮的卧室灯,这款灯具有额外的储物功能,价格也不算太高。\\\n",
+ "我很快就收到了它。在运输过程中,我们的灯绳断了,但是公司很乐意寄送了一个新的。\\\n",
+ "几天后就收到了。这款灯很容易组装。我发现少了一个零件,于是联系了他们的客服,他们很快就给我寄来了缺失的零件!\\\n",
+ "在我看来,Lumina 是一家非常关心顾客和产品的优秀公司!\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc4ec4ca",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "30d6e4bd-3337-45a3-8c99-a734cdd06743",
+ "metadata": {},
+ "source": [
+ "现在让我们来编写一个 Prompt 来分类这个评论的情感。如果我想让系统告诉我这个评论的情感是什么,只需要编写 “以下产品评论的情感是什么” 这个 Prompt ,加上通常的分隔符和评论文本等等。\n",
+ "\n",
+ "然后让我们运行一下。结果显示这个产品评论的情感是积极的,这似乎是非常正确的。虽然这盏台灯不完美,但这个客户似乎非常满意。这似乎是一家关心客户和产品的伟大公司,可以认为积极的情感似乎是正确的答案。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ac5b0bb9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "情感是积极的。\n"
+ ]
+ }
+ ],
+ "source": [
+ "from tool import get_completion\n",
+ "\n",
+ "prompt = f\"\"\"\n",
+ "以下用三个反引号分隔的产品评论的情感是什么?\n",
+ "\n",
+ "评论文本: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a562e656",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76be2320",
+ "metadata": {},
+ "source": [
+ "如果你想要给出更简洁的答案,以便更容易进行后处理,可以在上述 Prompt 基础上添加另一个指令:*用一个单词回答:「正面」或「负面」*。这样就只会打印出 “正面” 这个单词,这使得输出更加统一,方便后续处理。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "84a761b3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "正面\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "以下用三个反引号分隔的产品评论的情感是什么?\n",
+ "\n",
+ "用一个单词回答:「正面」或「负面」。\n",
+ "\n",
+ "评论文本: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
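+  {
+   "cell_type": "markdown",
+   "id": "a1f2c3d0",
+   "metadata": {},
+   "source": [
+    "得到统一的单词输出后,后续处理就很方便了。下面是一个简要示例(假设 response 即上一单元格返回的「正面」或「负面」,变量名 is_positive 仅为示意),演示如何把它转换成布尔标签:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2e3d4f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 将模型输出的单词标签转换为布尔值,便于后续统计或筛选\n",
+    "# 注意:这里假设输出严格为「正面」或「负面」,实际使用时可加入容错处理\n",
+    "is_positive = (response.strip() == \"正面\")\n",
+    "print(is_positive)"
+   ]
+  },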
+ {
+ "cell_type": "markdown",
+ "id": "81d2a973-1fa4-4a35-ae35-a2e746c0e91b",
+ "metadata": {},
+ "source": [
+ "### 2.2 识别情感类型\n",
+ "\n",
+ "仍然使用台灯评论,我们尝试另一个 Prompt 。这次我需要模型识别出评论作者所表达的情感,并归纳为列表,不超过五项。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e615c13a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "满意,感激,赞赏,信任,满足\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "prompt = f\"\"\"\n",
+ "识别以下评论的作者表达的情感。包含不超过五个项目。将答案格式化为以逗号分隔的单词列表。\n",
+ "\n",
+ "评论文本: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c7743a53",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc4444f7",
+ "metadata": {},
+ "source": [
+ "大型语言模型非常擅长从一段文本中提取特定的东西。在上面的例子中,评论所表达的情感有助于了解客户如何看待特定的产品。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a428d093-51c9-461c-b41e-114e80876409",
+ "metadata": {},
+ "source": [
+ "### 1.3 识别愤怒\n",
+ "\n",
+ "对于很多企业来说,了解某个顾客是否非常生气很重要。所以产生了下述分类问题:以下评论的作者是否表达了愤怒情绪?因为如果有人真的很生气,那么可能值得额外关注,让客户支持或客户成功团队联系客户以了解情况,并为客户解决问题。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "85bad324",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "否\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "prompt = f\"\"\"\n",
+ "以下评论的作者是否表达了愤怒?评论用三个反引号分隔。给出是或否的答案。\n",
+ "\n",
+ "评论文本: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77905fd8",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "11ca57a2",
+ "metadata": {},
+ "source": [
+ "上面这个例子中,客户并没有生气。注意,如果使用常规的监督学习,如果想要建立所有这些分类器,不可能在几分钟内就做到这一点。我们鼓励大家尝试更改一些这样的 Prompt ,也许询问客户是否表达了喜悦,或者询问是否有任何遗漏的部分,并看看是否可以让 Prompt 对这个灯具评论做出不同的推论。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "936a771e-ca78-4e55-8088-2da6f3820ddc",
+ "metadata": {},
+ "source": [
+ "## 二、信息提取\n",
+ "\n",
+ "### 2.1 商品信息提取 \n",
+ "\n",
+ "接下来,让我们从客户评论中提取更丰富的信息。信息提取是自然语言处理(NLP)的一部分,与从文本中提取你想要知道的某些事物相关。因此,在这个 Prompt 中,我要求它识别以下内容:购买物品和制造物品的公司名称。\n",
+ "\n",
+ "同样,如果你试图总结在线购物电子商务网站的许多评论,对于这些评论来说,弄清楚是什么物品、谁制造了该物品,弄清楚积极和消极的情感,有助于追踪特定物品或制造商收获的用户情感趋势。\n",
+ "\n",
+ "在下面这个示例中,我们要求它将响应格式化为一个 JSON 对象,其中物品和品牌作为键。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "e9ffe056",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"物品\": \"卧室灯\",\n",
+ " \"品牌\": \"Lumina\"\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "prompt = f\"\"\"\n",
+ "从评论文本中识别以下项目:\n",
+ "- 评论者购买的物品\n",
+ "- 制造该物品的公司\n",
+ "\n",
+ "评论文本用三个反引号分隔。将你的响应格式化为以 “物品” 和 “品牌” 为键的 JSON 对象。\n",
+ "如果信息不存在,请使用 “未知” 作为值。\n",
+ "让你的回应尽可能简短。\n",
+ " \n",
+ "评论文本: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1342c732",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "954d125d",
+ "metadata": {},
+ "source": [
+ "如上所示,它会说这个物品是一个卧室灯,品牌是 Luminar,你可以轻松地将其加载到 Python 字典中,然后对此输出进行其他处理。"
+ ]
+ },
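+  {
+   "cell_type": "markdown",
+   "id": "c3d4e5a2",
+   "metadata": {},
+   "source": [
+    "下面给出一个简要示例(假设上文的 response 恰好是合法的 JSON 字符串,变量名 item_info 仅为示意),演示如何用标准库 json 将其解析为 Python 字典并读取字段;若模型输出不完全符合 JSON 语法,则需要先做清洗或重试。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4e5f6b3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "# 将模型返回的 JSON 字符串解析为字典,键名「物品」「品牌」来自上文 Prompt 的约定\n",
+    "item_info = json.loads(response)\n",
+    "print(item_info[\"物品\"], item_info[\"品牌\"])"
+   ]
+  },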
+ {
+ "cell_type": "markdown",
+ "id": "a38880a5-088f-4609-9913-f8fa41fb7ba0",
+ "metadata": {},
+ "source": [
+ "### 2.2 综合情感推断和信息提取\n",
+ "\n",
+ "提取上述所有信息使用了 3 或 4 个 Prompt ,但实际上可以编写单个 Prompt 来同时提取所有这些信息。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "939c2b0e",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"情感倾向\": \"正面\",\n",
+ " \"是否生气\": false,\n",
+ " \"物品类型\": \"卧室灯\",\n",
+ " \"品牌\": \"Lumina\"\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "prompt = f\"\"\"\n",
+ "从评论文本中识别以下项目:\n",
+ "- 情绪(正面或负面)\n",
+ "- 审稿人是否表达了愤怒?(是或否)\n",
+ "- 评论者购买的物品\n",
+ "- 制造该物品的公司\n",
+ "\n",
+ "评论用三个反引号分隔。将您的响应格式化为 JSON 对象,以 “情感倾向”、“是否生气”、“物品类型” 和 “品牌” 作为键。\n",
+ "如果信息不存在,请使用 “未知” 作为值。\n",
+ "让你的回应尽可能简短。\n",
+ "将 Anger 值格式化为布尔值。\n",
+ "\n",
+ "评论文本: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e09a673",
+ "metadata": {},
+ "source": [
+ "这个例子中,我们告诉它将愤怒值格式化为布尔值,然后输出一个 JSON。您可以自己尝试不同的变化,或者甚至尝试完全不同的评论,看看是否仍然可以准确地提取这些内容。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "235fc223-2c89-49ec-ac2d-78a8e74a43ac",
+ "metadata": {},
+ "source": [
+ "## 三、主题推断\n",
+ "\n",
+ "大型语言模型的另一个很酷的应用是推断主题。给定一段长文本,这段文本是关于什么的?有什么话题?以以下一段虚构的报纸报道为例。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "811ff13f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 中文\n",
+ "story = \"\"\"\n",
+ "在政府最近进行的一项调查中,要求公共部门的员工对他们所在部门的满意度进行评分。\n",
+ "调查结果显示,NASA 是最受欢迎的部门,满意度为 95%。\n",
+ "\n",
+ "一位 NASA 员工 John Smith 对这一发现发表了评论,他表示:\n",
+ "“我对 NASA 排名第一并不感到惊讶。这是一个与了不起的人们和令人难以置信的机会共事的好地方。我为成为这样一个创新组织的一员感到自豪。”\n",
+ "\n",
+ "NASA 的管理团队也对这一结果表示欢迎,主管 Tom Johnson 表示:\n",
+ "“我们很高兴听到我们的员工对 NASA 的工作感到满意。\n",
+ "我们拥有一支才华横溢、忠诚敬业的团队,他们为实现我们的目标不懈努力,看到他们的辛勤工作得到回报是太棒了。”\n",
+ "\n",
+ "调查还显示,社会保障管理局的满意度最低,只有 45%的员工表示他们对工作满意。\n",
+ "政府承诺解决调查中员工提出的问题,并努力提高所有部门的工作满意度。\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8ea91d6-e841-4ee2-bed9-ca4a36df177f",
+ "metadata": {},
+ "source": [
+ "### 3.1 推断讨论主题\n",
+ "\n",
+ "上面是一篇虚构的关于政府工作人员对他们工作机构感受的报纸文章。我们可以让它确定五个正在讨论的主题,用一两个字描述每个主题,并将输出格式化为逗号分隔的列表。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "cab27b65",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['NASA', '满意度', '评论', '管理团队', '社会保障管理局']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "prompt = f\"\"\"\n",
+ "确定以下给定文本中讨论的五个主题。\n",
+ "\n",
+ "每个主题用1-2个词概括。\n",
+ "\n",
+ "请输出一个可解析的Python列表,每个元素是一个字符串,展示了一个主题。\n",
+ "\n",
+ "给定文本: ```{story}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "790d1435",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34be1d2a-1309-4512-841a-b6f67338938b",
+ "metadata": {},
+ "source": [
+ "### 3.2 为特定主题制作新闻提醒\n",
+ "\n",
+ "假设我们有一个新闻网站或类似的东西,这是我们感兴趣的主题:NASA、地方政府、工程、员工满意度、联邦政府等。假设我们想弄清楚,针对一篇新闻文章,其中涵盖了哪些主题。可以使用这样的prompt:确定以下主题列表中的每个项目是否是以下文本中的主题。以 0 或 1 的形式给出答案列表。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "9f53d337",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[\n",
+ " {\"美国航空航天局\": 1},\n",
+ " {\"当地政府\": 1},\n",
+ " {\"工程\": 0},\n",
+ " {\"员工满意度\": 1},\n",
+ " {\"联邦政府\": 1}\n",
+ "]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "prompt = f\"\"\"\n",
+ "判断主题列表中的每一项是否是给定文本中的一个话题,\n",
+ "\n",
+ "以列表的形式给出答案,每个元素是一个Json对象,键为对应主题,值为对应的 0 或 1。\n",
+ "\n",
+ "主题列表:美国航空航天局、当地政府、工程、员工满意度、联邦政府\n",
+ "\n",
+ "给定文本: ```{story}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8f39f24a",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "08247dbf",
+ "metadata": {},
+ "source": [
+ "有结果可见,这个故事是与关于 NASA 、员工满意度、联邦政府有关,而与当地政府的、工程学无关。这在机器学习中有时被称为 Zero-Shot (零样本)学习算法,因为我们没有给它任何标记的训练数据。仅凭 Prompt ,它就能确定哪些主题在新闻文章中有所涵盖。\n",
+ "\n",
+ "如果我们想生成一个新闻提醒,也可以使用这个处理新闻的过程。假设我非常喜欢 NASA 所做的工作,就可以构建一个这样的系统,每当 NASA 新闻出现时,输出提醒。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "53bf1abd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'美国航空航天局': 1, '当地政府': 1, '工程': 0, '员工满意度': 1, '联邦政府': 1}\n",
+ "提醒: 关于美国航空航天局的新消息\n"
+ ]
+ }
+ ],
+ "source": [
+ "result_lst = eval(response)\n",
+ "topic_dict = {list(i.keys())[0] : list(i.values())[0] for i in result_lst}\n",
+ "print(topic_dict)\n",
+ "if topic_dict['美国航空航天局'] == 1:\n",
+ " print(\"提醒: 关于美国航空航天局的新消息\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9fc2c643",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76ccd189",
+ "metadata": {},
+ "source": [
+ "这就是关于推断的全部内容了,仅用几分钟时间,我们就可以构建多个用于对文本进行推理的系统,而以前则需要熟练的机器学习开发人员数天甚至数周的时间。这非常令人兴奋,无论是对于熟练的机器学习开发人员,还是对于新手来说,都可以使用 Prompt 来非常快速地构建和开始相当复杂的自然语言处理任务。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9ace190d",
+ "metadata": {},
+ "source": [
+ "## 英文版"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a3b34fec",
+ "metadata": {},
+ "source": [
+ "**1.1 情感倾向分析**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "57b08c8d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lamp_review = \"\"\"\n",
+ "Needed a nice lamp for my bedroom, and this one had \\\n",
+ "additional storage and not too high of a price point. \\\n",
+ "Got it fast. The string to our lamp broke during the \\\n",
+ "transit and the company happily sent over a new one. \\\n",
+ "Came within a few days as well. It was easy to put \\\n",
+ "together. I had a missing part, so I contacted their \\\n",
+ "support and they very quickly got me the missing piece! \\\n",
+ "Lumina seems to me to be a great company that cares \\\n",
+ "about their customers and products!!\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "5456540c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The sentiment of the product review is positive.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "What is the sentiment of the following product review, \n",
+ "which is delimited with triple backticks?\n",
+ "\n",
+ "Review text: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "cc0fe287",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "positive\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "What is the sentiment of the following product review, \n",
+ "which is delimited with triple backticks?\n",
+ "\n",
+ "Give your answer as a single word, either \"positive\" \\\n",
+ "or \"negative\".\n",
+ "\n",
+ "Review text: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7dc01fe8",
+ "metadata": {},
+ "source": [
+ "**1.2识别情感类型**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "07708a7d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "satisfied, pleased, grateful, impressed, happy\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Identify a list of emotions that the writer of the \\\n",
+ "following review is expressing. Include no more than \\\n",
+ "five items in the list. Format your answer as a list of \\\n",
+ "lower-case words separated by commas.\n",
+ "\n",
+ "Review text: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5ebd8903",
+ "metadata": {},
+ "source": [
+ "**1.3 识别愤怒**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "0fb1fa65",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "No\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Is the writer of the following review expressing anger?\\\n",
+ "The review is delimited with triple backticks. \\\n",
+ "Give your answer as either yes or no.\n",
+ "\n",
+ "Review text: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "60186c02",
+ "metadata": {},
+ "source": [
+ "**2.1 商品信息提取**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "58ec19cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"Item\": \"lamp\",\n",
+ " \"Brand\": \"Lumina\"\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Identify the following items from the review text: \n",
+ "- Item purchased by reviewer\n",
+ "- Company that made the item\n",
+ "\n",
+ "The review is delimited with triple backticks. \\\n",
+ "Format your response as a JSON object with \\\n",
+ "\"Item\" and \"Brand\" as the keys. \n",
+ "If the information isn't present, use \"unknown\" \\\n",
+ "as the value.\n",
+ "Make your response as short as possible.\n",
+ " \n",
+ "Review text: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a290d15",
+ "metadata": {},
+ "source": [
+ "**2.2 综合情感推断和信息提取**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "785ccfe2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"Sentiment\": \"positive\",\n",
+ " \"Anger\": false,\n",
+ " \"Item\": \"lamp\",\n",
+ " \"Brand\": \"Lumina\"\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Identify the following items from the review text: \n",
+ "- Sentiment (positive or negative)\n",
+ "- Is the reviewer expressing anger? (true or false)\n",
+ "- Item purchased by reviewer\n",
+ "- Company that made the item\n",
+ "\n",
+ "The review is delimited with triple backticks. \\\n",
+ "Format your response as a JSON object with \\\n",
+ "\"Sentiment\", \"Anger\", \"Item\" and \"Brand\" as the keys.\n",
+ "If the information isn't present, use \"unknown\" \\\n",
+ "as the value.\n",
+ "Make your response as short as possible.\n",
+ "Format the Anger value as a boolean.\n",
+ "\n",
+ "Review text: ```{lamp_review}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "28f57f53",
+ "metadata": {},
+ "source": [
+ "**3.1 推断讨论主题**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "8d2859c4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "story = \"\"\"\n",
+ "In a recent survey conducted by the government, \n",
+ "public sector employees were asked to rate their level \n",
+ "of satisfaction with the department they work at. \n",
+ "The results revealed that NASA was the most popular \n",
+ "department with a satisfaction rating of 95%.\n",
+ "\n",
+ "One NASA employee, John Smith, commented on the findings, \n",
+ "stating, \"I'm not surprised that NASA came out on top. \n",
+ "It's a great place to work with amazing people and \n",
+ "incredible opportunities. I'm proud to be a part of \n",
+ "such an innovative organization.\"\n",
+ "\n",
+ "The results were also welcomed by NASA's management team, \n",
+ "with Director Tom Johnson stating, \"We are thrilled to \n",
+ "hear that our employees are satisfied with their work at NASA. \n",
+ "We have a talented and dedicated team who work tirelessly \n",
+ "to achieve our goals, and it's fantastic to see that their \n",
+ "hard work is paying off.\"\n",
+ "\n",
+ "The survey also revealed that the \n",
+ "Social Security Administration had the lowest satisfaction \n",
+ "rating, with only 45% of employees indicating they were \n",
+ "satisfied with their job. The government has pledged to \n",
+ "address the concerns raised by employees in the survey and \n",
+ "work towards improving job satisfaction across all departments.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "48774999",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "survey, satisfaction rating, NASA, Social Security Administration, job satisfaction\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Determine five topics that are being discussed in the \\\n",
+ "following text, which is delimited by triple backticks.\n",
+ "\n",
+ "Make each item one or two words long. \n",
+ "\n",
+ "Format your response as a list of items separated by commas.\n",
+ "Give me a list which can be read in Python.\n",
+ "\n",
+ "Text sample: ```{story}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "35afde60",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['survey',\n",
+ " ' satisfaction rating',\n",
+ " ' NASA',\n",
+ " ' Social Security Administration',\n",
+ " ' job satisfaction']"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "response.split(sep=',')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4874c5bb",
+ "metadata": {},
+ "source": [
+ "**3.2 为特定主题制作新闻提醒**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "a4d3d64f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "topic_list = [\n",
+ " \"nasa\", \"local government\", \"engineering\", \n",
+ " \"employee satisfaction\", \"federal government\"\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "a0ceea1a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1, 0, 0, 1, 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Determine whether each item in the following list of \\\n",
+ "topics is a topic in the text below, which\n",
+ "is delimited with triple backticks.\n",
+ "\n",
+ "Give your answer as list with 0 or 1 for each topic.\\\n",
+ "\n",
+ "List of topics: {\", \".join(topic_list)}\n",
+ "\n",
+ "Text sample: ```{story}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "82489580",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'nasa': 1, 'local government': 0, 'engineering': 0, 'employee satisfaction': 1, 'federal government': 1}\n",
+ "ALERT: New NASA story!\n"
+ ]
+ }
+ ],
+ "source": [
+ "topic_dict = {topic_list[i] : eval(response)[i] for i in range(len(eval(response)))}\n",
+ "print(topic_dict)\n",
+ "if topic_dict['nasa'] == 1:\n",
+ " print(\"ALERT: New NASA story!\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "latex_envs": {
+ "LaTeX_envs_menu_present": true,
+ "autoclose": false,
+ "autocomplete": true,
+ "bibliofile": "biblio.bib",
+ "cite_by": "apalike",
+ "current_citInitial": 1,
+ "eqLabelWithNumbers": true,
+ "eqNumInitial": 1,
+ "hotkeys": {
+ "equation": "Ctrl-E",
+ "itemize": "Ctrl-I"
+ },
+ "labels_anchors": false,
+ "latex_user_defs": false,
+ "report_style_numbering": false,
+ "user_envs_cfg": false
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {
+ "height": "calc(100% - 180px)",
+ "left": "10px",
+ "top": "150px",
+ "width": "256px"
+ },
+ "toc_section_display": true,
+ "toc_window_display": true
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/content/C1 Prompt Engineering for Developer/6. 文本转换 Transforming.ipynb b/docs/content/C1 Prompt Engineering for Developer/6. 文本转换 Transforming.ipynb
new file mode 100644
index 0000000..dd3e543
--- /dev/null
+++ b/docs/content/C1 Prompt Engineering for Developer/6. 文本转换 Transforming.ipynb
@@ -0,0 +1,1348 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "08879154",
+ "metadata": {},
+ "source": [
+ "# 第六章 文本转换"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "2fac57c2",
+ "metadata": {},
+ "source": [
+ "LLM非常擅长将输入转换成不同的格式,典型应用包括多语种文本翻译、拼写及语法纠正、语气调整、格式转换等。\n",
+ "\n",
+ "本章节将介绍如何使用编程的方式,调用API接口来实现“文本转换”功能。"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "bf3733d4",
+ "metadata": {},
+ "source": [
+ "## 一、文本翻译"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "1b418e32",
+ "metadata": {},
+ "source": [
+ "### 1.1 翻译为西班牙语"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8a5bee0c",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hola, me gustaría ordenar una batidora.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from tool import get_completion\n",
+ "\n",
+ "prompt = f\"\"\"\n",
+ "将以下中文翻译成西班牙语: \\ \n",
+ "```您好,我想订购一个搅拌机。```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "e3e922b4",
+ "metadata": {},
+ "source": [
+ "### 1.2 识别语种"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "c2c66002",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "这段文本是法语。\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "请告诉我以下文本是什么语种: \n",
+ "```Combien coûte le lampadaire?```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "c1841354",
+ "metadata": {},
+ "source": [
+ "### 1.3 多语种翻译"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b0c4fa41",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "中文:我想订购一个篮球。\n",
+ "英文:I want to order a basketball.\n",
+ "法语:Je veux commander un ballon de basket.\n",
+ "西班牙语:Quiero pedir una pelota de baloncesto.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "请将以下文本分别翻译成中文、英文、法语和西班牙语: \n",
+ "```I want to order a basketball.```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "68723ba5",
+ "metadata": {},
+ "source": [
+ "### 1.4 同时进行语气转换"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "a4770dcc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Formal: ¿Le gustaría ordenar una almohada?\n",
+ "Informal: ¿Te gustaría ordenar una almohada?\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Translate the following text to Spanish in both the \\\n",
+ "formal and informal forms: \n",
+ "'Would you like to order a pillow?'\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "2c52ca54",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "正式语气:您是否需要订购一个枕头?\n",
+ "非正式语气:你想要订购一个枕头吗?\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "请将以下文本翻译成中文,分别展示成正式与非正式两种语气: \n",
+ "```Would you like to order a pillow?```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "b2dc4c56",
+ "metadata": {},
+ "source": [
+ "### 1.5 通用翻译器"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "54b00aa4",
+ "metadata": {},
+ "source": [
+ "随着全球化与跨境商务的发展,交流的用户可能来自各个不同的国家,使用不同的语言,因此我们需要一个通用翻译器,识别各个消息的语种,并翻译成目标用户的母语,从而实现更方便的跨国交流。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f87a34f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "user_messages = [\n",
+ " \"La performance du système est plus lente que d'habitude.\", # System performance is slower than normal \n",
+ " \"Mi monitor tiene píxeles que no se iluminan.\", # My monitor has pixels that are not lighting\n",
+ " \"Il mio mouse non funziona\", # My mouse is not working\n",
+ " \"Mój klawisz Ctrl jest zepsuty\", # My keyboard has a broken control key\n",
+ " \"我的屏幕在闪烁\" # My screen is flashing\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "6a884190",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "原始消息 (法语): La performance du système est plus lente que d'habitude.\n",
+ "\n",
+ "中文翻译:系统性能比平时慢。\n",
+ "英文翻译:The system performance is slower than usual. \n",
+ "=========================================\n",
+ "原始消息 (西班牙语): Mi monitor tiene píxeles que no se iluminan.\n",
+ "\n",
+ "中文翻译:我的显示器有一些像素点不亮。\n",
+ "英文翻译:My monitor has pixels that do not light up. \n",
+ "=========================================\n",
+ "原始消息 (意大利语): Il mio mouse non funziona\n",
+ "\n",
+ "中文翻译:我的鼠标不工作\n",
+ "英文翻译:My mouse is not working \n",
+ "=========================================\n",
+ "原始消息 (这段文本是波兰语。): Mój klawisz Ctrl jest zepsuty\n",
+ "\n",
+ "中文翻译:我的Ctrl键坏了\n",
+ "英文翻译:My Ctrl key is broken \n",
+ "=========================================\n",
+ "原始消息 (中文): 我的屏幕在闪烁\n",
+ "\n",
+ "中文翻译:我的屏幕在闪烁\n",
+ "英文翻译:My screen is flickering. \n",
+ "=========================================\n"
+ ]
+ }
+ ],
+ "source": [
+ "import time\n",
+ "for issue in user_messages:\n",
+ " time.sleep(20)\n",
+ " prompt = f\"告诉我以下文本是什么语种,直接输出语种,如法语,无需输出标点符号: ```{issue}```\"\n",
+ " lang = get_completion(prompt)\n",
+ " print(f\"原始消息 ({lang}): {issue}\\n\")\n",
+ "\n",
+ " prompt = f\"\"\"\n",
+ " 将以下消息分别翻译成英文和中文,并写成\n",
+ " 中文翻译:xxx\n",
+ " 英文翻译:yyy\n",
+ " 的格式:\n",
+ " ```{issue}```\n",
+ " \"\"\"\n",
+ " response = get_completion(prompt)\n",
+ " print(response, \"\\n=========================================\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "6ab558a2",
+ "metadata": {},
+ "source": [
+ "## 二、语气与写作风格调整"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "b85ae847",
+ "metadata": {},
+ "source": [
+ "写作的语气往往会根据受众对象而有所调整。例如,对于工作邮件,我们常常需要使用正式语气与书面用词,而对同龄朋友的微信聊天,可能更多地会使用轻松、口语化的语气。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "84ce3099",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "尊敬的先生/女士,\n",
+ "\n",
+ "我是小羊,我希望能够向您确认一下我们部门需要采购的显示器尺寸是多少寸。上次我们交谈时,您提到了这个问题。\n",
+ "\n",
+ "期待您的回复。\n",
+ "\n",
+ "谢谢!\n",
+ "\n",
+ "此致,\n",
+ "\n",
+ "小羊\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "将以下文本翻译成商务信函的格式: \n",
+ "```小老弟,我小羊,上回你说咱部门要采购的显示器是多少寸来着?```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "98df9009",
+ "metadata": {},
+ "source": [
+ "## 三、文件格式转换"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "0bf9c074",
+ "metadata": {},
+ "source": [
+ "ChatGPT非常擅长不同格式之间的转换,例如JSON到HTML、XML、Markdown等。在下述例子中,我们有一个包含餐厅员工姓名和电子邮件的列表的JSON,我们希望将其从JSON转换为HTML。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "fad3f358",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_json = { \"resturant employees\" :[ \n",
+ " {\"name\":\"Shyam\", \"email\":\"shyamjaiswal@gmail.com\"},\n",
+ " {\"name\":\"Bob\", \"email\":\"bob32@gmail.com\"},\n",
+ " {\"name\":\"Jai\", \"email\":\"jai87@gmail.com\"}\n",
+ "]}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "f54e7398",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "
\n",
+ " resturant employees\n",
+ " \n",
+ " \n",
+ " | name | \n",
+ " email | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Shyam | \n",
+ " shyamjaiswal@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Bob | \n",
+ " bob32@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Jai | \n",
+ " jai87@gmail.com | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "将以下Python字典从JSON转换为HTML表格,保留表格标题和列名:{data_json}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "a0026f3c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ " resturant employees\n",
+ " \n",
+ " \n",
+ " | name | \n",
+ " email | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Shyam | \n",
+ " shyamjaiswal@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Bob | \n",
+ " bob32@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Jai | \n",
+ " jai87@gmail.com | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from IPython.display import display, Markdown, Latex, HTML, JSON\n",
+ "display(HTML(response))"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "29b7167b",
+ "metadata": {},
+ "source": [
+ "## 四、拼写及语法纠正"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "22776140",
+ "metadata": {},
+ "source": [
+ "拼写及语法的检查与纠正是一个十分常见的需求,特别是使用非母语语言,例如,在论坛发帖时,或发表英文论文时,校对是一件十分重要的事情。\n",
+ "\n",
+ "下述例子给定了一个句子列表,其中有些句子存在拼写或语法问题,有些则没有,我们循环遍历每个句子,要求模型校对文本,如果正确则输出“未发现错误”,如果错误则输出纠正后的文本。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "b7d04bc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = [ \n",
+ " \"The girl with the black and white puppies have a ball.\", # The girl has a ball.\n",
+ " \"Yolanda has her notebook.\", # ok\n",
+ " \"Its going to be a long day. Does the car need it’s oil changed?\", # Homonyms\n",
+ " \"Their goes my freedom. There going to bring they’re suitcases.\", # Homonyms\n",
+ " \"Your going to need you’re notebook.\", # Homonyms\n",
+ " \"That medicine effects my ability to sleep. Have you heard of the butterfly affect?\", # Homonyms\n",
+ " \"This phrase is to cherck chatGPT for spelling abilitty\" # spelling\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "1ef55b7b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 The girl with the black and white puppies has a ball.\n",
+ "1 Yolanda has her notebook.\n",
+ "2 It's going to be a long day. Does the car need its oil changed?\n",
+ "3 Their goes my freedom. There going to bring their suitcases.\n",
+ "4 You're going to need your notebook.\n",
+ "5 That medicine affects my ability to sleep. Have you heard of the butterfly effect?\n",
+ "6 This phrase is to check chatGPT for spelling ability.\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i in range(len(text)):\n",
+ " time.sleep(20)\n",
+ " prompt = f\"\"\"请校对并更正以下文本,注意纠正文本保持原始语种,无需输出原始文本。\n",
+ " 如果您没有发现任何错误,请说“未发现错误”。\n",
+ " \n",
+ " 例如:\n",
+ " 输入:I are happy.\n",
+ " 输出:I am happy.\n",
+ " ```{text[i]}```\"\"\"\n",
+ " response = get_completion(prompt)\n",
+ " print(i, response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "538181e0",
+ "metadata": {},
+ "source": [
+ "以下是一个简单的语法纠错示例(译注:与 Grammarly 功能类似),输入文本为一段关于熊猫玩偶的评价,输出为纠正后的文本。本例使用的 Prompt 较为简单,你也可以进一步要求进行语调的更改。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "6696b06a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = f\"\"\"\n",
+ "Got this for my daughter for her birthday cuz she keeps taking \\\n",
+ "mine from my room. Yes, adults also like pandas too. She takes \\\n",
+ "it everywhere with her, and it's super soft and cute. One of the \\\n",
+ "ears is a bit lower than the other, and I don't think that was \\\n",
+ "designed to be asymmetrical. It's a bit small for what I paid for it \\\n",
+ "though. I think there might be other options that are bigger for \\\n",
+ "the same price. It arrived a day earlier than expected, so I got \\\n",
+ "to play with it myself before I gave it to my daughter.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "50cca36e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I got this for my daughter's birthday because she keeps taking mine from my room. Yes, adults also like pandas too. She takes it everywhere with her, and it's super soft and cute. However, one of the ears is a bit lower than the other, and I don't think that was designed to be asymmetrical. It's also a bit smaller than I expected for the price. I think there might be other options that are bigger for the same price. On the bright side, it arrived a day earlier than expected, so I got to play with it myself before giving it to my daughter.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"校对并更正以下商品评论:```{text}```\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "2e2d1f6a",
+ "metadata": {},
+ "source": [
+ "引入 ```Redlines``` 包,详细显示并对比纠错过程:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "07f32f1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 如未安装redlines,需先安装\n",
+ "!pip3.8 install redlines"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "e8604dfb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "Got I got this for my daughter for her daughter's birthday cuz because she keeps taking mine from my room. room. Yes, adults also like pandas too. too. She takes it everywhere with her, and it's super soft and cute. One cute. However, one of the ears is a bit lower than the other, and I don't think that was designed to be asymmetrical. It's also a bit small smaller than I expected for what I paid for it though. the price. I think there might be other options that are bigger for the same price. It price. On the bright side, it arrived a day earlier than expected, so I got to play with it myself before I gave giving it to my daughter.\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from redlines import Redlines\n",
+ "from IPython.display import display, Markdown\n",
+ "\n",
+ "diff = Redlines(text,response)\n",
+ "display(Markdown(diff.output_markdown))"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "3ee5d487",
+ "metadata": {},
+ "source": [
+ "## 五、综合样例\n",
+ "下述例子展示了同一段评论,用一段prompt同时进行文本翻译+拼写纠正+风格调整+格式转换。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "5061d6a3",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "【优点】\n",
+ "- 超级柔软可爱,女儿生日礼物非常受欢迎。\n",
+ "- 成人也喜欢熊猫,我也很喜欢它。\n",
+ "- 提前一天到货,让我有时间玩一下。\n",
+ "\n",
+ "【缺点】\n",
+ "- 一只耳朵比另一只低,不对称。\n",
+ "- 价格有点贵,但尺寸有点小,可能有更大的同价位选择。\n",
+ "\n",
+ "【总结】\n",
+ "这只熊猫玩具非常适合作为生日礼物,柔软可爱,深受孩子喜欢。虽然价格有点贵,但尺寸有点小,不对称的设计也有点让人失望。如果你想要更大的同价位选择,可能需要考虑其他选项。总的来说,这是一款不错的熊猫玩具,值得购买。"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "针对以下三个反引号之间的英文评论文本,\n",
+ "首先进行拼写及语法纠错,\n",
+ "然后将其转化成中文,\n",
+ "再将其转化成优质淘宝评论的风格,从各种角度出发,分别说明产品的优点与缺点,并进行总结。\n",
+ "润色一下描述,使评论更具有吸引力。\n",
+ "输出结果格式为:\n",
+ "【优点】xxx\n",
+ "【缺点】xxx\n",
+ "【总结】xxx\n",
+ "注意,只需填写xxx部分,并分段输出。\n",
+ "将结果输出成Markdown格式。\n",
+ "```{text}```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "display(Markdown(response))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c2511118",
+ "metadata": {},
+ "source": [
+ "## 六、英文版"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e0555151",
+ "metadata": {},
+ "source": [
+ "**1.1 翻译为西班牙语**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "4ddc32be",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hola, me gustaría ordenar una licuadora.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Translate the following English text to Spanish: \\ \n",
+ "```Hi, I would like to order a blender```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cd74b887",
+ "metadata": {},
+ "source": [
+ "**1.2 识别语种**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "c02bcfca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "This language is French.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Tell me which language this is: \n",
+ "```Combien coûte le lampadaire?```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39b08e97",
+ "metadata": {},
+ "source": [
+ "**1.3 多语种翻译**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "51a00c44",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "French: ```Je veux commander un ballon de basket```\n",
+ "Spanish: ```Quiero ordenar una pelota de baloncesto```\n",
+ "English: ```I want to order a basketball```\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Translate the following text to French and Spanish\n",
+ "and English pirate: \\\n",
+ "```I want to order a basketball```\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34e7ad95",
+ "metadata": {},
+ "source": [
+ "**1.4 同时进行语气转换**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "3d117e73",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Formal: ¿Le gustaría ordenar una almohada?\n",
+ "Informal: ¿Te gustaría ordenar una almohada?\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Translate the following text to Spanish in both the \\\n",
+ "formal and informal forms: \n",
+ "'Would you like to order a pillow?'\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "396b4f42",
+ "metadata": {},
+ "source": [
+ "**1.5 通用翻译器**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "2e36f3d0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "user_messages = [\n",
+ " \"La performance du système est plus lente que d'habitude.\", # System performance is slower than normal \n",
+ " \"Mi monitor tiene píxeles que no se iluminan.\", # My monitor has pixels that are not lighting\n",
+ " \"Il mio mouse non funziona\", # My mouse is not working\n",
+ " \"Mój klawisz Ctrl jest zepsuty\", # My keyboard has a broken control key\n",
+ " \"我的屏幕在闪烁\" # My screen is flashing\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "eb5d58cc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original message (The language is French.): La performance du système est plus lente que d'habitude.\n",
+ "The performance of the system is slower than usual.\n",
+ "\n",
+ "시스템의 성능이 평소보다 느립니다. \n",
+ "\n",
+ "Original message (The language is Spanish.): Mi monitor tiene píxeles que no se iluminan.\n",
+ "English: \"My monitor has pixels that do not light up.\"\n",
+ "\n",
+ "Korean: \"내 모니터에는 밝아지지 않는 픽셀이 있습니다.\" \n",
+ "\n",
+ "Original message (The language is Italian.): Il mio mouse non funziona\n",
+ "English: \"My mouse is not working.\"\n",
+ "Korean: \"내 마우스가 작동하지 않습니다.\" \n",
+ "\n",
+ "Original message (The language is Polish.): Mój klawisz Ctrl jest zepsuty\n",
+ "English: \"My Ctrl key is broken\"\n",
+ "Korean: \"내 Ctrl 키가 고장 났어요\" \n",
+ "\n",
+ "Original message (The language is Chinese.): 我的屏幕在闪烁\n",
+ "English: My screen is flickering.\n",
+ "Korean: 내 화면이 깜박거립니다. \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for issue in user_messages:\n",
+ " prompt = f\"Tell me what language this is: ```{issue}```\"\n",
+ " lang = get_completion(prompt)\n",
+ " print(f\"Original message ({lang}): {issue}\")\n",
+ "\n",
+ " prompt = f\"\"\"\n",
+ " Translate the following text to English \\\n",
+ " and Korean: ```{issue}```\n",
+ " \"\"\"\n",
+ " response = get_completion(prompt)\n",
+ " print(response, \"\\n\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "307039f9",
+ "metadata": {},
+ "source": [
+ "**2.1 语气风格调整**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "dea0b951",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dear Sir/Madam,\n",
+ "\n",
+ "I hope this letter finds you well. My name is Joe, and I am writing to bring your attention to a specification document regarding a standing lamp. \n",
+ "\n",
+ "I kindly request that you take a moment to review the attached document, as it provides detailed information about the features and specifications of the aforementioned standing lamp. \n",
+ "\n",
+ "Thank you for your time and consideration. I look forward to discussing this further with you.\n",
+ "\n",
+ "Yours sincerely,\n",
+ "Joe\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Translate the following from slang to a business letter: \n",
+ "'Dude, This is Joe, check out this spec on this standing lamp.'\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6be8cdb8",
+ "metadata": {},
+ "source": [
+ "**3.1 文件格式转换**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "1c7007ad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_json = { \"resturant employees\" :[ \n",
+ " {\"name\":\"Shyam\", \"email\":\"shyamjaiswal@gmail.com\"},\n",
+ " {\"name\":\"Bob\", \"email\":\"bob32@gmail.com\"},\n",
+ " {\"name\":\"Jai\", \"email\":\"jai87@gmail.com\"}\n",
+ "]}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "165cb7f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Restaurant Employees
\n",
+ "\n",
+ "\n",
+ " \n",
+ " | Name | \n",
+ " Email | \n",
+ "
\n",
+ " \n",
+ " | Shyam | \n",
+ " shyamjaiswal@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Bob | \n",
+ " bob32@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Jai | \n",
+ " jai87@gmail.com | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "Translate the following python dictionary from JSON to an HTML \\\n",
+ "table with column headers and title: {data_json}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "8fe42b12",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Restaurant Employees
\n",
+ "\n",
+ "\n",
+ " \n",
+ " | Name | \n",
+ " Email | \n",
+ "
\n",
+ " \n",
+ " | Shyam | \n",
+ " shyamjaiswal@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Bob | \n",
+ " bob32@gmail.com | \n",
+ "
\n",
+ " \n",
+ " | Jai | \n",
+ " jai87@gmail.com | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from IPython.display import display, Markdown, Latex, HTML, JSON\n",
+ "display(HTML(response))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "147d40ce",
+ "metadata": {},
+ "source": [
+ "**4.1 拼写及语法纠错**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "5f346743",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = [ \n",
+ " \"The girl with the black and white puppies have a ball.\", # The girl has a ball.\n",
+ " \"Yolanda has her notebook.\", # ok\n",
+ " \"Its going to be a long day. Does the car need it’s oil changed?\", # Homonyms\n",
+ " \"Their goes my freedom. There going to bring they’re suitcases.\", # Homonyms\n",
+ " \"Your going to need you’re notebook.\", # Homonyms\n",
+ " \"That medicine effects my ability to sleep. Have you heard of the butterfly affect?\", # Homonyms\n",
+ " \"This phrase is to cherck chatGPT for spelling abilitty\" # spelling\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "98f72c5b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The girl with the black and white puppies has a ball.\n",
+ "No errors found.\n",
+ "It's going to be a long day. Does the car need its oil changed?\n",
+ "There goes my freedom. They're going to bring their suitcases.\n",
+ "You're going to need your notebook.\n",
+ "That medicine affects my ability to sleep. Have you heard of the butterfly effect?\n",
+ "This phrase is to check chatGPT for spelling ability.\n"
+ ]
+ }
+ ],
+ "source": [
+ "for t in text:\n",
+ " prompt = f\"\"\"Proofread and correct the following text\n",
+ " and rewrite the corrected version. If you don't find\n",
+ " and errors, just say \"No errors found\". Don't use \n",
+ " any punctuation around the text:\n",
+ " ```{t}```\"\"\"\n",
+ " response = get_completion(prompt)\n",
+ " print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "1ae98ab3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = f\"\"\"\n",
+ "Got this for my daughter for her birthday cuz she keeps taking \\\n",
+ "mine from my room. Yes, adults also like pandas too. She takes \\\n",
+ "it everywhere with her, and it's super soft and cute. One of the \\\n",
+ "ears is a bit lower than the other, and I don't think that was \\\n",
+ "designed to be asymmetrical. It's a bit small for what I paid for it \\\n",
+ "though. I think there might be other options that are bigger for \\\n",
+ "the same price. It arrived a day earlier than expected, so I got \\\n",
+ "to play with it myself before I gave it to my daughter.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "9013b4fc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Got this for my daughter for her birthday because she keeps taking mine from my room. Yes, adults also like pandas too. She takes it everywhere with her, and it's super soft and cute. However, one of the ears is a bit lower than the other, and I don't think that was designed to be asymmetrical. Additionally, it's a bit small for what I paid for it. I believe there might be other options that are bigger for the same price. On the positive side, it arrived a day earlier than expected, so I got to play with it myself before I gave it to my daughter.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"proofread and correct this review: ```{text}```\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "e73cfab0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "Got this for my daughter for her birthday cuz because she keeps taking mine from my room. room. Yes, adults also like pandas too. too. She takes it everywhere with her, and it's super soft and cute. One cute. However, one of the ears is a bit lower than the other, and I don't think that was designed to be asymmetrical. It's Additionally, it's a bit small for what I paid for it though. it. I think believe there might be other options that are bigger for the same price. It price. On the positive side, it arrived a day earlier than expected, so I got to play with it myself before I gave it to my daughter.\n",
+ "daughter."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from redlines import Redlines\n",
+ "from IPython.display import display, Markdown\n",
+ "\n",
+ "diff = Redlines(text,response)\n",
+ "display(Markdown(diff.output_markdown))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ced0b2ba",
+ "metadata": {},
+ "source": [
+ "**5.1 综合样例**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "bdc1c630",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = f\"\"\"\n",
+ "Got this for my daughter for her birthday cuz she keeps taking \\\n",
+ "mine from my room. Yes, adults also like pandas too. She takes \\\n",
+ "it everywhere with her, and it's super soft and cute. One of the \\\n",
+ "ears is a bit lower than the other, and I don't think that was \\\n",
+ "designed to be asymmetrical. It's a bit small for what I paid for it \\\n",
+ "though. I think there might be other options that are bigger for \\\n",
+ "the same price. It arrived a day earlier than expected, so I got \\\n",
+ "to play with it myself before I gave it to my daughter.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "9dd1c052",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "**Title: A Delightful Gift for Panda Enthusiasts: A Review of the Soft and Adorable Panda Plush Toy**\n",
+ "\n",
+ "*Reviewer: [Your Name]*\n",
+ "\n",
+ "---\n",
+ "\n",
+ "I recently purchased this charming panda plush toy as a birthday gift for my daughter, who has a penchant for \"borrowing\" my belongings from time to time. As an adult, I must admit that I too have fallen under the spell of these lovable creatures. This review aims to provide an in-depth analysis of the product, catering to advanced readers who appreciate a comprehensive evaluation.\n",
+ "\n",
+ "First and foremost, the softness and cuteness of this panda plush toy are simply unparalleled. Its irresistibly plush exterior makes it a joy to touch and hold, ensuring a delightful sensory experience for both children and adults alike. The attention to detail is evident, with its endearing features capturing the essence of a real panda. However, it is worth noting that one of the ears appears to be slightly asymmetrical, which may not have been an intentional design choice.\n",
+ "\n",
+ "While the overall quality of the product is commendable, I must express my slight disappointment regarding its size in relation to its price. Considering the investment made, I expected a larger plush toy. It is worth exploring alternative options that offer a more substantial size for the same price point. Nevertheless, this minor setback does not overshadow the toy's undeniable appeal and charm.\n",
+ "\n",
+ "In terms of delivery, I was pleasantly surprised to receive the panda plush toy a day earlier than anticipated. This unexpected early arrival allowed me to indulge in some personal playtime with the toy before presenting it to my daughter. Such promptness in delivery is a testament to the seller's efficiency and commitment to customer satisfaction.\n",
+ "\n",
+ "In conclusion, this panda plush toy is a delightful gift for both children and adults who appreciate the enchanting allure of these beloved creatures. Its softness, cuteness, and attention to detail make it a truly captivating addition to any collection. While the size may not fully justify the price, the overall quality and prompt delivery make it a worthwhile purchase. I highly recommend this panda plush toy to anyone seeking a charming and endearing companion.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "**Word Count: 305 words**"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "proofread and correct this review. Make it more compelling. \n",
+ "Ensure it follows APA style guide and targets an advanced reader. \n",
+ "Output in markdown format.\n",
+ "Text: ```{text}```\n",
+ "\"\"\"\n",
+ "# 校对注:APA style guide是APA Style Guide是一套用于心理学和相关领域的研究论文写作和格式化的规则。\n",
+ "# 它包括了文本的缩略版,旨在快速阅读,包括引用、解释和参考列表,\n",
+ "# 其详细内容可参考:https://apastyle.apa.org/about-apa-style\n",
+ "# 下一单元格内的汉化prompt内容由译者进行了本地化处理,仅供参考\n",
+ "response = get_completion(prompt)\n",
+ "display(Markdown(response))\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "latex_envs": {
+ "LaTeX_envs_menu_present": true,
+ "autoclose": false,
+ "autocomplete": true,
+ "bibliofile": "biblio.bib",
+ "cite_by": "apalike",
+ "current_citInitial": 1,
+ "eqLabelWithNumbers": true,
+ "eqNumInitial": 1,
+ "hotkeys": {
+ "equation": "Ctrl-E",
+ "itemize": "Ctrl-I"
+ },
+ "labels_anchors": false,
+ "latex_user_defs": false,
+ "report_style_numbering": false,
+ "user_envs_cfg": false
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": true
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/content/C1 Prompt Engineering for Developer/7. 文本扩展 Expanding.ipynb b/docs/content/C1 Prompt Engineering for Developer/7. 文本扩展 Expanding.ipynb
new file mode 100644
index 0000000..8b3b58d
--- /dev/null
+++ b/docs/content/C1 Prompt Engineering for Developer/7. 文本扩展 Expanding.ipynb
@@ -0,0 +1,447 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 第七章 文本扩展"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "扩展是将短文本(例如一组说明或主题列表)输入到大型语言模型中,让模型生成更长的文本(例如基于某个主题的电子邮件或论文)。这种应用是一把双刃剑,好处例如将大型语言模型用作头脑风暴的伙伴;但也存在问题,例如某人可能会使用它来生成大量垃圾邮件。因此,当你使用大型语言模型的这些功能时,请仅以**负责任** (responsible) 和**有益于人们** (helps people) 的方式使用它们。\n",
+ "\n",
+ "在本章中,你将学会如何基于 OpenAI API 生成*针对每位客户评价优化*的客服电子邮件。我们还将利用模型的另一个输入参数称为温度,这种参数允许您在模型响应中变化探索的程度和多样性。\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 一、定制客户邮件"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "我们将根据客户评价和情感,针对性写自动回复邮件。因此,我们将给定客户评价和情感,使用 LLM 针对性生成响应,即根据客户评价和评论情感生成定制电子邮件。\n",
+ "\n",
+ "我们首先给出一个示例,包括一个评论及对应的情感。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 我们可以在推理那章学习到如何对一个评论判断其情感倾向\n",
+ "sentiment = \"消极的\"\n",
+ "\n",
+ "# 一个产品的评价\n",
+ "review = f\"\"\"\n",
+ "他们在11月份的季节性销售期间以约49美元的价格出售17件套装,折扣约为一半。\\\n",
+ "但由于某些原因(可能是价格欺诈),到了12月第二周,同样的套装价格全都涨到了70美元到89美元不等。\\\n",
+ "11件套装的价格也上涨了大约10美元左右。\\\n",
+ "虽然外观看起来还可以,但基座上锁定刀片的部分看起来不如几年前的早期版本那么好。\\\n",
+ "不过我打算非常温柔地使用它,例如,\\\n",
+ "我会先在搅拌机中将像豆子、冰、米饭等硬物研磨,然后再制成所需的份量,\\\n",
+ "切换到打蛋器制作更细的面粉,或者在制作冰沙时先使用交叉切割刀片,然后使用平面刀片制作更细/不粘的效果。\\\n",
+ "制作冰沙时,特别提示:\\\n",
+ "将水果和蔬菜切碎并冷冻(如果使用菠菜,则轻轻煮软菠菜,然后冷冻直到使用;\\\n",
+ "如果制作果酱,则使用小到中号的食品处理器),这样可以避免在制作冰沙时添加太多冰块。\\\n",
+ "大约一年后,电机发出奇怪的噪音,我打电话给客服,但保修已经过期了,所以我不得不再买一个。\\\n",
+ "总的来说,这些产品的总体质量已经下降,因此它们依靠品牌认可和消费者忠诚度来维持销售。\\\n",
+ "货物在两天内到达。\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "我们已经使用推断课程中所学方法提取了情感,这是一个关于搅拌机的客户评价,现在我们将根据情感定制回复。\n",
+ "\n",
+ "以下述 Prompt 为例:假设你是一个客户服务 AI 助手,你的任务是为客户发送电子邮件回复,根据通过三个反引号分隔的客户电子邮件,生成一封回复以感谢客户的评价。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "尊敬的客户,\n",
+ "\n",
+ "非常感谢您对我们产品的评价。我们非常抱歉您在购买过程中遇到了价格上涨的问题。我们一直致力于为客户提供最优惠的价格,但由于市场波动,价格可能会有所变化。我们深表歉意,如果您需要任何帮助,请随时联系我们的客户服务团队。\n",
+ "\n",
+ "我们非常感谢您对我们产品的详细评价和使用技巧。我们将会把您的反馈传达给我们的产品团队,以便改进我们的产品质量和性能。\n",
+ "\n",
+ "再次感谢您对我们的支持和反馈。如果您需要任何帮助或有任何疑问,请随时联系我们的客户服务团队。\n",
+ "\n",
+ "祝您一切顺利!\n",
+ "\n",
+ "AI客户代理\n"
+ ]
+ }
+ ],
+ "source": [
+ "from tool import get_completion\n",
+ "\n",
+ "prompt = f\"\"\"\n",
+ "你是一位客户服务的AI助手。\n",
+ "你的任务是给一位重要客户发送邮件回复。\n",
+ "根据客户通过“```”分隔的评价,生成回复以感谢客户的评价。提醒模型使用评价中的具体细节\n",
+ "用简明而专业的语气写信。\n",
+ "作为“AI客户代理”签署电子邮件。\n",
+ "客户评论:\n",
+ "```{review}```\n",
+ "评论情感:{sentiment}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 二、引入温度系数\n",
+ "\n",
+ "接下来,我们将使用语言模型的一个称为“温度” (Temperature) 的参数,它将允许我们改变模型响应的多样性。您可以将温度视为模型探索或随机性的程度。\n",
+ "\n",
+ "例如,在一个特定的短语中,“我的最爱食品”最有可能的下一个词是“比萨”,其次最有可能的是“寿司”和“塔可”。因此,在温度为零时,模型将总是选择最有可能的下一个词,而在较高的温度下,它还将选择其中一个不太可能的词,在更高的温度下,它甚至可能选择塔可,而这种可能性仅为五分之一。您可以想象,随着模型继续生成更多单词的最终响应,“我的最爱食品是比萨”将会与第一个响应“我的最爱食品是塔可”产生差异。随着模型的继续,这两个响应也将变得越来越不同。\n",
+ "\n",
+ "一般来说,在构建需要可预测响应的应用程序时,我建议**设置温度为零**。在所有课程中,我们一直设置温度为零,如果您正在尝试构建一个可靠和可预测的系统,我认为您应该选择这个温度。如果您尝试以更具创意的方式使用模型,可能需要更广泛地输出不同的结果,那么您可能需要使用更高的温度。"
+ ]
+ },
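+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "顺带一提,本章调用的 `get_completion(prompt, temperature=0.7)` 之所以能接收温度参数,是因为辅助函数把它透传给了底层接口。下面给出一个带 temperature 参数的参考实现(仅作示意,假设 tool 工具包中的同名函数与之类似,实际以随书代码为准):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import openai\n",
+ "\n",
+ "# 参考实现:与 tool 工具包中的 get_completion 类似,展示出来便于理解 temperature 的传递方式\n",
+ "# temperature 默认取 0,保证输出可预测;需要更多样的输出时可传入更高的值\n",
+ "def get_completion(prompt, model=\"gpt-3.5-turbo\", temperature=0):\n",
+ "    messages = [{\"role\": \"user\", \"content\": prompt}]\n",
+ "    response = openai.ChatCompletion.create(\n",
+ "        model=model,\n",
+ "        messages=messages,\n",
+ "        temperature=temperature, # 控制模型输出的随机程度\n",
+ "    )\n",
+ "    return response.choices[0].message[\"content\"]"
+ ]
+ },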
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "同一段来信,我们提醒模型使用用户来信中的详细信息,并设置温度:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "尊敬的客户,\n",
+ "\n",
+ "感谢您对我们产品的评价。我们非常重视您的意见,并对您在使用过程中遇到的问题表示诚挚的道歉。\n",
+ "\n",
+ "我们对价格的变动深感抱歉。根据您的描述,我们了解到在12月第二周,套装的价格出现了不同程度的上涨。我们会进一步调查此事,并确保我们的定价策略更加透明和一致。\n",
+ "\n",
+ "您提到了产品部分的质量下降,特别是锁定刀片的部分。我们对此感到非常遗憾,并将反馈给我们的研发团队,以便改进产品的设计和质量控制。我们始终致力于提供优质的产品,以满足客户的需求和期望。\n",
+ "\n",
+ "此外,我们将非常感谢您分享了您对产品的使用方式和相关提示。您的经验和建议对我们来说非常宝贵,我们将考虑将其纳入我们的产品改进计划中。\n",
+ "\n",
+ "如果您需要进一步帮助或有其他问题,请随时联系我们的客户服务团队。我们将竭诚为您提供支持和解决方案。\n",
+ "\n",
+ "再次感谢您的反馈和对我们的支持。我们将继续努力提供更好的产品和服务。\n",
+ "\n",
+ "祝您一切顺利!\n",
+ "\n",
+ "AI客户代理\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 第一次运行\n",
+ "prompt = f\"\"\"\n",
+ "你是一名客户服务的AI助手。\n",
+ "你的任务是给一位重要的客户发送邮件回复。\n",
+ "根据通过“```”分隔的客户电子邮件生成回复,以感谢客户的评价。\n",
+ "如果情感是积极的或中性的,感谢他们的评价。\n",
+ "如果情感是消极的,道歉并建议他们联系客户服务。\n",
+ "请确保使用评论中的具体细节。\n",
+ "以简明和专业的语气写信。\n",
+ "以“AI客户代理”的名义签署电子邮件。\n",
+ "客户评价:```{review}```\n",
+ "评论情感:{sentiment}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt, temperature=0.7)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "亲爱的客户,\n",
+ "\n",
+ "非常感谢您对我们产品的评价和反馈。我们非常重视您的意见,并感谢您对我们产品的支持。\n",
+ "\n",
+ "首先,我们对价格的变动感到非常抱歉给您带来了困扰。我们会认真考虑您提到的情况,并采取适当的措施来改进我们的价格策略,以避免类似情况再次发生。\n",
+ "\n",
+ "关于产品质量的问题,我们深感抱歉。我们一直致力于提供高质量的产品,并且我们会将您提到的问题反馈给我们的研发团队,以便改进产品的设计和制造过程。\n",
+ "\n",
+ "如果您需要更多关于产品保修的信息,或者对我们的其他产品有任何疑问或需求,请随时联系我们的客户服务团队。我们将竭诚为您提供帮助和支持。\n",
+ "\n",
+ "再次感谢您对我们产品的评价和支持。我们将继续努力提供优质的产品和出色的客户服务,以满足您的需求。\n",
+ "\n",
+ "祝您度过愉快的一天!\n",
+ "\n",
+ "AI客户代理\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 第二次运行\n",
+ "prompt = f\"\"\"\n",
+ "你是一名客户服务的AI助手。\n",
+ "你的任务是给一位重要的客户发送邮件回复。\n",
+ "根据通过“```”分隔的客户电子邮件生成回复,以感谢客户的评价。\n",
+ "如果情感是积极的或中性的,感谢他们的评价。\n",
+ "如果情感是消极的,道歉并建议他们联系客户服务。\n",
+ "请确保使用评论中的具体细节。\n",
+ "以简明和专业的语气写信。\n",
+ "以“AI客户代理”的名义签署电子邮件。\n",
+ "客户评价:```{review}```\n",
+ "评论情感:{sentiment}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt, temperature=0.7)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "在温度为零时,每次执行相同的 Prompt ,您获得的回复理应相同。而使用温度为 0.7 时,则每次都会获得不同的输出。\n",
+ "\n",
+ "所以,您可以看到它与我们之前收到的电子邮件不同。再次执行将再次获得不同的电子邮件。\n",
+ "\n",
+ "因此,我建议您自己尝试温度,以查看输出如何变化。总之,在更高的温度下,模型的输出更加随机。您几乎可以将其视为在更高的温度下,助手**更易分心**,但也许**更有创造力**。"
+ ]
+ },
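+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "如果想亲自验证这一点,可以运行下面的小实验(假设 `get_completion` 支持 temperature 参数,如本章前文的用法所示):同一个 Prompt 在温度为 0 时重复调用,输出通常是一致的;而在温度为 0.7 时,每次的输出往往各不相同。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 小实验:同一个 Prompt 分别在温度 0 和 0.7 下各调用两次,观察输出差异\n",
+ "# 注:温度为 0 时输出通常(但不保证绝对)一致;温度越高,输出越多样\n",
+ "simple_prompt = \"请用一句话描述一只熊猫。\"\n",
+ "for t in [0, 0.7]:\n",
+ "    print(f\"temperature = {t}\")\n",
+ "    for i in range(2):\n",
+ "        print(f\"  第{i+1}次:\", get_completion(simple_prompt, temperature=t))"
+ ]
+ },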
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 三、英文版"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**1.1 定制客户邮件**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# given the sentiment from the lesson on \"inferring\",\n",
+ "# and the original customer message, customize the email\n",
+ "sentiment = \"negative\"\n",
+ "\n",
+ "# review for a blender\n",
+ "review = f\"\"\"\n",
+ "So, they still had the 17 piece system on seasonal \\\n",
+ "sale for around $49 in the month of November, about \\\n",
+ "half off, but for some reason (call it price gouging) \\\n",
+ "around the second week of December the prices all went \\\n",
+ "up to about anywhere from between $70-$89 for the same \\\n",
+ "system. And the 11 piece system went up around $10 or \\\n",
+ "so in price also from the earlier sale price of $29. \\\n",
+ "So it looks okay, but if you look at the base, the part \\\n",
+ "where the blade locks into place doesn’t look as good \\\n",
+ "as in previous editions from a few years ago, but I \\\n",
+ "plan to be very gentle with it (example, I crush \\\n",
+ "very hard items like beans, ice, rice, etc. in the \\ \n",
+ "blender first then pulverize them in the serving size \\\n",
+ "I want in the blender then switch to the whipping \\\n",
+ "blade for a finer flour, and use the cross cutting blade \\\n",
+ "first when making smoothies, then use the flat blade \\\n",
+ "if I need them finer/less pulpy). Special tip when making \\\n",
+ "smoothies, finely cut and freeze the fruits and \\\n",
+ "vegetables (if using spinach-lightly stew soften the \\ \n",
+ "spinach then freeze until ready for use-and if making \\\n",
+ "sorbet, use a small to medium sized food processor) \\ \n",
+ "that you plan to use that way you can avoid adding so \\\n",
+ "much ice if at all-when making your smoothie. \\\n",
+ "After about a year, the motor was making a funny noise. \\\n",
+ "I called customer service but the warranty expired \\\n",
+ "already, so I had to buy another one. FYI: The overall \\\n",
+ "quality has gone done in these types of products, so \\\n",
+ "they are kind of counting on brand recognition and \\\n",
+ "consumer loyalty to maintain sales. Got it in about \\\n",
+ "two days.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dear Valued Customer,\n",
+ "\n",
+ "Thank you for taking the time to share your review with us. We appreciate your feedback and apologize for any inconvenience you may have experienced.\n",
+ "\n",
+ "We are sorry to hear about the price increase you noticed in December. We strive to provide competitive pricing for our products, and we understand your frustration. If you have any further concerns regarding pricing or any other issues, we encourage you to reach out to our customer service team. They will be more than happy to assist you.\n",
+ "\n",
+ "We also appreciate your feedback regarding the base of the system. We continuously work to improve the quality of our products, and your comments will be taken into consideration for future enhancements.\n",
+ "\n",
+ "We apologize for any inconvenience caused by the motor issue you encountered. Our customer service team is always available to assist with any warranty-related concerns. We understand that the warranty had expired, but we would still like to address this matter further. Please feel free to contact our customer service team, and they will do their best to assist you.\n",
+ "\n",
+ "Thank you once again for your review. We value your feedback and appreciate your loyalty to our brand. If you have any further questions or concerns, please do not hesitate to contact us.\n",
+ "\n",
+ "Best regards,\n",
+ "\n",
+ "AI customer agent\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "You are a customer service AI assistant.\n",
+ "Your task is to send an email reply to a valued customer.\n",
+ "Given the customer email delimited by ```, \\\n",
+ "Generate a reply to thank the customer for their review.\n",
+ "If the sentiment is positive or neutral, thank them for \\\n",
+ "their review.\n",
+ "If the sentiment is negative, apologize and suggest that \\\n",
+ "they can reach out to customer service. \n",
+ "Make sure to use specific details from the review.\n",
+ "Write in a concise and professional tone.\n",
+ "Sign the email as `AI customer agent`.\n",
+ "Customer review: ```{review}```\n",
+ "Review sentiment: {sentiment}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**2.1 引入温度系数**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dear Valued Customer,\n",
+ "\n",
+ "Thank you for taking the time to share your feedback with us. We sincerely apologize for any inconvenience you experienced with our pricing and the quality of our product.\n",
+ "\n",
+ "We understand your frustration regarding the price increase of our 17 piece system in December. We assure you that price gouging is not our intention, and we apologize for any confusion caused. We appreciate your loyalty and we value your feedback, as it helps us to improve our products and services.\n",
+ "\n",
+ "Regarding the issue with the blade lock and the decrease in overall quality, we apologize for any disappointment caused. We strive to provide our customers with the best possible products, and we regret that we did not meet your expectations. We will make sure to take your feedback into consideration for future improvements.\n",
+ "\n",
+ "If you require further assistance or if you have any other concerns, please do not hesitate to reach out to our customer service team. They will be more than happy to assist you in resolving any issues you may have.\n",
+ "\n",
+ "Once again, we apologize for any inconvenience caused and we appreciate your understanding. We value your business and we hope to have the opportunity to serve you better in the future.\n",
+ "\n",
+ "Best regards,\n",
+ "\n",
+ "AI customer agent\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = f\"\"\"\n",
+ "You are a customer service AI assistant.\n",
+ "Your task is to send an email reply to a valued customer.\n",
+ "Given the customer email delimited by ```, \\\n",
+ "Generate a reply to thank the customer for their review.\n",
+ "If the sentiment is positive or neutral, thank them for \\\n",
+ "their review.\n",
+ "If the sentiment is negative, apologize and suggest that \\\n",
+ "they can reach out to customer service. \n",
+ "Make sure to use specific details from the review.\n",
+ "Write in a concise and professional tone.\n",
+ "Sign the email as `AI customer agent`.\n",
+ "Customer review: ```{review}```\n",
+ "Review sentiment: {sentiment}\n",
+ "\"\"\"\n",
+ "response = get_completion(prompt, temperature=0.7)\n",
+ "print(response)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "latex_envs": {
+ "LaTeX_envs_menu_present": true,
+ "autoclose": false,
+ "autocomplete": true,
+ "bibliofile": "biblio.bib",
+ "cite_by": "apalike",
+ "current_citInitial": 1,
+ "eqLabelWithNumbers": true,
+ "eqNumInitial": 1,
+ "hotkeys": {
+ "equation": "Ctrl-E",
+ "itemize": "Ctrl-I"
+ },
+ "labels_anchors": false,
+ "latex_user_defs": false,
+ "report_style_numbering": false,
+ "user_envs_cfg": false
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/content/C1 Prompt Engineering for Developer/8. 聊天机器人 Chatbot.ipynb b/docs/content/C1 Prompt Engineering for Developer/8. 聊天机器人 Chatbot.ipynb
new file mode 100644
index 0000000..b008f73
--- /dev/null
+++ b/docs/content/C1 Prompt Engineering for Developer/8. 聊天机器人 Chatbot.ipynb
@@ -0,0 +1,856 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a9183228-0ba6-4af9-8430-649e28868253",
+ "metadata": {
+ "id": "JMXGlIvAwn30"
+ },
+ "source": [
+ "# 第八章 聊天机器人"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f0bdc2c9",
+ "metadata": {},
+ "source": [
+ "\n",
+ "使用一个大型语言模型的一个令人兴奋的事情是,我们可以用它来构建一个定制的聊天机器人 (Chatbot) ,只需要很少的工作量。在这一节中,我们将探索如何利用聊天的方式,与个性化(或专门针对特定任务或行为的)聊天机器人进行扩展对话。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e6fae355",
+ "metadata": {},
+ "source": [
+ "像 ChatGPT 这样的聊天模型实际上是组装成以一系列消息作为输入,并返回一个模型生成的消息作为输出的。这种聊天格式原本的设计目标是简便多轮对话,但我们通过之前的学习可以知道,它对于不会涉及任何对话的**单轮任务**也同样有用。\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "78344a7e",
+ "metadata": {},
+ "source": [
+ "## 一、给定身份"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2c9b885b",
+ "metadata": {},
+ "source": [
+ "接下来,我们将定义两个辅助函数。\n",
+ "\n",
+ "第一个方法已经陪伴了您一整个教程,即 ```get_completion``` ,其适用于单轮对话。我们将 Prompt 放入某种类似**用户消息**的对话框中。另一个称为 ```get_completion_from_messages``` ,传入一个消息列表。这些消息可以来自大量不同的**角色** (roles) ,我们会描述一下这些角色。\n",
+ "\n",
+ "第一条消息中,我们以系统身份发送系统消息 (system message) ,它提供了一个总体的指示。系统消息则有助于设置助手的行为和角色,并作为对话的高级指示。你可以想象它在助手的耳边低语,引导它的回应,而用户不会注意到系统消息。因此,作为用户,如果你曾经使用过 ChatGPT,您可能从来不知道 ChatGPT 的系统消息是什么,这是有意为之的。系统消息的好处是为开发者提供了一种方法,在不让请求本身成为对话的一部分的情况下,引导助手并指导其回应。\n",
+ "\n",
+ "在 ChatGPT 网页界面中,您的消息称为用户消息,而 ChatGPT 的消息称为助手消息。但在构建聊天机器人时,在发送了系统消息之后,您的角色可以仅作为用户 (user) ;也可以在用户和助手 (assistant) 之间交替,从而提供对话上下文。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "f5308d65",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import openai\n",
+ "\n",
+ "# 下文第一个函数即tool工具包中的同名函数,此处展示出来以便于读者对比\n",
+ "def get_completion(prompt, model=\"gpt-3.5-turbo\"):\n",
+ " messages = [{\"role\": \"user\", \"content\": prompt}]\n",
+ " response = openai.ChatCompletion.create(\n",
+ " model=model,\n",
+ " messages=messages,\n",
+ " temperature=0, # 控制模型输出的随机程度\n",
+ " )\n",
+ " return response.choices[0].message[\"content\"]\n",
+ "\n",
+ "def get_completion_from_messages(messages, model=\"gpt-3.5-turbo\", temperature=0):\n",
+ " response = openai.ChatCompletion.create(\n",
+ " model=model,\n",
+ " messages=messages,\n",
+ " temperature=temperature, # 控制模型输出的随机程度\n",
+ " )\n",
+ "# print(str(response.choices[0].message))\n",
+ " return response.choices[0].message[\"content\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "46caaa5b",
+ "metadata": {},
+ "source": [
+ "现在让我们尝试在对话中使用这些消息。我们将使用上面的函数来获取从这些消息中得到的回答,同时,使用更高的温度 (temperature)(越高生成的越多样,更多内容见第七章)。\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e105c1b4",
+ "metadata": {},
+ "source": [
+ "### 1.1 讲笑话\n",
+ "\n",
+ "系统消息说,你是一个说话像莎士比亚的助手。这是我们向助手描述**它应该如何表现的方式**。然后,第一个用户消息是*给我讲个笑话*。接下来以助手身份给出回复是,*为什么鸡会过马路?* 最后发送用户消息是*我不知道*。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "02b0e4d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 中文\n",
+ "messages = [ \n",
+ "{'role':'system', 'content':'你是一个像莎士比亚一样说话的助手。'}, \n",
+ "{'role':'user', 'content':'给我讲个笑话'}, \n",
+ "{'role':'assistant', 'content':'鸡为什么过马路'}, \n",
+ "{'role':'user', 'content':'我不知道'} ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "65f80283",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "为了到达彼岸,去追求自己的夢想! 有点儿像一个戏剧里面的人物吧,不是吗?\n"
+ ]
+ }
+ ],
+ "source": [
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f51a7e0",
+ "metadata": {},
+ "source": [
+ "(注:上述例子中由于选定 temperature = 1,模型的回答会比较随机且迥异(不乏很有创意)。此处附上另一个回答:\n",
+ "\n",
+ "让我用一首莎士比亚式的诗歌来回答你的问题:\n",
+ "\n",
+ "当鸡之心欲往前,\n",
+ "马路之际是其选择。\n",
+ "驱车徐行而天晴,\n",
+ "鸣笛吹响伴交错。\n",
+ "\n",
+ "问之何去何从也?\n",
+ "因大道之上未有征,\n",
+ "而鸡乃跃步前进,\n",
+ "其决策毋需犹豫。\n",
+ "\n",
+ "鸡之智慧何可言,\n",
+ "道路孤独似乌漆。\n",
+ "然其勇气令人叹,\n",
+ "勇往直前没有退。\n",
+ "\n",
+ "故鸡过马路何解?\n",
+ "忍受车流喧嚣之困厄。\n",
+ "因其鸣鸣悍然一跃,\n",
+ "成就夸夸骄人壁画。\n",
+ "\n",
+ "所以笑话之妙处,\n",
+ "伴随鸡之勇气满溢。\n",
+ "笑谈人生不畏路,\n",
+ "有智有勇尽显妙。\n",
+ "\n",
+ "希望这个莎士比亚风格的回答给你带来一些欢乐!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "852b8989",
+ "metadata": {},
+ "source": [
+ "### 1.2 友好的聊天机器人"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5f76bedb",
+ "metadata": {},
+ "source": [
+ "让我们看另一个例子。助手的消息是*你是一个友好的聊天机器人*,第一个用户消息是*嗨,我叫Isa*。我们想要得到第一个用户消息。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ca517ab0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "嗨,Isa,很高兴见到你!有什么我可以帮助你的吗?\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "messages = [ \n",
+ "{'role':'system', 'content':'你是个友好的聊天机器人。'}, \n",
+ "{'role':'user', 'content':'Hi, 我是Isa。'} ]\n",
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1dd6c5f8",
+ "metadata": {},
+ "source": [
+ "## 二、构建上下文"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1e9f96ba",
+ "metadata": {},
+ "source": [
+ "让我们再试一个例子。系统消息是,你是一个友好的聊天机器人,第一个用户消息是,是的,你能提醒我我的名字是什么吗?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "a606d422",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "抱歉,我不知道您的名字,因为我们是虚拟的聊天机器人和现实生活中的人类在不同的世界中。\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "messages = [ \n",
+ "{'role':'system', 'content':'你是个友好的聊天机器人。'}, \n",
+ "{'role':'user', 'content':'好,你能提醒我,我的名字是什么吗?'} ]\n",
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "05c65d16",
+ "metadata": {},
+ "source": [
+ "如上所见,模型实际上并不知道我的名字。\n",
+ "\n",
+ "因此,每次与语言模型的交互都互相独立,这意味着我们必须提供所有相关的消息,以便模型在当前对话中进行引用。如果想让模型引用或 “记住” 对话的早期部分,则必须在模型的输入中提供早期的交流。我们将其称为上下文 (context) 。尝试以下示例。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "6019b1d5",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "当然可以!您的名字是Isa。\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 中文\n",
+ "messages = [ \n",
+ "{'role':'system', 'content':'你是个友好的聊天机器人。'},\n",
+ "{'role':'user', 'content':'Hi, 我是Isa'},\n",
+ "{'role':'assistant', 'content': \"Hi Isa! 很高兴认识你。今天有什么可以帮到你的吗?\"},\n",
+ "{'role':'user', 'content':'是的,你可以提醒我, 我的名字是什么?'} ]\n",
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1ed90a6",
+ "metadata": {},
+ "source": [
+ "现在我们已经给模型提供了上下文,也就是之前的对话中提到的我的名字,然后我们会问同样的问题,也就是我的名字是什么。因为模型有了需要的全部上下文,所以它能够做出回应,就像我们在输入的消息列表中看到的一样。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dedba66a-58b0-40d4-b9ae-47e79ae22328",
+ "metadata": {
+ "id": "bBg_MpXeYnTq"
+ },
+ "source": [
+ "## 三、订餐机器人\n",
+ "\n",
+ "现在,我们构建一个 “订餐机器人”,我们需要它自动收集用户信息,接受比萨饼店的订单。\n",
+ "\n",
+ "### 3.1 构建机器人\n",
+ "\n",
+ "下面这个函数将收集我们的用户消息,以便我们可以避免像刚才一样手动输入。这个函数将从我们下面构建的用户界面中收集 Prompt ,然后将其附加到一个名为上下文( ```context``` )的列表中,并在每次调用模型时使用该上下文。模型的响应也会添加到上下文中,所以用户消息和模型消息都被添加到上下文中,上下文逐渐变长。这样,模型就有了需要的信息来确定下一步要做什么。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "e76749ac",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def collect_messages(_):\n",
+ " prompt = inp.value_input\n",
+ " inp.value = ''\n",
+ " context.append({'role':'user', 'content':f\"{prompt}\"})\n",
+ " response = get_completion_from_messages(context) \n",
+ " context.append({'role':'assistant', 'content':f\"{response}\"})\n",
+ " panels.append(\n",
+ " pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n",
+ " panels.append(\n",
+ " pn.Row('Assistant:', pn.pane.Markdown(response, width=600, style={'background-color': '#F6F6F6'})))\n",
+ " \n",
+ " return pn.Column(*panels)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a3b003e",
+ "metadata": {},
+ "source": [
+ "现在,我们将设置并运行这个 UI 来显示订单机器人。初始的上下文包含了包含菜单的系统消息,在每次调用时都会使用。此后随着对话进行,上下文也会不断增长。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d9f97fa0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install panel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fdf1731b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 中文\n",
+ "import panel as pn # GUI\n",
+ "pn.extension()\n",
+ "\n",
+ "panels = [] # collect display \n",
+ "\n",
+ "context = [{'role':'system', 'content':\"\"\"\n",
+ "你是订餐机器人,为披萨餐厅自动收集订单信息。\n",
+ "你要首先问候顾客。然后等待用户回复收集订单信息。收集完信息需确认顾客是否还需要添加其他内容。\n",
+ "最后需要询问是否自取或外送,如果是外送,你要询问地址。\n",
+ "最后告诉顾客订单总金额,并送上祝福。\n",
+ "\n",
+ "请确保明确所有选项、附加项和尺寸,以便从菜单中识别出该项唯一的内容。\n",
+ "你的回应应该以简短、非常随意和友好的风格呈现。\n",
+ "\n",
+ "菜单包括:\n",
+ "\n",
+ "菜品:\n",
+ "意式辣香肠披萨(大、中、小) 12.95、10.00、7.00\n",
+ "芝士披萨(大、中、小) 10.95、9.25、6.50\n",
+ "茄子披萨(大、中、小) 11.95、9.75、6.75\n",
+ "薯条(大、小) 4.50、3.50\n",
+ "希腊沙拉 7.25\n",
+ "\n",
+ "配料:\n",
+ "奶酪 2.00\n",
+ "蘑菇 1.50\n",
+ "香肠 3.00\n",
+ "加拿大熏肉 3.50\n",
+ "AI酱 1.50\n",
+ "辣椒 1.00\n",
+ "\n",
+ "饮料:\n",
+ "可乐(大、中、小) 3.00、2.00、1.00\n",
+ "雪碧(大、中、小) 3.00、2.00、1.00\n",
+ "瓶装水 5.00\n",
+ "\"\"\"} ] # accumulate messages\n",
+ "\n",
+ "\n",
+ "inp = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n",
+ "button_conversation = pn.widgets.Button(name=\"Chat!\")\n",
+ "\n",
+ "interactive_conversation = pn.bind(collect_messages, button_conversation)\n",
+ "\n",
+ "dashboard = pn.Column(\n",
+ " inp,\n",
+ " pn.Row(button_conversation),\n",
+ " pn.panel(interactive_conversation, loading_indicator=True, height=300),\n",
+ ")\n",
+ "\n",
+ "dashboard"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07d29d10",
+ "metadata": {},
+ "source": [
+ "运行如上代码可以得到一个点餐机器人,下图展示了一个点餐的完整流程:\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "668ea96d",
+ "metadata": {},
+ "source": [
+ "### 3.2 创建JSON摘要"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2a2c9822",
+ "metadata": {},
+ "source": [
+ "此处我们另外要求模型创建一个 JSON 摘要,方便我们发送给订单系统。\n",
+ "\n",
+ "因此我们需要在上下文的基础上追加另一个系统消息,作为另一条指示 (instruction) 。我们说*创建一个刚刚订单的 JSON 摘要,列出每个项目的价格,字段应包括 1)披萨,包括尺寸,2)配料列表,3)饮料列表,4)辅菜列表,包括尺寸,最后是总价格*。此处也可以定义为用户消息,不一定是系统消息。\n",
+ "\n",
+ "请注意,这里我们使用了一个较低的温度,因为对于这些类型的任务,我们希望输出相对可预测。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "c840ff56",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"披萨\": {\n",
+ " \"意式辣香肠披萨\": {\n",
+ " \"大\": 12.95,\n",
+ " \"中\": 10.00,\n",
+ " \"小\": 7.00\n",
+ " },\n",
+ " \"芝士披萨\": {\n",
+ " \"大\": 10.95,\n",
+ " \"中\": 9.25,\n",
+ " \"小\": 6.50\n",
+ " },\n",
+ " \"茄子披萨\": {\n",
+ " \"大\": 11.95,\n",
+ " \"中\": 9.75,\n",
+ " \"小\": 6.75\n",
+ " }\n",
+ " },\n",
+ " \"配料\": {\n",
+ " \"奶酪\": 2.00,\n",
+ " \"蘑菇\": 1.50,\n",
+ " \"香肠\": 3.00,\n",
+ " \"加拿大熏肉\": 3.50,\n",
+ " \"AI酱\": 1.50,\n",
+ " \"辣椒\": 1.00\n",
+ " },\n",
+ " \"饮料\": {\n",
+ " \"可乐\": {\n",
+ " \"大\": 3.00,\n",
+ " \"中\": 2.00,\n",
+ " \"小\": 1.00\n",
+ " },\n",
+ " \"雪碧\": {\n",
+ " \"大\": 3.00,\n",
+ " \"中\": 2.00,\n",
+ " \"小\": 1.00\n",
+ " },\n",
+ " \"瓶装水\": 5.00\n",
+ " }\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "messages = context.copy()\n",
+ "messages.append(\n",
+ "{'role':'system', 'content':\n",
+ "'''创建上一个食品订单的 json 摘要。\\\n",
+ "逐项列出每件商品的价格,字段应该是 1) 披萨,包括大小 2) 配料列表 3) 饮料列表,包括大小 4) 配菜列表包括大小 5) 总价\n",
+ "你应该给我返回一个可解析的Json对象,包括上述字段'''}, \n",
+ ")\n",
+ "\n",
+ "response = get_completion_from_messages(messages, temperature=0)\n",
+ "print(response)"
+ ]
+ },
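+ {
+ "cell_type": "markdown",
+ "id": "f3a1c2d4",
+ "metadata": {},
+ "source": [
+ "拿到上面这样可解析的 JSON 字符串后,就可以在程序中把它转换成 Python 对象,再交给订单系统处理。下面是一个最小示例(假设 response 中恰好只包含合法的 JSON 文本;如果模型在 JSON 前后附加了说明文字,则需要先截取其中大括号之间的部分):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7e9d0a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "# 尝试将模型返回的字符串解析为 Python 字典\n",
+ "# 注意:此处假设 response 恰好是一个合法的 JSON 对象字符串\n",
+ "try:\n",
+ "    order_summary = json.loads(response)\n",
+ "    print(type(order_summary))\n",
+ "    print(list(order_summary.keys()))\n",
+ "except json.JSONDecodeError as e:\n",
+ "    # 解析失败时打印错误,便于检查模型输出的格式\n",
+ "    print(\"解析失败:\", e)"
+ ]
+ },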
+ {
+ "cell_type": "markdown",
+ "id": "ef17c2b2",
+ "metadata": {},
+ "source": [
+ "现在,我们已经建立了自己的订餐聊天机器人。请随意自定义并修改系统消息,以更改聊天机器人的行为,并使其扮演不同的角色,拥有不同的知识。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2764c8a0",
+ "metadata": {},
+ "source": [
+ "## 三、英文版"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "123f2066",
+ "metadata": {},
+ "source": [
+ "**1.1 讲笑话**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "c9dff513",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages = [ \n",
+ "{'role':'system', 'content':'You are an assistant that speaks like Shakespeare.'}, \n",
+ "{'role':'user', 'content':'tell me a joke'}, \n",
+ "{'role':'assistant', 'content':'Why did the chicken cross the road'}, \n",
+ "{'role':'user', 'content':'I don\\'t know'} ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "381e14c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "To get to the other side, methinks!\n"
+ ]
+ }
+ ],
+ "source": [
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "028656a1",
+ "metadata": {},
+ "source": [
+ "**1.2 友好的聊天机器人**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "8205c007",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hello Isa! How can I assist you today?\n"
+ ]
+ }
+ ],
+ "source": [
+ "messages = [ \n",
+ "{'role':'system', 'content':'You are friendly chatbot.'}, \n",
+ "{'role':'user', 'content':'Hi, my name is Isa'} ]\n",
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "81f0d22d",
+ "metadata": {},
+ "source": [
+ "**2.1 构建上下文**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "97296cdd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I'm sorry, but as a chatbot, I do not have access to personal information or memory. I cannot remind you of your name.\n"
+ ]
+ }
+ ],
+ "source": [
+ "messages = [ \n",
+ "{'role':'system', 'content':'You are friendly chatbot.'}, \n",
+ "{'role':'user', 'content':'Yes, can you remind me, What is my name?'} ]\n",
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "5ab959d0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Your name is Isa! How can I assist you further, Isa?\n"
+ ]
+ }
+ ],
+ "source": [
+ "messages = [ \n",
+ "{'role':'system', 'content':'You are friendly chatbot.'},\n",
+ "{'role':'user', 'content':'Hi, my name is Isa'},\n",
+ "{'role':'assistant', 'content': \"Hi Isa! It's nice to meet you. \\\n",
+ "Is there anything I can help you with today?\"},\n",
+ "{'role':'user', 'content':'Yes, you can remind me, What is my name?'} ]\n",
+ "response = get_completion_from_messages(messages, temperature=1)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a93897fc",
+ "metadata": {},
+ "source": [
+ "**3.1 构建机器人**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "9d93bc09",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def collect_messages(_):\n",
+ " prompt = inp.value_input\n",
+ " inp.value = ''\n",
+ " context.append({'role':'user', 'content':f\"{prompt}\"})\n",
+ " response = get_completion_from_messages(context) \n",
+ " context.append({'role':'assistant', 'content':f\"{response}\"})\n",
+ " panels.append(\n",
+ " pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n",
+ " panels.append(\n",
+ " pn.Row('Assistant:', pn.pane.Markdown(response, width=600, style={'background-color': '#F6F6F6'})))\n",
+ " \n",
+ " return pn.Column(*panels)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8138c4ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import panel as pn # GUI\n",
+ "pn.extension()\n",
+ "\n",
+ "panels = [] # collect display \n",
+ "\n",
+ "context = [ {'role':'system', 'content':\"\"\"\n",
+ "You are OrderBot, an automated service to collect orders for a pizza restaurant. \\\n",
+ "You first greet the customer, then collects the order, \\\n",
+ "and then asks if it's a pickup or delivery. \\\n",
+ "You wait to collect the entire order, then summarize it and check for a final \\\n",
+ "time if the customer wants to add anything else. \\\n",
+ "If it's a delivery, you ask for an address. \\\n",
+ "Finally you collect the payment.\\\n",
+ "Make sure to clarify all options, extras and sizes to uniquely \\\n",
+ "identify the item from the menu.\\\n",
+ "You respond in a short, very conversational friendly style. \\\n",
+ "The menu includes \\\n",
+ "pepperoni pizza 12.95, 10.00, 7.00 \\\n",
+ "cheese pizza 10.95, 9.25, 6.50 \\\n",
+ "eggplant pizza 11.95, 9.75, 6.75 \\\n",
+ "fries 4.50, 3.50 \\\n",
+ "greek salad 7.25 \\\n",
+ "Toppings: \\\n",
+ "extra cheese 2.00, \\\n",
+ "mushrooms 1.50 \\\n",
+ "sausage 3.00 \\\n",
+ "canadian bacon 3.50 \\\n",
+ "AI sauce 1.50 \\\n",
+ "peppers 1.00 \\\n",
+ "Drinks: \\\n",
+ "coke 3.00, 2.00, 1.00 \\\n",
+ "sprite 3.00, 2.00, 1.00 \\\n",
+ "bottled water 5.00 \\\n",
+ "\"\"\"} ] # accumulate messages\n",
+ "\n",
+ "\n",
+ "inp = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n",
+ "button_conversation = pn.widgets.Button(name=\"Chat!\")\n",
+ "\n",
+ "interactive_conversation = pn.bind(collect_messages, button_conversation)\n",
+ "\n",
+ "dashboard = pn.Column(\n",
+ " inp,\n",
+ " pn.Row(button_conversation),\n",
+ " pn.panel(interactive_conversation, loading_indicator=True, height=300),\n",
+ ")\n",
+ "\n",
+ "dashboard"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "93944944",
+ "metadata": {},
+ "source": [
+ "**3.2 创建Json摘要**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "b779dd04",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sure! Here's a JSON summary of your food order:\n",
+ "\n",
+ "{\n",
+ " \"pizza\": {\n",
+ " \"type\": \"pepperoni\",\n",
+ " \"size\": \"large\"\n",
+ " },\n",
+ " \"toppings\": [\n",
+ " \"extra cheese\",\n",
+ " \"mushrooms\"\n",
+ " ],\n",
+ " \"drinks\": [\n",
+ " {\n",
+ " \"type\": \"coke\",\n",
+ " \"size\": \"medium\"\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"sprite\",\n",
+ " \"size\": \"small\"\n",
+ " }\n",
+ " ],\n",
+ " \"sides\": [\n",
+ " {\n",
+ " \"type\": \"fries\",\n",
+ " \"size\": \"regular\"\n",
+ " }\n",
+ " ],\n",
+ " \"total_price\": 29.45\n",
+ "}\n",
+ "\n",
+ "Please let me know if there's anything else you'd like to add or modify.\n"
+ ]
+ }
+ ],
+ "source": [
+ "messages = context.copy()\n",
+ "messages.append(\n",
+ "{'role':'system', 'content':'create a json summary of the previous food order. Itemize the price for each item\\\n",
+ " The fields should be 1) pizza, include size 2) list of toppings 3) list of drinks, include size 4) list of sides include size 5)total price '}, \n",
+ ")\n",
+ "response = get_completion_from_messages(messages, temperature=0)\n",
+ "print(response)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "latex_envs": {
+ "LaTeX_envs_menu_present": true,
+ "autoclose": false,
+ "autocomplete": true,
+ "bibliofile": "biblio.bib",
+ "cite_by": "apalike",
+ "current_citInitial": 1,
+ "eqLabelWithNumbers": true,
+ "eqNumInitial": 1,
+ "hotkeys": {
+ "equation": "Ctrl-E",
+ "itemize": "Ctrl-I"
+ },
+ "labels_anchors": false,
+ "latex_user_defs": false,
+ "report_style_numbering": false,
+ "user_envs_cfg": false
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {
+ "height": "calc(100% - 180px)",
+ "left": "10px",
+ "top": "150px",
+ "width": "277px"
+ },
+ "toc_section_display": true,
+ "toc_window_display": true
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/content/环境配置.ipynb b/docs/content/环境配置.ipynb
new file mode 100644
index 0000000..e6ee46f
--- /dev/null
+++ b/docs/content/环境配置.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 环境配置"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "我们需要配置⼀个环境来运⾏ Python、Jupyter Notebook、OpenAI API key、相关库以及运⾏本书所需的代码,快速⼊⻔。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 安装Anaconda"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "通过清华源镜像来安装[Anaconda](https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive)\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "选择对应的版本下载安装即可。\n",
+ "\n",
+ "\n",
+ "如果已安装Anaconda,则可以跳过以下步骤。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- 如果我们使用Window系统,可以下载`Anaconda3-2023.07-1-Windows-x86_64.exe`安装包直接安装即可。"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- 如果我们使用MacOS系统\n",
+ " 1. Intel芯片:可以下载`Anaconda3-2023.07-1-MacOSX-x86_64.sh`\n",
+ " 2. Apple芯片:可以下载`Anaconda3-2023.07-1-MacOSX-arm64.sh`\n",
+ " 并执行以下操作:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# 以Intel处理器为例,⽂件名可能会更改\n",
+ "sh Anaconda3-2023.07-1-MacOSX-x86_64.sh -b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "接下来,初始化终端Shell,以便我们可以直接运⾏conda。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "~/anaconda3/bin/conda init"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "现在关闭并重新打开当前的shell,我们会发现在命令行的前面多了一个`(base)`,这是anaconda的一个基础`python`环境。下⾯我们使⽤以下命令来创建⼀个新的环境:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# 创建一个名为chatgpt且python版本为3.9的环境\n",
+ "conda create --name chatgpt python=3.9 -y"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "创建完成后,现在我们来激活 chatgpt 环境:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "conda activate chatgpt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 安装本书需要用到的python库"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -q python-dotenv\n",
+ "!pip install -q openai\n",
+ "## 等更多的python包"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## OpenAI API key"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "在获取OpenAI API key之前我们需要[openai官网](https://openai.com/)中注册一个账号。这里假设我们已经有了openai账号,先在[openai官网](https://openai.com/)登录,登录后如下图所示:\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "我们选择`API`,然后点击右上角的头像,选择`View API keys`,如下图所示:\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "点击`Create new secret key`按钮创建OpenAI API key,我们将创建好的OpenAI API key复制以此形式`OPENAI_API_KEY=\"sk-...\"`保存到`.env`文件中,并将`.env`文件保存在项目根目录下。# TODO:放到哪个固定位置待确认\n",
+ "\n",
+ "下面是读取`.env`文件的代码"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import openai\n",
+ "from dotenv import load_dotenv, find_dotenv\n",
+ "\n",
+ "# 读取本地/项目的环境变量。\n",
+ "\n",
+ "# find_dotenv()寻找并定位.env文件的路径\n",
+ "# load_dotenv()读取该.env文件,并将其中的环境变量加载到当前的运行环境中 \n",
+ "# 如果你设置的是全局的环境变量,这行代码则没有任何作用。\n",
+ "_ = load_dotenv(find_dotenv())\n",
+ "\n",
+ "# 获取环境变量 OPENAI_API_KEY\n",
+ "openai.api_key = os.environ['OPENAI_API_KEY']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "将读取`.env`文件的代码封装成函数供每一章节直接调用获取在OpenAI API key。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv, find_dotenv\n",
+ "def get_openai_key():\n",
+ " _ = load_dotenv(find_dotenv())\n",
+ " return os.environ['OPENAI_API_KEY']\n",
+ "\n",
+ "openai.api_key = get_openai_key()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "gpt_flask",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/figures/docs/C0/Anaconda-file-list.png b/figures/docs/C0/Anaconda-file-list.png
new file mode 100644
index 0000000..6f174f1
Binary files /dev/null and b/figures/docs/C0/Anaconda-file-list.png differ
diff --git a/figures/docs/C0/openai-choose.png b/figures/docs/C0/openai-choose.png
new file mode 100644
index 0000000..0e3987c
Binary files /dev/null and b/figures/docs/C0/openai-choose.png differ
diff --git a/figures/docs/C0/openai-get-key.png b/figures/docs/C0/openai-get-key.png
new file mode 100644
index 0000000..e508043
Binary files /dev/null and b/figures/docs/C0/openai-get-key.png differ
diff --git a/figures/docs/C1/Chatbot-pizza-cn.png b/figures/docs/C1/Chatbot-pizza-cn.png
new file mode 100644
index 0000000..54807eb
Binary files /dev/null and b/figures/docs/C1/Chatbot-pizza-cn.png differ