lingyit1108 committed on
Commit
69e20d0
·
1 Parent(s): 8434471

finishing up the app: UI, content understanding, questionnaire, coaching

Browse files
bin/clean.sh CHANGED
@@ -2,4 +2,6 @@
2
 
3
  find . -name __pycache__ | xargs rm -rf
4
  find . -name .pytest_cache | xargs rm -rf
5
- find . -name .ipynb_checkpoints | xargs rm -rf
 
 
 
2
 
3
  find . -name __pycache__ | xargs rm -rf
4
  find . -name .pytest_cache | xargs rm -rf
5
+ find . -name .ipynb_checkpoints | xargs rm -rf
6
+
7
+ python reset_database.py
database/mock_qna.sqlite CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8949591dc84ad447843c2741803c39e545dc11c6e39cefca75ab1416a6140e3a
3
  size 20480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66df2080ffb3456c39b4bf554effc49266e957e63792ce12a4f11cb991f369fd
3
  size 20480
notebooks/002_persisted-embedding-model.ipynb CHANGED
@@ -40,6 +40,7 @@
40
  "source": [
41
  "# load some documents\n",
42
  "documents = SimpleDirectoryReader(input_files=[\n",
 
43
  " \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
44
  " \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
45
  " \"../raw_documents/qna.txt\"\n",
 
40
  "source": [
41
  "# load some documents\n",
42
  "documents = SimpleDirectoryReader(input_files=[\n",
43
+ " \"../raw_documents/overview_background.txt\",\n",
44
  " \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
45
  " \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
46
  " \"../raw_documents/qna.txt\"\n",
notebooks/007_test_hi_content_engine.ipynb ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "ac0cc1aa-e68d-432d-b316-52e272c43207",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import streamlit as st\n",
11
+ "from streamlit_feedback import streamlit_feedback\n",
12
+ "\n",
13
+ "import os\n",
14
+ "import pandas as pd\n",
15
+ "import base64\n",
16
+ "from io import BytesIO\n",
17
+ "import sys\n",
18
+ "sys.path.insert(0, \"../\")\n",
19
+ "\n",
20
+ "import chromadb\n",
21
+ "from llama_index.core import (\n",
22
+ " VectorStoreIndex, \n",
23
+ " SimpleDirectoryReader,\n",
24
+ " StorageContext,\n",
25
+ " Document\n",
26
+ ")\n",
27
+ "from llama_index.vector_stores.chroma.base import ChromaVectorStore\n",
28
+ "from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding\n",
29
+ "from llama_index.llms.openai import OpenAI\n",
30
+ "from llama_index.core.memory import ChatMemoryBuffer\n",
31
+ "from llama_index.core.tools import QueryEngineTool\n",
32
+ "from llama_index.agent.openai import OpenAIAgent\n",
33
+ "from llama_index.core import Settings\n",
34
+ "\n",
35
+ "from vision_api import get_transcribed_text\n",
36
+ "from qna_prompting import get_qna_question_tool, evaluate_qna_answer_tool\n",
37
+ "\n",
38
+ "import nest_asyncio\n",
39
+ "nest_asyncio.apply()"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": null,
45
+ "id": "8b05cb9b-869a-409c-8d4f-aafae703c558",
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "@st.cache_resource\n",
50
+ "def get_document_object(input_files):\n",
51
+ " documents = SimpleDirectoryReader(input_files=input_files).load_data()\n",
52
+ " document = Document(text=\"\\n\\n\".join([doc.text for doc in documents]))\n",
53
+ " return document\n",
54
+ "\n",
55
+ "@st.cache_resource\n",
56
+ "def get_llm_object(selected_model, temperature):\n",
57
+ " llm = OpenAI(model=selected_model, temperature=temperature)\n",
58
+ " return llm\n",
59
+ "\n",
60
+ "@st.cache_resource\n",
61
+ "def get_embedding_model(model_name, fine_tuned_path=None):\n",
62
+ " if fine_tuned_path is None:\n",
63
+ " print(f\"loading from `{model_name}` from huggingface\")\n",
64
+ " embed_model = HuggingFaceEmbedding(model_name=model_name)\n",
65
+ " else:\n",
66
+ " print(f\"loading from local `{fine_tuned_path}`\")\n",
67
+ " embed_model = fine_tuned_path\n",
68
+ " return embed_model\n",
69
+ "\n",
70
+ "@st.cache_resource\n",
71
+ "def get_query_engine(input_files, llm_model, temperature,\n",
72
+ " embedding_model, fine_tuned_path,\n",
73
+ " system_content, persisted_vector_db):\n",
74
+ " \n",
75
+ " llm = get_llm_object(llm_model, temperature)\n",
76
+ " embedded_model = get_embedding_model(\n",
77
+ " model_name=embedding_model, \n",
78
+ " fine_tuned_path=fine_tuned_path\n",
79
+ " )\n",
80
+ " Settings.llm = llm\n",
81
+ " Settings.chunk_size = 1024\n",
82
+ " Settings.embed_model = embedded_model\n",
83
+ "\n",
84
+ " if os.path.exists(persisted_vector_db):\n",
85
+ " print(\"loading from vector database - chroma\")\n",
86
+ " db = chromadb.PersistentClient(path=persisted_vector_db)\n",
87
+ " chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
88
+ " vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
89
+ " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
90
+ "\n",
91
+ " index = VectorStoreIndex.from_vector_store(\n",
92
+ " vector_store=vector_store,\n",
93
+ " storage_context=storage_context\n",
94
+ " )\n",
95
+ " else:\n",
96
+ " print(\"create new chroma vector database..\")\n",
97
+ " documents = SimpleDirectoryReader(input_files=input_files).load_data()\n",
98
+ " \n",
99
+ " db = chromadb.PersistentClient(path=persisted_vector_db)\n",
100
+ " chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
101
+ " vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
102
+ " \n",
103
+ " nodes = Settings.node_parser.get_nodes_from_documents(documents)\n",
104
+ " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
105
+ " storage_context.docstore.add_documents(nodes)\n",
106
+ "\n",
107
+ " index = VectorStoreIndex(nodes, storage_context=storage_context)\n",
108
+ " \n",
109
+ " memory = ChatMemoryBuffer.from_defaults(token_limit=15000)\n",
110
+ " hi_content_engine = index.as_query_engine(\n",
111
+ " memory=memory,\n",
112
+ " system_prompt=system_content,\n",
113
+ " similarity_top_k=20,\n",
114
+ " streaming=True\n",
115
+ " )\n",
116
+ " hi_textbook_query_description = \"\"\"\n",
117
+ " Use this tool to extract content from textbook `Health Insurance 7th Edition`,\n",
118
+ " that has 15 chapters in total. When user wants to learn more about a \n",
119
+ " particular chapter, this tool will help to assist user to get better\n",
120
+ " understanding of the content of the textbook.\n",
121
+ " \"\"\"\n",
122
+ " \n",
123
+ " hi_query_tool = QueryEngineTool.from_defaults(\n",
124
+ " query_engine=hi_content_engine,\n",
125
+ " name=\"health_insurance_textbook_query_engine\",\n",
126
+ " description=hi_textbook_query_description\n",
127
+ " )\n",
128
+ "\n",
129
+ " agent = OpenAIAgent.from_tools(tools=[\n",
130
+ " hi_query_tool, \n",
131
+ " get_qna_question_tool,\n",
132
+ " evaluate_qna_answer_tool\n",
133
+ " ],\n",
134
+ " max_function_calls=1,\n",
135
+ " llm=llm, \n",
136
+ " verbose=True,\n",
137
+ " system_prompt=textbook_content)\n",
138
+ " print(\"loaded AI agent, let's begin the chat!\")\n",
139
+ " print(\"=\"*50)\n",
140
+ " print(\"\")\n",
141
+ "\n",
142
+ " return agent\n",
143
+ "\n",
144
+ "def generate_llm_response(prompt_input, tool_choice=\"auto\"):\n",
145
+ " chat_agent = get_query_engine(input_files=input_files, \n",
146
+ " llm_model=selected_model, \n",
147
+ " temperature=temperature,\n",
148
+ " embedding_model=embedding_model,\n",
149
+ " fine_tuned_path=fine_tuned_path,\n",
150
+ " system_content=system_content,\n",
151
+ " persisted_vector_db=persisted_vector_db)\n",
152
+ " \n",
153
+ " # st.session_state.messages\n",
154
+ " response = chat_agent.stream_chat(prompt_input, tool_choice=tool_choice)\n",
155
+ " return response\n",
156
+ "\n",
157
+ "def handle_feedback(user_response):\n",
158
+ " st.toast(\"βœ”οΈ Feedback received!\")\n",
159
+ " st.session_state.feedback = False\n",
160
+ "\n",
161
+ "def handle_image_upload():\n",
162
+ " st.session_state.release_file = \"true\""
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "id": "f148426b-1634-45ed-a1fa-44e9c6ab14ac",
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": []
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": null,
176
+ "id": "4461d081-d8d0-4801-ad52-dbe826cbfe59",
177
+ "metadata": {},
178
+ "outputs": [],
179
+ "source": [
180
+ "openai_api = os.getenv(\"OPENAI_API_KEY\")"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": null,
186
+ "id": "2a24c861-896b-4800-8478-73f8cd65e8fa",
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": [
190
+ "image_prompt = False\n",
191
+ "# llm_model = \"gpt-3.5-turbo-0125\"\n",
192
+ "llm_model = \"gpt-4-0125-preview\"\n",
193
+ "temperature = 0\n",
194
+ "\n",
195
+ "input_files = [\"./raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
196
+ " \"./raw_documents/qna.txt\"]\n",
197
+ "embedding_model = \"BAAI/bge-small-en-v1.5\"\n",
198
+ "persisted_vector_db = \"../models/chroma_db\"\n",
199
+ "fine_tuned_path = \"local:../models/fine-tuned-embeddings\"\n",
200
+ "system_content = (\n",
201
+ " \"You are a helpful study assistant. \"\n",
202
+ " \"You do not respond as 'User' or pretend to be 'User'. \"\n",
203
+ " \"You only respond once as 'Assistant'.\"\n",
204
+ ")\n",
205
+ "textbook_content = (\n",
206
+ " \"The content of the textbook `Health Insurance 7th Edition` are as follows,\"\n",
207
+ " \"- Chapter 1: Overview Of Healthcare Environment In Singapore\"\n",
208
+ " \"- Chapter 2: Medical Expense Insurance\"\n",
209
+ " \"- Chapter 3: Group Medical Expense Insurance\"\n",
210
+ " \"- Chapter 4: Disability Income Insurance\"\n",
211
+ " \"- Chapter 5: Long-Term Care Insurance \"\n",
212
+ " \"- Chapter 6: Critical Illness Insurance\"\n",
213
+ " \"- Chapter 7: Other Types Of Health Insurance\"\n",
214
+ " \"- Chapter 8: Managed Healthcare\"\n",
215
+ " \"- Chapter 9: Part I Healthcare Financing\"\n",
216
+ " \"- Chapter 9: Part II Healthcare Financing\"\n",
217
+ " \"- Chapter 10: Common Policy Provisions\"\n",
218
+ " \"- Chapter 11: Health Insurance Pricing\"\n",
219
+ " \"- Chapter 12: Health Insurance Underwriting\"\n",
220
+ " \"- Chapter 13: Notice No: MAS 120 Disclosure And Advisory Process - Requirements For Accident And Health Insurance Products\"\n",
221
+ " \"- Chapter 14: Financial Needs Analysis\"\n",
222
+ " \"- Chapter 15: Case Studies\"\n",
223
+ ")"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "id": "d5e4b22c-1e29-4ab8-9039-6e86f566871a",
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "llm = get_llm_object(llm_model, temperature)\n",
234
+ "embedded_model = get_embedding_model(\n",
235
+ " model_name=embedding_model, \n",
236
+ " fine_tuned_path=fine_tuned_path\n",
237
+ ")\n",
238
+ "Settings.llm = llm\n",
239
+ "Settings.chunk_size = 1024\n",
240
+ "Settings.embed_model = embedded_model"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": null,
246
+ "id": "e92d21e3-8483-4f24-91cf-40a6c10d43c5",
247
+ "metadata": {},
248
+ "outputs": [],
249
+ "source": []
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": null,
254
+ "id": "5753c6ed-41a6-40b5-bc4f-477eb7c1d5c5",
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": [
258
+ "print(\"loading from vector database - chroma\")\n",
259
+ "db = chromadb.PersistentClient(path=persisted_vector_db)\n",
260
+ "chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
261
+ "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
262
+ "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
263
+ "\n",
264
+ "index = VectorStoreIndex.from_vector_store(\n",
265
+ " vector_store=vector_store,\n",
266
+ " storage_context=storage_context\n",
267
+ ")"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": null,
273
+ "id": "d91e2dda-cb74-4d85-adce-a4a72c53cc7d",
274
+ "metadata": {},
275
+ "outputs": [],
276
+ "source": []
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": null,
281
+ "id": "e4211bb2-aba9-4be2-b2f1-6fbd3f7e4223",
282
+ "metadata": {},
283
+ "outputs": [],
284
+ "source": [
285
+ "memory = ChatMemoryBuffer.from_defaults(token_limit=15000)\n",
286
+ "hi_content_engine = index.as_query_engine(\n",
287
+ " memory=memory,\n",
288
+ " system_prompt=system_content,\n",
289
+ " similarity_top_k=8,\n",
290
+ " verbose=True,\n",
291
+ " streaming=True\n",
292
+ ")"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "id": "007f8bf5-19c5-4462-b5f2-5f4ff30f593b",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "hi_textbook_query_description = \"\"\"\n",
303
+ " Use this tool to extract content from the query engine,\n",
304
+ " which is built by ingesting textbook content from `Health Insurance 7th Edition`,\n",
305
+ " that has 15 chapters in total. When user wants to learn more about a \n",
306
+ " particular chapter, this tool will help to assist user to get better\n",
307
+ " understanding of the content of the textbook.\n",
308
+ "\"\"\"\n",
309
+ "\n",
310
+ "hi_query_tool = QueryEngineTool.from_defaults(\n",
311
+ " query_engine=hi_content_engine,\n",
312
+ " name=\"health_insurance_textbook_query_engine\",\n",
313
+ " description=hi_textbook_query_description\n",
314
+ ")\n",
315
+ "agent = OpenAIAgent.from_tools(tools=[\n",
316
+ " hi_query_tool, \n",
317
+ " get_qna_question_tool,\n",
318
+ " evaluate_qna_answer_tool\n",
319
+ " ],\n",
320
+ " max_function_calls=1,\n",
321
+ " llm=llm, \n",
322
+ " verbose=True,\n",
323
+ " system_prompt=textbook_content)\n",
324
+ "\n",
325
+ "print(\"loaded AI agent, let's begin the chat!\")\n",
326
+ "print(\"=\"*50)\n",
327
+ "print(\"\")"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "code",
332
+ "execution_count": null,
333
+ "id": "a2e42ad6-20fc-4f2e-a4ea-403e79b14ba4",
334
+ "metadata": {},
335
+ "outputs": [],
336
+ "source": []
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "id": "c62e817e-c7c8-4f90-9e32-217fec376565",
342
+ "metadata": {},
343
+ "outputs": [],
344
+ "source": [
345
+ "response = hi_content_engine.query(\"can you give me the list of chapters that `Health Insurance 7th Edition` covers\")"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": null,
351
+ "id": "5902ffd2-2f66-4b89-bf7f-a05e3fdeccaa",
352
+ "metadata": {},
353
+ "outputs": [],
354
+ "source": [
355
+ "for res in response.response_gen:\n",
356
+ " print(res, end=\"\")"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "id": "0e75453b-85c7-4e1c-8683-6df45a13cacb",
363
+ "metadata": {},
364
+ "outputs": [],
365
+ "source": []
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": null,
370
+ "id": "0b97d90d-5c59-486f-863b-4aaa12ed0ea0",
371
+ "metadata": {},
372
+ "outputs": [],
373
+ "source": []
374
+ },
375
+ {
376
+ "cell_type": "code",
377
+ "execution_count": null,
378
+ "id": "4584aa46-b488-4535-9d69-2736c9dad170",
379
+ "metadata": {},
380
+ "outputs": [],
381
+ "source": [
382
+ "response = agent.stream_chat(\"hihi\", tool_choice=\"auto\")"
383
+ ]
384
+ },
385
+ {
386
+ "cell_type": "code",
387
+ "execution_count": null,
388
+ "id": "eff8bb8d-a2d1-428a-9c3d-193389378288",
389
+ "metadata": {},
390
+ "outputs": [],
391
+ "source": [
392
+ "for res in response.response_gen:\n",
393
+ " print(res, end=\"\")"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": null,
399
+ "id": "b7a504af-6499-4649-8e68-2a86d415e458",
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": []
403
+ }
404
+ ],
405
+ "metadata": {
406
+ "kernelspec": {
407
+ "display_name": "Python 3 (ipykernel)",
408
+ "language": "python",
409
+ "name": "python3"
410
+ },
411
+ "language_info": {
412
+ "codemirror_mode": {
413
+ "name": "ipython",
414
+ "version": 3
415
+ },
416
+ "file_extension": ".py",
417
+ "mimetype": "text/x-python",
418
+ "name": "python",
419
+ "nbconvert_exporter": "python",
420
+ "pygments_lexer": "ipython3",
421
+ "version": "3.9.18"
422
+ }
423
+ },
424
+ "nbformat": 4,
425
+ "nbformat_minor": 5
426
+ }
qna_prompting.py CHANGED
@@ -7,73 +7,61 @@ import time
7
 
8
  db_path = "./database/mock_qna.sqlite"
9
  qna_question_description = """
10
- Use this tool to extract the chapter number from the body of input text,
11
- thereafter, chapter number will be used as a filtering criteria for
12
- extracting the right questions set from database.
13
- The format of the function argument looks as follow:
14
- It should be in the format with `Chapter_` as prefix.
 
 
 
 
 
 
 
 
15
  Example 1: `Chapter_1` for first chapter
16
  Example 2: For chapter 12 of the textbook, you should return `Chapter_12`
17
  Example 3: `Chapter_5` for fifth chapter
18
- Thereafter, the chapter_n argument will be passed to the function for Q&A question retrieval.
19
  """
20
  qna_answer_description = """
21
- Use this tool to trigger the evaluation of user's provided input with the
22
- correct answer of the Q&A question asked. When user provides answer to the
23
- question asked, they can reply in natural language or giving the alphabet
24
- symbol of which selected answer they think it's most reasonable.
25
- The format of the function argument `user_selected_answer` looks as follow:
26
- It should be in the format with character such as A, B, C and D.
 
 
 
 
 
 
 
 
 
27
  Example 1: User's answer is `a`, it means choice `A`.
28
  Example 2: User's answer is contextually closer to 3rd answer choice, it means `C`.
29
  Example 3: User says last is the answer, it means `D`.
30
- Thereafter, the `user_selected_answer` argument will be passed to the
31
- function for Q&A question evaluation.
32
  """
33
 
34
  class Question_Model(BaseModel):
35
- chapter_n: str = \
36
- Field(...,
37
- pattern=r'^Chapter_\d*$',
38
- description=(
39
- "which chapter to extract, the format of this function argumet"
40
- "is with `Chapter_` as prefix concatenated with chapter number"
41
- "in integer. For example, `Chapter_2`, `Chapter_10`."
42
- "if no chapter number specified or user requested for random question"
43
- "or user has no preference over which chapter of textbook to be tested"
44
- "return `Chapter_0`"
45
- )
46
  )
 
47
  class Answer_Model(BaseModel):
48
- user_selected_answer: str = \
49
- Field(...,
50
- pattern=r'^[ABCD]$',
51
- description=(
52
- "which answer choice `A`, `B`, `C`, `D`"
53
- "user selected. The return format should be"
54
- "in single character such as A, B, C and D."
55
- "if user's answer is contextually closer to a "
56
- "particular answer choice, return the corresponding"
57
- "alphabet A, B, C or D for the answer "
58
- "is closest."
59
- ))
60
 
61
  def get_qna_question(chapter_n: str) -> str:
62
- """
63
- Use this tool to extract the chapter number from the body of input text,
64
- thereafter, chapter number will be used as a filtering criteria for
65
- extracting the right questions set from database.
66
- The format of the function argument looks as follow:
67
- It should be in the format with `Chapter_` as prefix.
68
- Example 1: `Chapter_1` for first chapter
69
- Example 2: For chapter 12 of the textbook, you should return `Chapter_12`
70
- Example 3: `Chapter_5` for fifth chapter
71
- Thereafter, the chapter_n argument will be passed to the function for Q&A question retrieval.
72
- Once the question is retrieved from database, be reminded to ask user the question.
73
- """
74
  con = sqlite3.connect(db_path)
75
  cur = con.cursor()
76
-
77
 
78
  filter_clause = "WHERE a.id IS NULL" if chapter_n == "Chapter_0" else f"WHERE a.id IS NULL AND chapter='{chapter_n}'"
79
  sql_string = """SELECT q.id, question, option_1, option_2, option_3, option_4, q.correct_answer
@@ -92,8 +80,8 @@ def get_qna_question(chapter_n: str) -> str:
92
  option_4 = result[5]
93
  c_answer = result[6]
94
 
95
- qna_str = "Question: \n" + \
96
- "========= \n" + \
97
  question.replace("\\n", "\n") + "\n" + \
98
  "A) " + option_1 + "\n" + \
99
  "B) " + option_2 + "\n" + \
@@ -108,31 +96,17 @@ def get_qna_question(chapter_n: str) -> str:
108
  return qna_str
109
 
110
  def evaluate_qna_answer(user_selected_answer: str) -> str:
111
- """
112
- Use this tool to trigger the evaluation of user's provided input with the
113
- correct answer of the Q&A question asked. When user provides answer to the
114
- question asked, they can reply in natural language or giving the alphabet
115
- symbol of which selected answer they think it's most reasonable.
116
- The format of the function argument `user_selected_answer` looks as follow:
117
- It should be in the format with character such as A, B, C and D.
118
- Example 1: User's answer is `a`, it means choice `A`.
119
- Example 2: User's answer is contextually closer to 3rd answer choice, it means `C`.
120
- Example 3: User says last is the answer, it means `D`.
121
- Thereafter, the `user_selected_answer` argument will be passed to the
122
- function for Q&A question evaluation.
123
- """
124
  answer_mapping = {
125
  "A": 1,
126
  "B": 2,
127
  "C": 3,
128
- "D": 4
 
129
  }
130
  num_mapping = dict((v,k) for k,v in answer_mapping.items())
 
131
 
132
- user_answer_numeric = answer_mapping.get(user_selected_answer, None)
133
- if user_answer_numeric is None:
134
- raise Exception(f"User's answer can't be found: {user_selected_answer}")
135
-
136
  question_id = st.session_state.question_id
137
  qna_answer = st.session_state.qna_answer
138
  qna_answer_alphabet = num_mapping[qna_answer]
@@ -148,21 +122,20 @@ def evaluate_qna_answer(user_selected_answer: str) -> str:
148
  con.close()
149
 
150
  if qna_answer == user_answer_numeric:
151
- st.toast('Hooray!', icon='πŸŽ‰')
152
- time.sleep(0.3)
153
- st.toast('Hooray!', icon='πŸŽ‰')
154
- time.sleep(0.3)
155
- st.toast('Hooray!', icon='πŸŽ‰')
156
  st.balloons()
157
  else:
158
- st.toast('Omg..', icon='πŸ˜…')
159
- time.sleep(0.3)
160
- st.toast('Omg..', icon='πŸ˜…')
161
- time.sleep(0.3)
162
- st.toast('Omg..', icon='πŸ˜…')
163
  st.snow()
164
 
165
-
166
  qna_answer_response = (
167
  f"Your selected answer is `{user_selected_answer}`, "
168
  f"but the actual answer is `{qna_answer_alphabet}`. "
 
7
 
8
  db_path = "./database/mock_qna.sqlite"
9
  qna_question_description = """
10
+ Only trigger this when user wants to be tested with a question.
11
+ Use this tool to extract the chapter number from the body of input text,
12
+ thereafter, chapter number will be used as a filtering criteria for
13
+ extracting the right questions set from database.
14
+
15
+ Thereafter, the chapter_n argument will be passed to the function for Q&A question retrieval.
16
+ If no chapter number specified or user requested for random question,
17
+ or user has no preference over which chapter of textbook to be tested,
18
+ set function argument `chapter_n` to be `Chapter_0`.
19
+ """
20
+ qna_question_data_format = """
21
+ The format of the function argument `chapter_n` looks as follow:
22
+ It should be in the format with `Chapter_` as prefix.
23
  Example 1: `Chapter_1` for first chapter
24
  Example 2: For chapter 12 of the textbook, you should return `Chapter_12`
25
  Example 3: `Chapter_5` for fifth chapter
 
26
  """
27
  qna_answer_description = """
28
+ Use this tool to trigger the evaluation of user's provided input with the
29
+ correct answer of the Q&A question asked. When user provides answer to the
30
+ question asked, they can reply in natural language or giving the alphabet
31
+ letter of which selected choice they think it's the right answer.
32
+
33
+ If user's answer is not a single alphabet letter, but is contextually
34
+ closer to a particular answer choice, return the corresponding
35
+ alphabet A, B, C, D or Z for which the answer's meaning is closest to.
36
+
37
+ Thereafter, the `user_selected_answer` argument will be passed to the
38
+ function for Q&A question evaluation.
39
+ """
40
+ qna_answer_data_format = """
41
+ The format of the function argument `user_selected_answer` looks as follow:
42
+ It should be in the format of single character such as `A`, `B`, `C`, `D` or `Z`.
43
  Example 1: User's answer is `a`, it means choice `A`.
44
  Example 2: User's answer is contextually closer to 3rd answer choice, it means `C`.
45
  Example 3: User says last is the answer, it means `D`.
46
+ Example 4: If user doesn't know about the answer, it means `Z`.
 
47
  """
48
 
49
  class Question_Model(BaseModel):
50
+ chapter_n: str = Field(...,
51
+ pattern=r'^Chapter_\d*$',
52
+ description=qna_question_data_format
 
 
 
 
 
 
 
 
53
  )
54
+
55
  class Answer_Model(BaseModel):
56
+ user_selected_answer: str = Field(...,
57
+ pattern=r'^[ABCDZ]$',
58
+ description=qna_answer_data_format
59
+ )
 
 
 
 
 
 
 
 
60
 
61
  def get_qna_question(chapter_n: str) -> str:
62
+
 
 
 
 
 
 
 
 
 
 
 
63
  con = sqlite3.connect(db_path)
64
  cur = con.cursor()
 
65
 
66
  filter_clause = "WHERE a.id IS NULL" if chapter_n == "Chapter_0" else f"WHERE a.id IS NULL AND chapter='{chapter_n}'"
67
  sql_string = """SELECT q.id, question, option_1, option_2, option_3, option_4, q.correct_answer
 
80
  option_4 = result[5]
81
  c_answer = result[6]
82
 
83
+ qna_str = "As requested, here is the retrieved question: \n" + \
84
+ "============================================= \n" + \
85
  question.replace("\\n", "\n") + "\n" + \
86
  "A) " + option_1 + "\n" + \
87
  "B) " + option_2 + "\n" + \
 
96
  return qna_str
97
 
98
  def evaluate_qna_answer(user_selected_answer: str) -> str:
99
+
 
 
 
 
 
 
 
 
 
 
 
 
100
  answer_mapping = {
101
  "A": 1,
102
  "B": 2,
103
  "C": 3,
104
+ "D": 4,
105
+ "Z": 0
106
  }
107
  num_mapping = dict((v,k) for k,v in answer_mapping.items())
108
+ user_answer_numeric = answer_mapping.get(user_selected_answer, 0)
109
 
 
 
 
 
110
  question_id = st.session_state.question_id
111
  qna_answer = st.session_state.qna_answer
112
  qna_answer_alphabet = num_mapping[qna_answer]
 
122
  con.close()
123
 
124
  if qna_answer == user_answer_numeric:
125
+ st.toast("🍯 yummy yummy, hooray!", icon="πŸŽ‰")
126
+ time.sleep(2)
127
+ st.toast("πŸ»πŸ’•πŸ― You got it right!", icon="🎊")
128
+ time.sleep(2)
129
+ st.toast("πŸ₯‡ You are amazing! πŸ’―πŸ’―", icon="πŸ’ͺ")
130
  st.balloons()
131
  else:
132
+ st.toast("🐼 Something doesn't seem right.. πŸ”₯🏠πŸ”₯", icon="πŸ˜‚")
133
+ time.sleep(2)
134
+ st.toast("πŸ₯Ά Are you sure..? 😬😬", icon="😭")
135
+ time.sleep(2)
136
+ st.toast("πŸ€œπŸ€› Nevertheless, it was a good try!! πŸ‹οΈβ€β™‚οΈπŸ‹οΈβ€β™‚οΈ", icon="πŸ‘")
137
  st.snow()
138
 
 
139
  qna_answer_response = (
140
  f"Your selected answer is `{user_selected_answer}`, "
141
  f"but the actual answer is `{qna_answer_alphabet}`. "
raw_documents/overview_background.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a5e6e0a28727dd6eab4bc18bf5ffcf897a4dbed61a854fa52629d2698f0925
3
+ size 5970
resource/disney-cuties-little-winnie-the-pooh-emoticon.png ADDED
resource/disney-cuties-piglet-emoticon.png ADDED
streamlit_app.py CHANGED
@@ -28,7 +28,7 @@ import nest_asyncio
28
  nest_asyncio.apply()
29
 
30
  # App title
31
- st.set_page_config(page_title="πŸ’¬ Open AI Chatbot")
32
  openai_api = os.getenv("OPENAI_API_KEY")
33
 
34
  # "./raw_documents/HI_Knowledge_Base.pdf"
@@ -38,9 +38,29 @@ input_files = ["./raw_documents/HI Chapter Summary Version 1.3.pdf",
38
  embedding_model = "BAAI/bge-small-en-v1.5"
39
  persisted_vector_db = "./models/chroma_db"
40
  fine_tuned_path = "local:models/fine-tuned-embeddings"
41
- system_content = ("You are a helpful study assistant. "
42
- "You do not respond as 'User' or pretend to be 'User'. "
43
- "You only respond once as 'Assistant'."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  )
45
 
46
  data_df = pd.DataFrame(
@@ -50,10 +70,34 @@ data_df = pd.DataFrame(
50
  )
51
  data_df.index = ["Chapter 1", "Chapter 2", "Chapter 3", "Chapter 4"]
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Replicate Credentials
54
  with st.sidebar:
55
- st.title("πŸ’¬ Open AI Chatbot")
56
- st.write("This chatbot is created using the GPT model from Open AI.")
57
  if openai_api:
58
  pass
59
  elif "OPENAI_API_KEY" in st.secrets:
@@ -71,7 +115,7 @@ with st.sidebar:
71
 
72
  st.subheader("Models and parameters")
73
  selected_model = st.sidebar.selectbox("Choose an OpenAI model",
74
- ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"],
75
  key="selected_model")
76
  temperature = st.sidebar.slider("temperature", min_value=0.0, max_value=2.0,
77
  value=0.0, step=0.01)
@@ -98,7 +142,7 @@ if "init" not in st.session_state.keys():
98
  # Store LLM generated responses
99
  if "messages" not in st.session_state.keys():
100
  st.session_state.messages = [{"role": "assistant",
101
- "content": "How may I assist you today?",
102
  "type": "text"}]
103
 
104
  if "feedback_key" not in st.session_state:
@@ -115,7 +159,7 @@ if "qna_answer" not in st.session_state:
115
 
116
  def clear_chat_history():
117
  st.session_state.messages = [{"role": "assistant",
118
- "content": "How may I assist you today?",
119
  "type": "text"}]
120
  chat_engine = get_query_engine(input_files=input_files,
121
  llm_model=selected_model,
@@ -191,23 +235,25 @@ def get_query_engine(input_files, llm_model, temperature,
191
 
192
  index = VectorStoreIndex(nodes, storage_context=storage_context)
193
 
194
- memory = ChatMemoryBuffer.from_defaults(token_limit=15000)
195
  hi_content_engine = index.as_query_engine(
196
  memory=memory,
197
  system_prompt=system_content,
198
- similarity_top_k=3,
 
199
  streaming=True
200
  )
201
-
202
  hi_textbook_query_description = """
203
- Use this tool to extract content from Health Insurance textbook
 
204
  that has 15 chapters in total. When user wants to learn more about a
205
  particular chapter, this tool will help to assist user to get better
206
- understanding of the content of the textbook.
207
  """
 
208
  hi_query_tool = QueryEngineTool.from_defaults(
209
  query_engine=hi_content_engine,
210
- name="vector_tool",
211
  description=hi_textbook_query_description
212
  )
213
 
@@ -218,7 +264,8 @@ def get_query_engine(input_files, llm_model, temperature,
218
  ],
219
  max_function_calls=1,
220
  llm=llm,
221
- verbose=True)
 
222
  print("loaded AI agent, let's begin the chat!")
223
  print("="*50)
224
  print("")
@@ -277,7 +324,12 @@ with st.sidebar:
277
  for message in st.session_state.messages:
278
  if message["role"] == "admin":
279
  continue
280
- with st.chat_message(message["role"]):
 
 
 
 
 
281
  if message["type"] == "text":
282
  st.write(message["content"])
283
  elif message["type"] == "image":
@@ -286,11 +338,10 @@ for message in st.session_state.messages:
286
 
287
  # User-provided prompt
288
  if prompt := st.chat_input(disabled=not openai_api):
289
- client = OpenAI()
290
  st.session_state.messages.append({"role": "user",
291
  "content": prompt,
292
  "type": "text"})
293
- with st.chat_message("user"):
294
  st.write(prompt)
295
 
296
  # Retrieve text prompt from image submission
@@ -301,17 +352,22 @@ if prompt is None and \
301
 
302
  # Generate a new response if last message is not from assistant
303
  if st.session_state.messages[-1]["role"] != "assistant":
304
- with st.chat_message("assistant"):
305
- with st.spinner("Thinking..."):
306
  if image_prompt:
307
- response = generate_llm_response(prompt, tool_choice="vector_tool")
 
 
 
308
  image_prompt = False
309
  else:
310
  response = generate_llm_response(prompt, tool_choice="auto")
311
  placeholder = st.empty()
312
  full_response = ""
313
  for token in response.response_gen:
314
- token = token.replace("\n", " \n")
 
 
315
  full_response += token
316
  placeholder.markdown(full_response)
317
  placeholder.markdown(full_response)
 
28
  nest_asyncio.apply()
29
 
30
  # App title
31
+ st.set_page_config(page_title="🐻🍯 Study Bear")
32
  openai_api = os.getenv("OPENAI_API_KEY")
33
 
34
  # "./raw_documents/HI_Knowledge_Base.pdf"
 
38
  embedding_model = "BAAI/bge-small-en-v1.5"
39
  persisted_vector_db = "./models/chroma_db"
40
  fine_tuned_path = "local:models/fine-tuned-embeddings"
41
+ system_content = (
42
+ "You are a helpful study assistant. "
43
+ "You do not respond as 'User' or pretend to be 'User'. "
44
+ "You only respond once as 'Assistant'."
45
+ )
46
+ textbook_content = (
47
+ "The content of the textbook `Health Insurance 7th Edition` are as follows,"
48
+ "- Chapter 1: Overview Of Healthcare Environment In Singapore"
49
+ "- Chapter 2: Medical Expense Insurance"
50
+ "- Chapter 3: Group Medical Expense Insurance"
51
+ "- Chapter 4: Disability Income Insurance"
52
+ "- Chapter 5: Long-Term Care Insurance"
53
+ "- Chapter 6: Critical Illness Insurance"
54
+ "- Chapter 7: Other Types Of Health Insurance"
55
+ "- Chapter 8: Managed Healthcare"
56
+ "- Chapter 9: Part I Healthcare Financing"
57
+ "- Chapter 9: Part II Healthcare Financing"
58
+ "- Chapter 10: Common Policy Provisions"
59
+ "- Chapter 11: Health Insurance Pricing"
60
+ "- Chapter 12: Health Insurance Underwriting"
61
+ "- Chapter 13: Notice No: MAS 120 Disclosure And Advisory Process - Requirements For Accident And Health Insurance Products"
62
+ "- Chapter 14: Financial Needs Analysis"
63
+ "- Chapter 15: Case Studies"
64
  )
65
 
66
  data_df = pd.DataFrame(
 
70
  )
71
  data_df.index = ["Chapter 1", "Chapter 2", "Chapter 3", "Chapter 4"]
72
 
73
+ bear_img_path = "./resource/disney-cuties-little-winnie-the-pooh-emoticon.png"
74
+ piglet_img_path = "./resource/disney-cuties-piglet-emoticon.png"
75
+ introduction_line = (
76
+ "Hello, my name is Winnie. I am your `Study Bear` 🐻. \n"
77
+ "Let's study together and pass the exam without worries. \n"
78
+ "As the saying goes: \n"
79
+ "> Any day spent with you is my favorite day. So, today is my new favorite day. \n"
80
+ "> \n"
81
+ "Let me know what should we study today 😉. \n"
82
+ " \n"
83
+ "The content of the textbook `Health Insurance 7th Edition` are as follows, \n"
84
+ "- Chapter 1: Overview Of Healthcare Environment In Singapore \n"
85
+ "- Chapter 2: Medical Expense Insurance \n"
86
+ "- Chapter 3: Group Medical Expense Insurance \n"
87
+ "- Chapter 4: Disability Income Insurance \n"
88
+ "- Etc ... \n"
89
+ " \n"
90
+ "For examples, you could ask me \n"
91
+ "- *How many chapters are there in textbook 'Health Insurance 7th Edition'?* \n"
92
+ "- *Can you list all the chapters by name and its number for me?* \n"
93
+ "- *Please extract the important key concept from chapter 1 into 10 bullet points* \n"
94
+ "- *Please ask me a question so that I can tell if I have enough understanding about Chapter 2* \n"
95
+ )
96
+
97
  # Replicate Credentials
98
  with st.sidebar:
99
+ st.title("🍯🐝 Study Bear 🐻💭")
100
+ st.write("Just like Pooh needs honey, success requires hard work – no shortcuts allowed!")
101
  if openai_api:
102
  pass
103
  elif "OPENAI_API_KEY" in st.secrets:
 
115
 
116
  st.subheader("Models and parameters")
117
  selected_model = st.sidebar.selectbox("Choose an OpenAI model",
118
+ ["gpt-4-0125-preview", "gpt-3.5-turbo-0125"],
119
  key="selected_model")
120
  temperature = st.sidebar.slider("temperature", min_value=0.0, max_value=2.0,
121
  value=0.0, step=0.01)
 
142
  # Store LLM generated responses
143
  if "messages" not in st.session_state.keys():
144
  st.session_state.messages = [{"role": "assistant",
145
+ "content": introduction_line,
146
  "type": "text"}]
147
 
148
  if "feedback_key" not in st.session_state:
 
159
 
160
  def clear_chat_history():
161
  st.session_state.messages = [{"role": "assistant",
162
+ "content": introduction_line,
163
  "type": "text"}]
164
  chat_engine = get_query_engine(input_files=input_files,
165
  llm_model=selected_model,
 
235
 
236
  index = VectorStoreIndex(nodes, storage_context=storage_context)
237
 
238
+ memory = ChatMemoryBuffer.from_defaults(token_limit=100_000)
239
  hi_content_engine = index.as_query_engine(
240
  memory=memory,
241
  system_prompt=system_content,
242
+ similarity_top_k=10,
243
+ verbose=True,
244
  streaming=True
245
  )
 
246
  hi_textbook_query_description = """
247
+ Use this tool to extract content from the query engine,
248
+ which is built by ingesting textbook content from `Health Insurance 7th Edition`,
249
  that has 15 chapters in total. When user wants to learn more about a
250
  particular chapter, this tool will help to assist user to get better
251
+ understanding of the content of the textbook.
252
  """
253
+
254
  hi_query_tool = QueryEngineTool.from_defaults(
255
  query_engine=hi_content_engine,
256
+ name="health_insurance_textbook_query_engine",
257
  description=hi_textbook_query_description
258
  )
259
 
 
264
  ],
265
  max_function_calls=1,
266
  llm=llm,
267
+ verbose=True,
268
+ system_prompt=textbook_content)
269
  print("loaded AI agent, let's begin the chat!")
270
  print("="*50)
271
  print("")
 
324
  for message in st.session_state.messages:
325
  if message["role"] == "admin":
326
  continue
327
+ elif message["role"] == "user":
328
+ avatar = piglet_img_path
329
+ elif message["role"] == "assistant":
330
+ avatar = bear_img_path
331
+
332
+ with st.chat_message(message["role"], avatar=avatar):
333
  if message["type"] == "text":
334
  st.write(message["content"])
335
  elif message["type"] == "image":
 
338
 
339
  # User-provided prompt
340
  if prompt := st.chat_input(disabled=not openai_api):
 
341
  st.session_state.messages.append({"role": "user",
342
  "content": prompt,
343
  "type": "text"})
344
+ with st.chat_message("user", avatar=piglet_img_path):
345
  st.write(prompt)
346
 
347
  # Retrieve text prompt from image submission
 
352
 
353
  # Generate a new response if last message is not from assistant
354
  if st.session_state.messages[-1]["role"] != "assistant":
355
+ with st.chat_message("assistant", avatar=bear_img_path):
356
+ with st.spinner("🧸💤 Thinking... 🐻💭"):
357
  if image_prompt:
358
+ response = generate_llm_response(
359
+ prompt,
360
+ tool_choice="health_insurance_textbook_query_engine"
361
+ )
362
  image_prompt = False
363
  else:
364
  response = generate_llm_response(prompt, tool_choice="auto")
365
  placeholder = st.empty()
366
  full_response = ""
367
  for token in response.response_gen:
368
+ token = token.replace("\n", " \n") \
369
+ .replace("$", "\$") \
370
+ .replace("\[", "$$")
371
  full_response += token
372
  placeholder.markdown(full_response)
373
  placeholder.markdown(full_response)