Spaces:
Sleeping
Sleeping
Commit
·
1679e12
1
Parent(s):
386ceec
Upload 6 files
Browse files- .gitattributes +1 -0
- 100_test_docs_persist/docstore.json +0 -0
- 100_test_docs_persist/graph_store.json +1 -0
- 100_test_docs_persist/index_store.json +0 -0
- 100_test_docs_persist/vector_store.json +3 -0
- app.py +75 -42
- requirements.txt +1 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
test_index_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
test_index_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
|
37 |
+
100_test_docs_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
|
100_test_docs_persist/docstore.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
100_test_docs_persist/graph_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"graph_dict": {}}
|
100_test_docs_persist/index_store.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
100_test_docs_persist/vector_store.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feb2826fe8f6ba32acd8b2545e07d089ffd0d876dd80a8e27df8300c987af9f8
|
3 |
+
size 62556594
|
app.py
CHANGED
@@ -1,70 +1,103 @@
|
|
1 |
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
|
2 |
from llama_index.llms import OpenAI
|
3 |
from langchain.chat_models import ChatOpenAI
|
|
|
4 |
import gradio as gr
|
5 |
import sys
|
6 |
import os
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
# chunk_size_limit = 600
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
#
|
18 |
|
19 |
-
|
20 |
-
|
|
|
|
|
21 |
|
22 |
-
#
|
23 |
-
|
24 |
|
25 |
-
#
|
26 |
-
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
# # Parse documents into nodes
|
33 |
-
# # parser = SimpleNodeParser.from_defaults()
|
34 |
-
# # nodes = parser.get_nodes_from_documents(documents)
|
35 |
|
36 |
-
#
|
37 |
-
#
|
|
|
38 |
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
|
44 |
-
|
45 |
-
INDEX = load_index_from_storage(STORAGE_CONTEXT)
|
46 |
QE = INDEX.as_query_engine()
|
|
|
47 |
|
48 |
-
def
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
for chat in chat_history[~max_chat_length:]:
|
51 |
user_chat, bot_chat = chat
|
52 |
prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
|
53 |
-
prompt = f"{prompt}\nUser: {
|
54 |
-
|
55 |
-
|
56 |
-
def chatfunc(input_text, chat_history):
|
57 |
-
response = QE.query(input_text)
|
58 |
chat_history.append([input_text, response.response])
|
59 |
return "", chat_history
|
60 |
|
61 |
with gr.Blocks() as iface:
|
62 |
chatbot = gr.Chatbot(height=400)
|
63 |
-
msg = gr.Textbox(label="Ask the Standard Bot anything about curriculum standards")
|
64 |
-
submit = gr.Button("Submit")
|
65 |
with gr.Row():
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
1 |
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
|
2 |
from llama_index.llms import OpenAI
|
3 |
from langchain.chat_models import ChatOpenAI
|
4 |
+
from PyPDF2 import PdfReader
|
5 |
import gradio as gr
|
6 |
import sys
|
7 |
import os
|
8 |
|
9 |
+
# Optional local override: when a ``config`` module providing OPEN_AI_KEY is
# importable, export the key through the OPENAI_API_KEY environment variable.
# Without that module the environment is left untouched (presumably the key is
# then supplied by the hosting environment -- TODO confirm).
try:
    from config import OPEN_AI_KEY
except ImportError:
    # Covers both "no config module" and "config has no OPEN_AI_KEY".
    # Only the missing-config case is tolerated; a bare ``except`` here would
    # also hide KeyboardInterrupt/SystemExit and genuine errors in config.py.
    pass
else:
    os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY
|
|
|
14 |
|
15 |
+
# ===============================
|
16 |
+
# Settings
|
17 |
+
# ===============================
|
18 |
|
19 |
+
# Numeric knobs shared by the LLM and the prompt helper below.
MAX_INPUT_SIZE = 4096        # passed to PromptHelper as context_window
NUM_OUTPUT = 700             # passed as max_tokens (LLM) and num_output (PromptHelper)
CHUNK_OVERLAP_RATIO = 0.15   # passed to PromptHelper as chunk_overlap_ratio
CHUNK_SIZE_LIMIT = 600       # passed to PromptHelper as chunk_size_limit

# LLM used for answering: gpt-3.5-turbo at temperature 0.7.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7,
    max_tokens=NUM_OUTPUT,
)

# Prompt helper configured from the knobs above.
prompt_helper = PromptHelper(
    context_window=MAX_INPUT_SIZE,
    num_output=NUM_OUTPUT,
    chunk_overlap_ratio=CHUNK_OVERLAP_RATIO,
    chunk_size_limit=CHUNK_SIZE_LIMIT,
)

# Bundle both into a service context and install it as the global default.
service_context = ServiceContext.from_defaults(
    llm=llm,
    prompt_helper=prompt_helper,
)
set_global_service_context(service_context)
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
+
# ===============================
|
36 |
+
# Functions
|
37 |
+
# ===============================
|
38 |
|
39 |
+
def construct_index(directory_path, index_path):
    """Return the vector index, loading it from disk or building it on first use.

    Args:
        directory_path: Folder of source documents; only read when no
            persisted index is found.
        index_path: Folder the index is persisted to / loaded from.

    Returns:
        The index loaded from ``index_path`` when that folder exists and is
        non-empty; otherwise a new index built from ``directory_path`` and
        persisted to ``index_path``.
    """
    # os.listdir() raises FileNotFoundError for a missing folder, so a fresh
    # checkout without the persist directory must be treated as "no index yet"
    # rather than crashing before the index can be built.
    if os.path.isdir(index_path) and os.listdir(index_path):
        storage_context = StorageContext.from_defaults(persist_dir=index_path)
        return load_index_from_storage(storage_context)

    # Load in documents
    documents = SimpleDirectoryReader(directory_path).load_data()

    # Index documents
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
        show_progress=True,
    )

    # Save index so later start-ups take the fast path above.
    # NOTE(review): relies on persist() creating index_path when it is
    # missing -- confirm against the installed llama_index version.
    index.storage_context.persist(persist_dir=index_path)

    return index
|
57 |
|
58 |
+
# Load the persisted index (or build it) once at import time and derive the
# query engine that chatfunc uses for every question.
INDEX = construct_index(
    directory_path="100_test_docs",
    index_path="100_test_docs_persist",
)
QE = INDEX.as_query_engine()

# Holder for the text of the most recently uploaded syllabus: upload_file
# assigns ``.value`` and chatfunc reads it.
# NOTE(review): this is one module-level object, not wired into any event's
# inputs/outputs, so every caller in the process sees the same value.
PDF_CONTENT = gr.State("")
|
61 |
|
62 |
+
def upload_file(file):
    """Extract the text of an uploaded PDF and remember it for later prompts.

    Args:
        file: Upload object whose ``name`` attribute is handed to PdfReader.

    Returns:
        The extracted text (pages joined by a blank line), or a string of the
        form ``"Error: <message>"`` when anything in the extraction fails.

    Side effects:
        On success the text is stored in the module-level ``PDF_CONTENT.value``.
        On failure ``PDF_CONTENT`` is left as it was.
    """
    # NOTE(review): PdfReader is imported from PyPDF2, while the requirements
    # change in this same commit pins ``pypdf`` -- confirm PyPDF2 is installed.
    # NOTE(review): PDF_CONTENT is a single module-level object, so the stored
    # syllabus is visible to every chatfunc call in the process.
    try:
        reader = PdfReader(file.name)
        page_texts = (page.extract_text() for page in reader.pages)
        syllabus_text = "\n\n".join(page_texts)
        PDF_CONTENT.value = syllabus_text
        return syllabus_text
    except Exception as exc:
        return f"Error: {exc!s}"
|
70 |
+
|
71 |
+
def chatfunc(input_text, chat_history, max_chat_length=6):
    """Answer one user message through the query engine and extend the history.

    The prompt sent to ``QE`` is assembled from: the fixed system preamble, the
    uploaded syllabus (when ``PDF_CONTENT.value`` is non-empty), the most
    recent turns of ``chat_history``, and finally the new user message.

    Args:
        input_text: The user's new message.
        chat_history: List of ``[user_message, bot_message]`` pairs; mutated in
            place by appending the new exchange.
        max_chat_length: Maximum number of past turns replayed in the prompt.
            Values <= 0 replay no history.

    Returns:
        A ``("", chat_history)`` tuple: an empty string followed by the updated
        history.
    """
    prompt = """
<|SYSTEM|># You are the Common Core State Standards Bot; or CCSSB in short.
- CCSSB is a helpful model that helps the User learn about Common Core State Standards.
- CCSSB can access a vector data base of reports on how specific contents align to Common Core State Standards.
- Users will sometimes provide CCSSB with their syllabus or homework and ask CCSSB how they align to Common Core State Standards.
- CCSSB will use the database as much as it can to answer Users' questions with as much detail as possible with specific attention to Common Core State Standards.
- CCSSB will be provided with its past conversation with Users. CCSSB can use this chat history to answer questions specific to the User.
\n\n
"""
    if PDF_CONTENT.value:
        # A ":\n" delimiter keeps the label from running straight into the
        # first word of the syllabus text.
        prompt = (
            prompt
            + "The following is the syllabus provided by the user:\n"
            + PDF_CONTENT.value
            + "\n\n"
        )

    # The previous slice used ~max_chat_length (== -(max_chat_length + 1)),
    # which replayed one turn more than requested. A plain negative slice is
    # correct, but -0 would select the whole list, hence the explicit guard.
    recent_turns = chat_history[-max_chat_length:] if max_chat_length > 0 else []
    for user_chat, bot_chat in recent_turns:
        prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"

    # End on an open "Assistant:" cue for the new message.
    prompt = f"{prompt}\nUser: {input_text}\nAssistant:"
    response = QE.query(prompt)

    # Only the raw user message (not the assembled prompt) goes into history.
    chat_history.append([input_text, response.response])
    return "", chat_history
|
90 |
|
91 |
# Gradio UI: chat window, question box, submit button, a row with the
# clear/upload buttons, and a collapsible view of the uploaded syllabus.
# NOTE(review): the nesting below is reconstructed from statement order;
# confirm the Row/Accordion membership against the intended layout.
with gr.Blocks() as iface:
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask the Common Core State Standard Bot anything about curriculum standards")
    submit = gr.Button("💬 Submit")
    with gr.Row():
        clear = gr.ClearButton(value="🧹 Clear outputs", components=[msg, chatbot])
        upload_button = gr.UploadButton("📁 Upload a Syllabus", file_types=[".pdf"], file_count="single")
    with gr.Accordion("📝 View your syllabus"):
        syl = gr.Textbox(label="Your syllabus' content will show here")

    # Pressing Enter in the textbox and clicking the Submit button must do the
    # same thing; previously the button was created but never bound to a
    # handler, so clicking it had no effect.
    msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
    submit.click(chatfunc, [msg, chatbot], [msg, chatbot])

    # Show the extracted syllabus text (or the error string) in the accordion.
    upload_button.upload(upload_file, upload_button, syl)

iface.launch(share=False)
|
requirements.txt
CHANGED
@@ -83,3 +83,4 @@ uvicorn==0.23.2
|
|
83 |
websockets==11.0.3
|
84 |
yarl==1.9.2
|
85 |
zipp==3.17.0
|
|
|
|
83 |
websockets==11.0.3
|
84 |
yarl==1.9.2
|
85 |
zipp==3.17.0
|
86 |
+
pypdf==3.16.4
|