Spaces:

facehugger92
/

standards

Sleeping

App Files Files Community

facehugger92 committed on Oct 16, 2023

Commit

1679e12

1 Parent(s): 386ceec

Upload 6 files

Browse files

Files changed (7) hide show

.gitattributes +1 -0
100_test_docs_persist/docstore.json +0 -0
100_test_docs_persist/graph_store.json +1 -0
100_test_docs_persist/index_store.json +0 -0
100_test_docs_persist/vector_store.json +3 -0
app.py +75 -42
requirements.txt +1 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 test_index_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 test_index_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
+100_test_docs_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text

100_test_docs_persist/docstore.json ADDED Viewed

The diff for this file is too large to render. See raw diff

100_test_docs_persist/graph_store.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"graph_dict": {}}

100_test_docs_persist/index_store.json ADDED Viewed

The diff for this file is too large to render. See raw diff

100_test_docs_persist/vector_store.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:feb2826fe8f6ba32acd8b2545e07d089ffd0d876dd80a8e27df8300c987af9f8
+size 62556594

app.py CHANGED Viewed

@@ -1,70 +1,103 @@
 from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
 from llama_index.llms import OpenAI
 from langchain.chat_models import ChatOpenAI
 import gradio as gr
 import sys
 import os
-# Disabled for HF
-# def construct_index(directory_path, index_path):
-#     max_input_size = 4096
-#     num_outputs = 512
-#     chunk_overlap_ratio = 0.2
-#     chunk_size_limit = 600
-#     if os.listdir(index_path) != []:
-#         return index
-#     # Load in documents
-#     documents = SimpleDirectoryReader(directory_path).load_data()
-#     # Define LLM: gpt-3.5-turbo, temp:0.7
-#     llm = OpenAI(model="gpt-3.5-turbo", temperature=0.7, max_tokens=num_outputs)
-#     # Define prompt helper
-#     prompt_helper = PromptHelper(context_window=max_input_size, num_output=num_outputs, chunk_overlap_ratio=chunk_overlap_ratio, chunk_size_limit=chunk_size_limit)
-#     # Set service context
-#     service_context = ServiceContext.from_defaults(llm=llm, prompt_helper=prompt_helper)
-#     set_global_service_context(service_context)
-#     # Parse documents into nodes
-#     # parser = SimpleNodeParser.from_defaults()
-#     # nodes = parser.get_nodes_from_documents(documents)
-#     # Index documents
-#     index = GPTVectorStoreIndex.from_documents(documents)
-#     # Save index
-#     index.storage_context.persist(persist_dir=index_path)
-#     return index
-STORAGE_CONTEXT = StorageContext.from_defaults(persist_dir="test_index_persist")
-INDEX = load_index_from_storage(STORAGE_CONTEXT)
 QE = INDEX.as_query_engine()
-def format_chat(message, chat_history, max_chat_length=10):
-    prompt = ""
     for chat in chat_history[~max_chat_length:]:
         user_chat, bot_chat = chat
         prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
-    prompt = f"{prompt}\nUser: {message}\nAssistant:"
-    return prompt
-def chatfunc(input_text, chat_history):
-    response = QE.query(input_text)
     chat_history.append([input_text, response.response])
     return "", chat_history
 with gr.Blocks() as iface:
     chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="Ask the Standard Bot anything about curriculum standards")
-    submit = gr.Button("Submit")
     with gr.Row():
-        clear = gr.ClearButton(components=[msg, chatbot])
-        msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
-iface.launch(share=False, debug=True)

 from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
 from llama_index.llms import OpenAI
 from langchain.chat_models import ChatOpenAI
+from PyPDF2 import PdfReader
 import gradio as gr
 import sys
 import os
+try:
+    from config import OPEN_AI_KEY
+    os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY
+except:
+    pass
+# ===============================
+# Settings
+# ===============================
+# Passed to PromptHelper as ``context_window``.
+MAX_INPUT_SIZE = 4096
+# Used both as the LLM's ``max_tokens`` and as PromptHelper's ``num_output``.
+NUM_OUTPUT = 700
+# Passed to PromptHelper as ``chunk_overlap_ratio``.
+CHUNK_OVERLAP_RATIO = 0.15
+# Passed to PromptHelper as ``chunk_size_limit``.
+CHUNK_SIZE_LIMIT = 600
+# Define LLM: gpt-3.5-turbo, temp:0.7
+llm = OpenAI(model="gpt-3.5-turbo", temperature=0.7, max_tokens=NUM_OUTPUT)
+# Define prompt helper
+prompt_helper = PromptHelper(context_window=MAX_INPUT_SIZE, num_output=NUM_OUTPUT, chunk_overlap_ratio=CHUNK_OVERLAP_RATIO, chunk_size_limit=CHUNK_SIZE_LIMIT)
+# Set service context
+# The context is also registered globally, so calls that do not receive it
+# explicitly still use this LLM and prompt helper.
+service_context = ServiceContext.from_defaults(llm=llm, prompt_helper=prompt_helper)
+set_global_service_context(service_context)
+# ===============================
+# Functions
+# ===============================
+def construct_index(directory_path, index_path):
+    if os.listdir(index_path) != []:
+        storage_context = StorageContext.from_defaults(persist_dir=index_path)
+        index = load_index_from_storage(storage_context)
+        return index
+    else:
+        # Load in documents
+        documents = SimpleDirectoryReader(directory_path).load_data()
+        # Index documents
+        index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
+        # Save index
+        index.storage_context.persist(persist_dir=index_path)
+        return index
+# Load the index from the persist folder, or build it from the documents
+# folder when nothing has been persisted yet.
+INDEX = construct_index("100_test_docs", "100_test_docs_persist")
 QE = INDEX.as_query_engine()
+# Holds the text of the most recently uploaded syllabus PDF.
+# NOTE(review): this is one module-level object whose ``.value`` is
+# overwritten on upload, so it is presumably shared by every session --
+# confirm, and consider a per-session gr.State passed via event inputs.
+PDF_CONTENT = gr.State("")
+def upload_file(file):
+    try:
+        read_pdf = PdfReader(file.name)
+        pdf_text = "\n\n".join([w.extract_text() for w in read_pdf.pages])
+        PDF_CONTENT.value = pdf_text
+        return pdf_text
+    except Exception as e:
+        return f"Error: {str(e)}"
+def chatfunc(input_text, chat_history, max_chat_length=6):
+    prompt = """
+                <|SYSTEM|># You are the Common Core State Standards Bot; or CCSSB in short.
+                - CCSSB is a helpful model that helps the User learn about Common Core State Standards.
+                - CCSSB can access a vector data base of reports on how specific contents align to Common Core State Standards.
+                - Users will sometimes provide CCSSB with their syllabus or homework and ask CCSSB how they align to Common Core State Standards.
+                - CCSSB will use the database as much as it can to answer Users' questions with as much detail as possible with specific attention to Common Core State Standards.
+                - CCSSB will be provided with its past conversation with Users. CCSSB can use this chat history to answer questions specific to the User.
+                \n\n
+             """
+    if PDF_CONTENT.value:
+        prompt = prompt + "The following is the syllabus provided by the user" + PDF_CONTENT.value + "\n\n"
     for chat in chat_history[~max_chat_length:]:
         user_chat, bot_chat = chat
         prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
+    prompt = f"{prompt}\nUser: {input_text}\nAssistant:"
+    response = QE.query(prompt)
     chat_history.append([input_text, response.response])
     return "", chat_history
 with gr.Blocks() as iface:
     chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(label="Ask the Common Core State Standard Bot anything about curriculum standards")
+    submit = gr.Button("💬 Submit")
     with gr.Row():
+        clear = gr.ClearButton(value="🧹 Clear outputs", components=[msg, chatbot])
+        upload_button = gr.UploadButton("📁 Upload a Syllabus", file_types=[".pdf"], file_count="single")
+    with gr.Accordion("📝 View your syllabus"):
+        syl = gr.Textbox(label="Your syllabus' content will show here")
+    msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
+    upload_button.upload(upload_file, upload_button, syl)
+iface.launch(share=False)

requirements.txt CHANGED Viewed

@@ -83,3 +83,4 @@ uvicorn==0.23.2
 websockets==11.0.3
 yarl==1.9.2
 zipp==3.17.0

 websockets==11.0.3
 yarl==1.9.2
 zipp==3.17.0
+pypdf==3.16.4