map reduce to answer summary based questions
app.py CHANGED
@@ -1,7 +1,8 @@
 import asyncio
+import glob
 import os
 import time
-
+
 import gradio as gr
 from dotenv import load_dotenv
 from langchain.chat_models import ChatOpenAI
@@ -17,7 +18,7 @@ pickle_file = "vector_stores/canvas-discussions.pkl"
 index_file = "vector_stores/canvas-discussions.index"
 
 grading_model = 'gpt-4'
-qa_model = 'gpt-
+qa_model = 'gpt-4'
 
 llm = ChatOpenAI(model_name=qa_model, temperature=0, verbose=True)
 embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
@@ -28,7 +29,6 @@ grader_qa = None
 
 def add_text(history, text):
     print("Question asked: " + text)
-    get_grading_status(history)
     response = run_model(text)
     history = history + [(text, response)]
     print(history)
@@ -43,16 +43,16 @@ def run_model(text):
     sources = []
     for document in response['source_documents']:
         sources.append(str(document.metadata))
-    print(sources)
 
     source = ','.join(set(sources))
-    response = response['answer'] + '\nSources: ' +
+    response = response['answer'] + '\nSources: ' + str(len(sources))
     end_time = time.time()
     # # If response contains string `SOURCES:`, then add a \n before `SOURCES`
     # if "SOURCES:" in response:
     #     response = response.replace("SOURCES:", "\nSOURCES:")
     response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
     print(response)
+    print(sources)
     print("Time taken: " + str(end_time - start_time))
     return response
 
@@ -68,16 +68,15 @@ def ingest(url, canvas_api_key, history):
     grader = Grader(grading_model)
     response = "Ingested canvas data successfully"
     history = history + [(text, response)]
-    return
+    return history
+
 
-def start_grading(
+def start_grading(history):
     global grader, grader_qa
     text = f"Start grading discussions from {url}"
-    if
-
-
-        if grader.llm.model_name != grading_model:
-            grader = Grader(grading_model)
+    if grader:
+        # if grader.llm.model_name != grading_model:
+        #     grader = Grader(grading_model)
     # Create a new event loop
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
@@ -108,26 +107,28 @@ def get_first_message(history):
     global grader_qa
     history = [(None,
                 'Get feedback on your canvas discussions. Add your discussion url and get your discussions graded in instantly.')]
-
-    return history
+    return get_grading_status(history)
 
 
 def get_grading_status(history):
     global grader, grader_qa
     # Check if grading is complete
-    if os.path.isdir('output') and len(glob.glob("
+    if os.path.isdir('output') and len(glob.glob("output/*.csv")) > 0 and len(glob.glob("docs/*.json")) > 0 and len(
+            glob.glob("docs/*.html")) > 0:
         if not grader:
             grader = Grader(qa_model)
             grader_qa = GraderQA(grader, embeddings)
         elif not grader_qa:
             grader_qa = GraderQA(grader, embeddings)
-
+        if len(history) == 1:
+            history = history + [(None, 'Grading is already complete. You can now ask questions')]
         enable_fields(False, False, False, False, True, True, True)
     # Check if data is ingested
     elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
         if not grader_qa:
             grader = Grader(qa_model)
-
+        if len(history) == 1:
+            history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
         enable_fields(False, False, False, True, True, False, False)
     else:
         history = history + [(None, 'Please ingest data and start grading')]
@@ -157,7 +158,7 @@ def enable_fields(url_status, canvas_api_key_status, submit_status, grade_status
 
 
 def bot(history):
-    return history
+    return get_grading_status(history)
 
 
 with gr.Blocks() as demo:
@@ -196,7 +197,7 @@ with gr.Blocks() as demo:
         bot, chatbot, chatbot
     )
 
-    grade.click(start_grading, inputs=[
+    grade.click(start_grading, inputs=[chatbot], outputs=[chatbot],
                 postprocess=False).then(
         bot, chatbot, chatbot
    )
@@ -213,8 +214,6 @@ with gr.Blocks() as demo:
         bot, chatbot, chatbot
     )
 
-    set_model(chatbot)
-
 if __name__ == "__main__":
     demo.queue()
     demo.queue(concurrency_count=5)
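For context, the hunk above only shows how run_model() formats its output (appending the source count and timing); the call that actually produces response['answer'] and response['source_documents'] is not visible in this diff. A minimal sketch of what that call presumably looks like, assuming run_model delegates to the GraderQA chain built in utils.py; the grader_qa.chain attribute and the "question" input key are assumptions, not shown in the diff:

import time

def run_model(text):
    # Hypothetical reconstruction: only the formatting lines below appear in the diff.
    start_time = time.time()
    # grader_qa is the module-level GraderQA instance set up in get_grading_status();
    # its ConversationalRetrievalChain keeps chat history in ConversationBufferMemory,
    # so only the new question needs to be passed in.
    response = grader_qa.chain({"question": text})
    sources = [str(doc.metadata) for doc in response['source_documents']]
    answer = response['answer'] + '\nSources: ' + str(len(sources))
    end_time = time.time()
    answer = answer + "\n\n" + "Time taken: " + str(end_time - start_time)
    print(answer)
    print(sources)
    return answer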
grader.py CHANGED
@@ -2,23 +2,19 @@ import asyncio
 import csv
 import glob
 import json
+import os
 import shutil
 from datetime import datetime
 from typing import Optional
 
 from langchain import PromptTemplate
-from langchain.chains import LLMChain
-from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain, ReduceDocumentsChain
-from langchain.chains.combine_documents.stuff import StuffDocumentsChain
-from langchain.chains.summarize import load_summarize_chain
+from langchain.chains import LLMChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader
 from langchain.output_parsers import PydanticOutputParser
-from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, Language
 from pathvalidate import sanitize_filename
 from pydantic import BaseModel, Field
 from tqdm import tqdm
-import os
 
 
 class Grader:
@@ -69,16 +65,16 @@
         self.title = None  # Initialize title
         for r in rubric:
             if 'description' in r and 'ratings' in r:
-                rubric_text.append(f"
-                    [f"
+                rubric_text.append(f"RUBRIC CATEGORY: {r['description']}\n" + "\n".join(
+                    [f"POINTS: {rating['points']} CRITERIA: {rating['description']}" for rating in r['ratings']]))
             elif 'points_possible' in r:
-                rubric_text.append(f"
+                rubric_text.append(f"MAX POINTS POSSIBLE: {r['points_possible']}")
                 print("added points_possible")
             elif 'title' in r:  # Check if title exists in rubric
                 self.title = r['title']  # Save title for later use
-                rubric_text.append(f"
+                rubric_text.append(f"TITLE: {self.title}")
             elif 'instruction' in r:
-                rubric_text.append(f"
+                rubric_text.append(f"DISCUSSION INSTRUCTIONS: {r['instruction']}")
 
         rubric_text = "\n".join(rubric_text)
         # print(rubric_text) Add this to log when moving to application
@@ -100,7 +96,7 @@
     def create_reduce_prompt(self):
         reduce_template_string = f"""I am a Canvas Discussion Grader! I am here to grade the following summarized sections of canvas discussion responses of the student on the basis of instructions and rubric provided.
 --------------------
-To grade student discussion, I will
+To grade student discussion, I will use the discussion instructions and rubric below. I will not deviate from the grading scheme.
 {self.rubric_text}
 --------------------
 I will be able to identify each student by name, their key interests, key features pertinent to the discussion intruction and rubric.
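The rubric formatting above now emits explicitly labeled lines (RUBRIC CATEGORY, POINTS/CRITERIA, MAX POINTS POSSIBLE, TITLE, DISCUSSION INSTRUCTIONS) instead of the earlier unlabeled text. To illustrate, here is an invented rubric in the shape the loop expects, together with the rubric_text it would produce; the values are made up for illustration and are not from the repo:

# Invented sample data, only to show the shape the formatting loop expects.
rubric = [
    {"title": "Week 3 Discussion"},
    {"description": "Critical thinking",
     "ratings": [{"points": 10, "description": "Insightful, well-supported analysis"},
                 {"points": 5, "description": "Surface-level summary"}]},
    {"points_possible": 10},
    {"instruction": "Respond to the prompt and reply to two peers."},
]
# The loop above would build rubric_text as:
# TITLE: Week 3 Discussion
# RUBRIC CATEGORY: Critical thinking
# POINTS: 10 CRITERIA: Insightful, well-supported analysis
# POINTS: 5 CRITERIA: Surface-level summary
# MAX POINTS POSSIBLE: 10
# DISCUSSION INSTRUCTIONS: Respond to the prompt and reply to two peers.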
utils.py CHANGED
@@ -2,12 +2,11 @@ import os
 
 from langchain import FAISS
 from langchain.chains import ConversationalRetrievalChain
-from langchain.
-from langchain.
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import CSVLoader
+from langchain.memory import ConversationBufferMemory
 from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-
-from grader import Grader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 def search_index_from_docs(source_chunks, embeddings):
@@ -86,28 +85,74 @@
     def create_chain(self, embeddings):
         if not self.search_index:
             self.search_index = self.load_index(embeddings)
-
-
-
-
+
+        question_prompt, combine_prompt = self.create_map_reduce_prompt()
+
+        chain = ConversationalRetrievalChain.from_llm(llm=self.llm, chain_type='map_reduce',
+                                                      retriever=self.search_index.as_retriever(search_type='mmr',
+                                                                                               search_kwargs={
+                                                                                                   'lambda_mult': 1,
+                                                                                                   'fetch_k': 50,
+                                                                                                   'k': 30}),
                                                       return_source_documents=True,
                                                       verbose=True,
-                                                      memory=
-
-
-
-
-
-
+                                                      memory=ConversationBufferMemory(memory_key='chat_history',
+                                                                                      return_messages=True,
+                                                                                      output_key='answer'),
+                                                      condense_question_llm=ChatOpenAI(temperature=0,
+                                                                                       model='gpt-3.5-turbo'),
+                                                      combine_docs_chain_kwargs={"question_prompt": question_prompt,
+                                                                                 "combine_prompt": combine_prompt})
         return chain
 
+    def create_map_reduce_prompt(self):
+        system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
+Examples:
+Question: How many students participated in the discussion?
+Answer: This student participated in the discussion./This student did not participate in the discussion.
+Question: What was the average score for the discussion?
+Answer: This student received a score of 10/10 for the discussion.
+Question: How many students received a full score?/How many students did not receive a full score?
+Answer: This student received a full score./This student did not receive a full score.
+Question: How many students lost marks in X category of the rubric?
+Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
+
+
+______________________
+Grading Result For:
+{{context}}
+______________________
+Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
+----------------
+Instructions and Rubric:
+{self.rubric_text}
+"""
+        messages = [
+            SystemMessagePromptTemplate.from_template(system_template),
+            HumanMessagePromptTemplate.from_template("{question}"),
+        ]
+        CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
+        system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
+Use the following answers for each student to answer the users question as accurately as possible.
+You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
+If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+______________________
+{summaries}"""
+        messages = [
+            SystemMessagePromptTemplate.from_template(system_template),
+            HumanMessagePromptTemplate.from_template("{question}"),
+        ]
+        CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
+        return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT
+
     def create_prompt(self):
-        system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the
-You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
-Use the following
+        system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
+You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
+Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
 ----------------
 {self.rubric_text}
 ----------------
+Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
 {{context}}"""
         messages = [
             SystemMessagePromptTemplate.from_template(system_template),