Spaces:
Runtime error
Runtime error
Changes for automated grader integrated with a chatbot
Browse files- app_new.py +232 -0
- grader.py +257 -0
- ingest.py +174 -0
- requirements.in +11 -0
- utils.py +253 -0
app_new.py
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
import time
|
4 |
+
import glob
|
5 |
+
import gradio as gr
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from langchain.chat_models import ChatOpenAI
|
8 |
+
from langchain.embeddings import OpenAIEmbeddings
|
9 |
+
|
10 |
+
from grader import Grader
|
11 |
+
from ingest import ingest_canvas_discussions
|
12 |
+
from utils import GraderQA
|
13 |
+
|
14 |
+
# Load environment variables from a local .env file, if one exists.
load_dotenv()

# On-disk artifacts that run_model() checks for before it builds the QA chain.
pickle_file = "vector_stores/canvas-discussions.pkl"
index_file = "vector_stores/canvas-discussions.index"

# Model name passed to Grader by ingest(), and model name used for Q&A.
grading_model = 'gpt-4'
qa_model = 'gpt-3.5-turbo-16k'

llm = ChatOpenAI(model_name=qa_model, temperature=0, verbose=True)
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

# Module-level state shared by the UI handlers; both start unset and are
# assigned by the handlers once data has been ingested / graded.
grader = None
grader_qa = None
|
27 |
+
|
28 |
+
|
29 |
+
def add_text(history, text):
    """Answer ``text`` with ``run_model`` and append the exchange to ``history``.

    Returns a ``(history, "")`` pair; the empty string is the new value for
    the second output component of the event that calls this handler.
    """
    print("Question asked: " + text)
    answer = run_model(text)
    updated_history = history + [(text, answer)]
    print(updated_history)
    return updated_history, ""
|
35 |
+
|
36 |
+
|
37 |
+
def run_model(text):
    """Answer a question about the graded discussions.

    Lazily builds the module-level ``grader`` / ``grader_qa`` objects when the
    ingested data and the vector store are present on disk, runs the QA chain
    on ``text`` and returns the answer followed by its sources and the
    elapsed time.

    Args:
        text: The user's question.

    Returns:
        The formatted answer, or a short instruction message when no QA chain
        can be built yet (nothing ingested or graded).
    """
    global grader, grader_qa
    start_time = time.time()
    print("start time:" + str(start_time))
    if not grader_qa and not grader:
        # These are the file names ingest.py actually writes; the previous
        # check used 'discussion_entries.json.json' and 'rubric-data.json'
        # and therefore could never succeed.
        required_files = (
            pickle_file,
            index_file,
            'docs/discussion_entries.json',
            'docs/rubric_data.json',
        )
        if all(os.path.isfile(path) for path in required_files) and os.path.getsize(pickle_file) > 0:
            grader = Grader(qa_model)
            grader_qa = GraderQA(grader, embeddings)
    elif not grader_qa:
        grader.llm.model_name = qa_model
        grader_qa = GraderQA(grader, embeddings)

    if not grader_qa:
        # Nothing to query yet: answer in the chat instead of failing with an
        # AttributeError on None.
        return "Please ingest data and grade discussions before asking questions"

    response = grader_qa.chain(text)
    sources = [str(document.metadata) for document in response['source_documents']]
    print(sources)

    # De-duplicate while keeping first-seen order so the output is stable.
    source = ','.join(dict.fromkeys(sources))
    response = response['answer'] + '\nSources: ' + source
    end_time = time.time()
    response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
    print(response)
    print("Time taken: " + str(end_time - start_time))
    return response
|
66 |
+
|
67 |
+
|
68 |
+
def set_model(history):
    """Return the initial chat history produced by ``get_first_message``."""
    return get_first_message(history)
|
71 |
+
|
72 |
+
|
73 |
+
def ingest(url, canvas_api_key, openai_api_key, history):
    """Download a Canvas discussion and prepare a ``Grader`` for it.

    Args:
        url: Canvas discussion URL entered in the UI.
        canvas_api_key: Canvas API token used for the download.
        openai_api_key: Accepted for signature compatibility; not used here.
        history: Current chat history (list of ``(user, bot)`` pairs).

    Returns:
        The chat history extended with the outcome of the ingestion. On
        success the result also carries the status line appended by
        ``get_grading_status``.
    """
    global grader
    text = f"Download data from {url} and ingest it to grade discussions"
    if not url or not canvas_api_key:
        return history + [(text, "Please enter the Canvas discussion URL and API key to ingest data")]
    try:
        ingest_canvas_discussions(url, canvas_api_key)
    except ValueError as err:
        # ingest_canvas_discussions raises ValueError for an unrecognised URL;
        # report it in the chat instead of crashing the event handler.
        return history + [(text, f"Could not ingest data: {err}")]
    grader = Grader(grading_model)
    response = "Ingested canvas data successfully"
    history = history + [(text, response)]
    return get_grading_status(history)
|
81 |
+
|
82 |
+
|
83 |
+
def start_grading(url, canvas_api_key, openai_api_key, history):
    """Grade the ingested discussions and record the outcome in ``history``.

    Requires all three fields to be non-empty and a module-level ``grader``
    to exist. The grader's ``run_chain`` coroutine is run on a dedicated
    event loop that is closed afterwards.
    """
    global grader, grader_qa
    text = f"Start grading discussions from {url}"
    all_fields_given = bool(url and canvas_api_key and openai_api_key)
    if not all_fields_given:
        response = "Please enter all the fields to initiate grading"
    elif not grader:
        response = "Please ingest data before grading"
    else:
        # A fresh loop is created and installed as the current one for this call.
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)
        try:
            event_loop.run_until_complete(grader.run_chain())
            grader_qa = GraderQA(grader, embeddings)
            response = "Grading done"
        finally:
            event_loop.close()
    return history + [(text, response)]
|
104 |
+
|
105 |
+
|
106 |
+
def start_downloading():
    """Return the fixed status text ``"Downloaded"``.

    The actual download call is not wired in; this is a placeholder handler.
    """
    status = "Downloaded"
    return status
|
109 |
+
|
110 |
+
|
111 |
+
def get_first_message(history):
    """Build the opening chat history.

    The incoming ``history`` is discarded: the result always starts with the
    greeting and is then extended by ``get_grading_status``.
    """
    greeting = 'Get feedback on your canvas discussions. Add your discussion url and get your discussions graded in instantly.'
    return get_grading_status([(None, greeting)])
|
117 |
+
|
118 |
+
|
119 |
+
def get_grading_status(history):
    """Append a status line to ``history`` and toggle the UI fields to match.

    Three states are distinguished from what is on disk:

    * ``output/`` exists and ``docs/`` holds JSON and HTML files: grading is
      complete, so the QA objects are created if missing.
    * ``docs/`` holds JSON and HTML files only: data is ingested and ready to
      be graded.
    * otherwise: nothing has been ingested yet.

    Args:
        history: Current chat history (list of ``(user, bot)`` pairs).

    Returns:
        A new history list with one status message appended.
    """
    global grader, grader_qa
    data_ingested = len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")) > 0

    # Check if grading is complete
    if os.path.isdir('output') and data_ingested:
        if not grader:
            grader = Grader(qa_model)
            grader_qa = GraderQA(grader, embeddings)
        elif not grader_qa:
            grader_qa = GraderQA(grader, embeddings)
        history = history + [(None, 'Grading is already complete. You can now ask questions')]
        enable_fields(False, False, False, False, False, True, True, True)
    # Check if data is ingested
    elif data_ingested:
        # Only create a grader when none exists. Testing grader_qa here (as
        # before) replaced the Grader(grading_model) that ingest() had just
        # built with a Grader(qa_model).
        if not grader:
            grader = Grader(qa_model)
        history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
        enable_fields(False, False, False, False, True, True, False, False)
    else:
        history = history + [(None, 'Please ingest data and start grading')]
        # All input fields stay enabled so the user can start an ingestion.
        enable_fields(True, True, True, True, True, True, False, False)
    return history
|
141 |
+
|
142 |
+
|
143 |
+
# handle enable/disable of fields
|
144 |
+
def enable_fields(url_status, canvas_api_key_status, openai_api_key_status, submit_status, grade_status,
                  download_status, chatbot_txt_status, chatbot_btn_status):
    """Set the ``interactive`` flag and placeholder text of the UI widgets.

    Each ``*_status`` argument is assigned to the ``interactive`` attribute of
    the matching module-level widget. Placeholders are updated for the chat
    box always, and for the three input boxes only when they are disabled.
    """
    interactive_flags = (
        (url, url_status),
        (canvas_api_key, canvas_api_key_status),
        (openai_api_key, openai_api_key_status),
        (submit, submit_status),
        (grade, grade_status),
        (download, download_status),
        (txt, chatbot_txt_status),
        (ask, chatbot_btn_status),
    )
    for widget, flag in interactive_flags:
        widget.interactive = flag

    txt.placeholder = "Ask a question" if chatbot_txt_status else "Please grade discussions first"

    input_boxes = (
        (url, url_status),
        (canvas_api_key, canvas_api_key_status),
        (openai_api_key, openai_api_key_status),
    )
    for widget, flag in input_boxes:
        if not flag:
            widget.placeholder = "Data already ingested"
|
164 |
+
|
165 |
+
def bot(history):
    """Pass-through step: hand the chat history back exactly as received."""
    unchanged = history
    return unchanged
|
167 |
+
|
168 |
+
|
169 |
+
# Build the Gradio UI: input fields, action buttons, chat panel, and the
# event wiring that connects them to the handler functions in this module.
with gr.Blocks() as demo:
    gr.Markdown(f"<h2><center>{'Canvas Discussion Grading With Feedback'}</center></h2>")

    # Input fields; the two API keys are rendered as password boxes.
    with gr.Row():
        url = gr.Textbox(
            label="Canvas Discussion URL",
            placeholder="Enter your Canvas Discussion URL"
        )

        canvas_api_key = gr.Textbox(
            label="Canvas API Key",
            placeholder="Enter your Canvas API Key", type="password"
        )

        openai_api_key = gr.Textbox(
            label="OpenAI API Key",
            placeholder="Enter your OpenAI API Key", type="password"
        )

    # Action buttons. NOTE(review): no handler is attached to `reset` in this block.
    with gr.Row():
        submit = gr.Button(value="Submit", variant="secondary", )
        grade = gr.Button(value="Grade", variant="secondary")
        download = gr.Button(value="Download", variant="secondary")
        reset = gr.Button(value="Reset", variant="secondary")

    chatbot = gr.Chatbot([], label="Chat with grading results", elem_id="chatbot", height=400)

    # Question box and "Ask" button.
    with gr.Row():
        with gr.Column(scale=3):
            txt = gr.Textbox(
                label="Ask questions about how students did on the discussion",
                placeholder="Enter text and press enter, or upload an image", lines=1
            )
        ask = gr.Button(value="Ask", variant="secondary", scale=1)

    # Seed the chat with the greeting plus the current ingestion/grading status.
    chatbot.value = get_first_message([])

    # Every event runs its handler and then `bot`, which returns the history unchanged.
    submit.click(ingest, inputs=[url, canvas_api_key, openai_api_key, chatbot], outputs=[chatbot],
                 postprocess=False).then(
        bot, chatbot, chatbot
    )

    grade.click(start_grading, inputs=[url, canvas_api_key, openai_api_key, chatbot], outputs=[chatbot],
                postprocess=False).then(
        bot, chatbot, chatbot
    )

    # NOTE(review): start_downloading returns a plain string while the output
    # is the chatbot component — confirm this is the intended value shape.
    download.click(start_downloading, inputs=[], outputs=[chatbot], postprocess=False).then(
        bot, chatbot, chatbot
    )

    txt.submit(add_text, [chatbot, txt], [chatbot, txt], postprocess=False).then(
        bot, chatbot, chatbot
    )

    ask.click(add_text, inputs=[chatbot, txt], outputs=[chatbot, txt], postprocess=False,).then(
        bot, chatbot, chatbot
    )

    # NOTE(review): the return value is discarded here, so this call only has
    # the side effects of get_first_message / get_grading_status.
    set_model(chatbot)

if __name__ == "__main__":
    # NOTE(review): queue() is called twice; presumably only the second call's
    # settings are needed — confirm and drop the first.
    demo.queue()
    demo.queue(concurrency_count=5)
    demo.launch(debug=True, )
|
grader.py
ADDED
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import csv
|
3 |
+
import glob
|
4 |
+
import json
|
5 |
+
import shutil
|
6 |
+
from datetime import datetime
|
7 |
+
from typing import Optional
|
8 |
+
|
9 |
+
from langchain import PromptTemplate
|
10 |
+
from langchain.chains import LLMChain, MapReduceChain
|
11 |
+
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain, ReduceDocumentsChain
|
12 |
+
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
13 |
+
from langchain.chains.summarize import load_summarize_chain
|
14 |
+
from langchain.chat_models import ChatOpenAI
|
15 |
+
from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader
|
16 |
+
from langchain.output_parsers import PydanticOutputParser
|
17 |
+
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, Language
|
18 |
+
from pathvalidate import sanitize_filename
|
19 |
+
from pydantic import BaseModel, Field
|
20 |
+
from tqdm import tqdm
|
21 |
+
import os
|
22 |
+
|
23 |
+
|
24 |
+
class Grader:
    """Grade Canvas discussion HTML files found under ``docs/`` with an LLM.

    On construction the grader loads every ``*.html`` document under
    ``docs/``, reads the rubric from ``docs/rubric_data.json``, builds the
    grading prompt and LLM chain, and counts the prompt tokens for all
    documents. ``run_chain`` then grades the documents in batches and writes
    the parsed results to a CSV file in ``output/``.
    """

    def __init__(self, model):
        """Set up documents, rubric, prompt and chain for ``model``.

        Args:
            model: Chat model name passed to ``ChatOpenAI``.
        """
        print("Setting up environment for grading")
        os.environ["LANGCHAIN_TRACING"] = "true"
        self.title = None
        self.model = model
        self.rubric_file = 'docs/rubric_data.json'
        self.discussions_file_path = "docs/discussion_entries.json"
        self.fieldnames = ['student_name', 'total_score', 'student_feedback', 'grader_comments', 'summary']
        # Order matters below: the prompt needs the parser and rubric text,
        # and the token count and chain need the prompt.
        self.docs = self.get_html_files()
        self.llm = ChatOpenAI(temperature=0, model_name=model)
        self.parser: PydanticOutputParser = self.create_parser()
        self.rubric_text = self.create_rubric_text()
        self.prompt = self.create_prompt()
        self.splitter = None
        self.tokens = self.get_num_tokens()
        self.llm_chain = self.create_llm_chain(model)
        self.csv = self.get_csv_file_name()
        self.outputs = []
        self.completed = 0
        self.lock = asyncio.Lock()

    class ToolArgsSchema(BaseModel):
        """Structured grading result the LLM output is parsed into."""

        student_name: Optional[str] = Field(description="The name of the student")
        total_score: int = Field(description="The grade of the student's answer")
        student_feedback: Optional[str] = Field(
            description="The developmental feedback from Grader's point of view to the student, some examples are: 'Great work, ...', 'Although, your submission is relevant to the question, it doesn't answer the question entirely...'. Give customized feedback based on student's answer")
        grader_comments: Optional[str] = Field(
            description="The grade split breakup based on rubric added as grader's one liner customized comments to explain how the grade was calculated for that particular student's answer")
        summary: Optional[str] = Field(
            description="The overall summary of the student's answer outlining key points from the student's answer based on the rubric which can be used as a portion of a vectorstore, used to answer summary based questions about all the discussions")

        class Config:
            schema_extra = {
                "required": ["student_name", "total_score", "student_feedback", "grader_comments", "summary"]
            }

    def create_parser(self):
        """Return a ``PydanticOutputParser`` bound to ``ToolArgsSchema``."""
        return PydanticOutputParser(pydantic_object=self.ToolArgsSchema)

    def create_rubric_text(self):
        """Read ``self.rubric_file`` and flatten it into prompt text.

        Also stores the discussion title, when present, in ``self.title``.

        Returns:
            The rubric entries joined by newlines.
        """
        with open(self.rubric_file, 'r') as file:
            rubric = json.load(file)
        rubric_text = []
        self.title = None  # Initialize title
        for r in rubric:
            if 'description' in r and 'ratings' in r:
                rubric_text.append(f"description:{r['description']}\n" + "\n".join(
                    [f"points:{rating['points']} points: {rating['description']}" for rating in r['ratings']]))
            elif 'points_possible' in r:
                # NOTE(review): only a message is printed; the value is not
                # added to the rubric text — confirm whether it should be.
                print("added points_possible")
            elif 'title' in r:  # Check if title exists in rubric
                self.title = r['title']  # Save title for later use
                rubric_text.append(f"title:{self.title}")
            elif 'instruction' in r:
                rubric_text.append(f"instruction:{r['instruction']}")

        return "\n".join(rubric_text)

    def create_map_prompt(self):
        """Return the prompt used to summarize one section of a long discussion."""
        map_template_string = f"""I am an expert concise Canvas Discussion Summarizer! I am here to concisely summarize the following sections of a long canvas discussion responses of this student on the basis of instructions and rubric provided.
The aim is to capture the important and key points on the basis of instructions and rubric provided and create a short summary, so that grading can be done on all the summarized sections of canvas discussion of a student's response.
--------------------
Following is the canvas instruction and rubric:
{self.rubric_text}
--------------------
I will summarize this extracted part of a long canvas discussion:
{{input_documents}}
"""
        return PromptTemplate(template=map_template_string, input_variables=["input_documents"])

    def create_reduce_prompt(self):
        """Return the prompt used to grade the summarized sections of a discussion."""
        reduce_template_string = f"""I am a Canvas Discussion Grader! I am here to grade the following summarized sections of canvas discussion responses of the student on the basis of instructions and rubric provided.
--------------------
To grade student discussion, I will follow the rubric below. I will not deviate from the grading scheme.
{self.rubric_text}
--------------------
I will be able to identify each student by name, their key interests, key features pertinent to the discussion intruction and rubric.
I will be able to summarize the entire discussion in concise manner including key points from each student's answer.
--------------------
I will grade the following summarized canvas discussion: {{input_documents}}
--------------------
My grading results will ALWAYS be in following format:
Format instructions: {{format_instructions}}
"""
        return PromptTemplate(
            template=reduce_template_string,
            input_variables=["input_documents"],
            output_parser=self.parser,
            partial_variables={"format_instructions": self.parser.get_format_instructions()}
        )

    def create_map_llm_chain(self):
        """Return an ``LLMChain`` for the map (summarize) step."""
        print("Ready to grade!")
        # The prompt is built here: no ``map_prompt`` attribute is ever set on
        # the instance, so reading one would raise AttributeError.
        map_llm_chain = LLMChain(
            llm=self.llm,
            prompt=self.create_map_prompt(),
            verbose=True,
        )
        return map_llm_chain

    def create_reduce_llm_chain(self):
        """Return an ``LLMChain`` for the reduce (grade) step."""
        # Built on demand for the same reason as in create_map_llm_chain.
        reduce_llm_chain = LLMChain(
            llm=self.llm,
            prompt=self.create_reduce_prompt(),
            verbose=True,
        )
        return reduce_llm_chain

    async def process_file(self, file, pbar):
        """Grade one document and return the raw LLM output.

        Args:
            file: The loaded document to grade.
            pbar: Progress bar advanced by one after the document is graded.

        Returns:
            The unparsed chain output; ``run_chain`` parses it.
        """
        if self.model == 'gpt-4':
            await asyncio.sleep(10)  # 10-second delay before each gpt-4 request
        result = await self.llm_chain.arun(file)
        async with self.lock:
            self.completed += 1
            pbar.update(1)
        return result

    async def run_chain(self):
        """Grade all loaded documents in batches and save the results as CSV.

        Returns:
            ``True`` once the CSV has been written.
        """
        print("Grading Started! Now sit back and get a coffee \u2615")
        # Start from a clean slate so a repeated run does not duplicate rows.
        self.outputs = []
        self.completed = 0
        total = len(self.docs)
        pbar = tqdm(total=total)
        # if model is gpt-4, batch size is 2, else batch size is 5
        batch_size = 2 if self.model == 'gpt-4' else 5
        batches = [self.docs[i:i + batch_size] for i in range(0, len(self.docs), batch_size)]
        for batch in batches:
            tasks = [self.process_file(file, pbar) for file in batch]
            results = await asyncio.gather(*tasks)
            for result in results:
                self.outputs.append(self.parser.parse(result))
            if self.model == 'gpt-4':
                await asyncio.sleep(3)  # Add a delay between each batch
        pbar.close()
        self.save_csv()
        return True

    def create_csv(self):
        """Recreate ``output/`` and write a header-only CSV file into it.

        Returns:
            Path of the new CSV file.
        """
        # remove existing csvs in output folder
        if os.path.exists('output'):
            shutil.rmtree('output')

        os.mkdir('output')
        now = datetime.now()  # current date and time
        date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
        if self.title:  # If title exists, use it in the filename
            file_name = f"{self.title}-{self.llm.model_name}-{date_time}.csv"
        else:  # If title doesn't exist, use 'output' in the filename
            file_name = f"output-{self.llm.model_name}-{date_time}.csv"

        # Sanitize the entire filename
        sanitized_file_name = sanitize_filename(file_name)
        sanitized_file_name = os.path.join('output', sanitized_file_name)

        with open(sanitized_file_name, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.fieldnames)
            writer.writeheader()
        return sanitized_file_name

    def save_csv(self):
        """Write all collected outputs to a freshly created CSV file.

        Returns:
            ``True`` after the rows have been written.
        """
        # Use the filename created in create_csv method
        self.csv = self.create_csv()
        with open(self.csv, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.fieldnames)
            rows = [output.dict() for output in self.outputs]  # Convert each output to a dictionary
            writer.writerows(rows)  # Write all rows to the CSV
        print(f"Saved grades for {len(self.outputs)} students in {self.csv}")
        return True

    def get_html_files(self):
        """Load every ``*.html`` file under ``docs/`` as a document.

        Each document gets a ``name`` metadata entry derived from its source
        file name without the extension.
        """
        loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
        document_list = loader.load()
        for document in document_list:
            # Every document is named (a stray ``break`` used to stop after
            # the first one); os.path handles either path separator.
            source_path = document.metadata["source"]
            document.metadata["name"] = os.path.splitext(os.path.basename(source_path))[0]
        return document_list

    def create_prompt(self):
        """Return the single-pass grading prompt with format instructions bound."""
        prompt_template = f"""I am a Canvas Discussion Grader! I am here to grade the following canvas discussion on the basis of instructions and rubric provided.
To grade student discussion, I will follow the rubric below. I will not deviate from the grading scheme.
{self.rubric_text}

I will be able to identify each student by name, identify their key interests, key features of the responses pertinent to the discussion intruction and rubric.
I will be able to summarize the entire discussion in concise manner including key points from each student's answer.
I will grade the following canvas discussion: {{input_documents}}

My grading results will ALWAYS be in following format:
Format instructions: {{format_instructions}}
"""
        return PromptTemplate(template=prompt_template, input_variables=["input_documents"], output_parser=self.parser,
                              partial_variables={"format_instructions": self.parser.get_format_instructions()})

    def create_llm_chain(self, model):
        """Return the grading ``LLMChain``.

        ``model`` is accepted for signature compatibility; the chain uses
        ``self.llm``.
        """
        print("Ready to grade!")

        return LLMChain(
            llm=self.llm,
            prompt=self.prompt,
        )

    def get_num_tokens(self):
        """Return the total token count of the grading prompt over all documents."""
        total_tokens = 0
        for doc in self.docs:
            summary_prompt = self.prompt.format(input_documents=doc)
            total_tokens += self.llm.get_num_tokens(summary_prompt)
        return total_tokens

    def get_csv_file_name(self):
        """Return the first CSV file found in ``output/``, or ``None``."""
        output_dir = 'output'
        if os.path.exists(output_dir):
            csv_files = glob.glob(os.path.join(output_dir, '*.csv'))
            if csv_files:
                return csv_files[0]  # return the first csv file found
        return None
|
252 |
+
|
253 |
+
|
254 |
+
def run(model):
    """Build a ``Grader`` for ``model``, run its grading coroutine, report success."""
    asyncio.run(Grader(model).run_chain())
    print("Grading successful")
|
ingest.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import json
|
4 |
+
import shutil
|
5 |
+
|
6 |
+
import requests
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
from typing import List
|
9 |
+
|
10 |
+
# Module-level state assigned by ingest_canvas_discussions().
rubric = None
message = None
# Output files written during ingestion.
rubric_file = 'docs/rubric_data.json'
discussion_entries_file = 'docs/discussion_entries.json'
|
14 |
+
|
15 |
+
class DiscussionEntry:
    """A single discussion post together with its nested replies."""

    def __init__(self, id: int, parent_id: int, name: str, message: str, replies: List):
        self.id, self.parent_id = id, parent_id
        self.name, self.message = name, message
        self.replies = replies

    def to_json(self):
        """Return this entry, and recursively its replies, as plain dicts."""
        serialized_replies = []
        for reply in self.replies:
            serialized_replies.append(reply.to_json())
        return dict(
            id=self.id,
            parent_id=self.parent_id,
            name=self.name,
            message=self.message,
            replies=serialized_replies,
        )

    def dump_json(self, filename):
        """Write the ``to_json`` representation of this entry to ``filename``."""
        with open(filename, 'w') as out:
            json.dump(self.to_json(), out)
|
35 |
+
|
36 |
+
def extract_entries(entries, participants):
    """Convert raw entry dicts into ``DiscussionEntry`` objects.

    Entries without a ``message`` key, or carrying a ``deleted`` key, are
    skipped. The author name is the ``display_name`` of the first participant
    whose ``id`` equals the entry's ``user_id`` (``None`` if there is none).
    Replies are converted recursively.
    """
    converted = []
    for raw in entries:
        if 'message' not in raw or 'deleted' in raw:
            continue
        entry_id = raw['id']
        parent = raw['parent_id']
        author_id = raw['user_id']
        author_name = None
        for participant in participants:
            if participant['id'] == author_id:
                author_name = participant['display_name']
                break
        body = raw['message']
        children = extract_entries(raw['replies'], participants) if 'replies' in raw else []
        converted.append(DiscussionEntry(entry_id, parent, author_name, body, children))
    return converted
|
50 |
+
|
51 |
+
def save_messages(entries, group_id=None):
    """Append each entry's message to a per-author HTML file under ``docs/``.

    Top-level posts (``parent_id`` is ``None``) get a "Student Post" heading,
    replies get a "Reply to" heading; every message is followed by ``<hr>``.
    Replies are processed recursively and land in the replying author's file.

    Args:
        entries: Objects exposing ``name``, ``parent_id``, ``message`` and
            ``replies``.
        group_id: When given, file names are prefixed with ``group_<id>_``.
    """
    for entry in entries:
        # Path separators in a display name would otherwise point into a
        # directory that does not exist.
        safe_name = str(entry.name).replace('/', '_').replace('\\', '_')
        if group_id is not None:
            filename = f'docs/group_{group_id}_{safe_name}.html'
        else:
            filename = f'docs/{safe_name}.html'

        if entry.parent_id is None:
            heading = f'<h1><b>Student Post: {entry.name}</b></h1>'
        else:
            heading = f'<h2><b>Reply to: {entry.parent_id}</b></h2>'

        # Explicit UTF-8: posts routinely contain non-ASCII text and the
        # platform default encoding may not be able to represent it.
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(heading)
            f.write(entry.message)
            f.write('<hr>')

        # Recurse only after the file is closed, so a reply by the same author
        # is appended after this post rather than interleaved with it.
        save_messages(entry.replies, group_id)
|
69 |
+
|
70 |
+
def extract_group_discussions(group_topic_children, headers):
    """Fetch the entries of every group topic of a group discussion.

    Args:
        group_topic_children: Dicts with ``group_id`` and ``id`` (topic id).
        headers: HTTP headers (authorization) sent with each request.

    Returns:
        A list of ``{'group_id': ..., 'entries': [...]}`` dicts, one per group
        whose request succeeded.

    Side effects:
        Writes the entries of all successfully fetched groups to
        ``discussion_entries_file`` as one JSON list. Reads the module-level
        ``base_url`` set by ``ingest_canvas_discussions``.
    """
    group_entries = []
    all_entries_json = []
    for group_topic in group_topic_children:
        group_id = group_topic['group_id']
        topic_id = group_topic['id']
        group_discussion_url = f'{base_url}/api/v1/groups/{group_id}/discussion_topics/{topic_id}/view'
        group_discussion_response = requests.get(group_discussion_url, headers=headers)
        if not group_discussion_response.ok:
            # Report the failure instead of silently skipping the group.
            print(f'Error: {group_discussion_response.text}')
            continue
        group_discussion_data = group_discussion_response.json()
        entries = extract_entries(group_discussion_data['view'], group_discussion_data['participants'])
        all_entries_json.extend(entry.to_json() for entry in entries)
        group_entries.append({
            'group_id': group_id,
            'entries': entries
        })

    # Dump JSON data once, after the loop: writing inside the loop in 'w'
    # mode left only the last group's entries in the file.
    if group_entries:
        with open(discussion_entries_file, 'w') as f:
            json.dump(all_entries_json, f)
    return group_entries
|
88 |
+
|
89 |
+
def extract_individual_discussion(discussion_url, headers):
    """Fetch one discussion view and return its entries.

    On a successful response the entries are also dumped as JSON to
    ``discussion_entries_file``. An unsuccessful response yields an empty list.
    """
    response = requests.get(discussion_url, headers=headers)
    if not response.ok:
        return []
    payload = response.json()
    entries = extract_entries(payload['view'], payload['participants'])
    # Dump JSON data for individual discussion
    with open(discussion_entries_file, 'w') as out:
        json.dump([entry.to_json() for entry in entries], out)
    return list(entries)
|
100 |
+
|
101 |
+
|
102 |
+
def ingest_canvas_discussions(input_url, access_token):
    """Download a Canvas discussion topic and store it under ``docs/``.

    Fetches the topic, writes title / instruction / rubric / points to
    ``rubric_file``, then downloads the discussion entries (per group for
    group discussions) and saves them as per-author HTML files.

    Args:
        input_url: URL of the form
            ``https://canvas.illinois.edu/courses/<id>/discussion_topics/<id>``.
        access_token: Canvas API token sent as a bearer token.

    Raises:
        ValueError: If ``input_url`` does not match the expected form.

    Side effects:
        Sets the module-level ``base_url``, ``rubric`` and ``message``;
        deletes and recreates the ``docs/`` folder on a successful fetch;
        prints the error text when the topic request fails.
    """
    global base_url, rubric, message
    # Dots are escaped so that only the literal host name matches.
    match = re.match(r'https://canvas\.illinois\.edu/courses/(\d+)/discussion_topics/(\d+)', input_url)
    if not match:
        raise ValueError("Invalid URL")
    course_id, discussion_topic_id = match.groups()
    base_url = 'https://canvas.illinois.edu'
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    discussion_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
    instruction_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
    instruction_response = requests.get(instruction_url, headers=headers)
    if not instruction_response.ok:
        print(f'Error: {instruction_response.text}')
        return

    instruction_data = instruction_response.json()
    print(instruction_data)
    rubric = []

    # Extract title if it exists
    if 'title' in instruction_data:
        rubric.append({'title': instruction_data['title']})

    # A topic without an assignment (or with a null one) must not raise
    # KeyError/TypeError; treat it as having no instruction or rubric.
    assignment = instruction_data.get('assignment') or {}
    description = assignment.get('description')
    if description is not None:
        soup = BeautifulSoup(description, 'html.parser')
        message = soup.get_text()
        rubric.append({'instruction': message})
        # As before, rubric criteria are only used alongside a description.
        rubric.extend(assignment.get('rubric') or [])

    if 'points_possible' in assignment:
        rubric.append({'points_possible': assignment['points_possible']})

    # Always start from an empty docs folder: messages are appended to the
    # per-author files, so stale files would mix in earlier ingestions.
    if os.path.exists('docs'):
        shutil.rmtree('docs')
    os.makedirs('docs')
    with open(rubric_file, 'w') as f:
        json.dump(rubric, f)
    print("Extracted instructions and rubric")

    # A non-empty 'group_topic_children' marks a group discussion; an absent
    # or empty value means the entries live on the topic itself.
    group_topic_children = instruction_data.get('group_topic_children')
    if group_topic_children:
        # Extract and save group-based discussions
        group_entries = extract_group_discussions(group_topic_children, headers)
        print("Extracted group discussion entries")
        for group_entry in group_entries:
            save_messages(group_entry['entries'], group_entry['group_id'])
    else:
        # Extract and save standalone individual or group-based discussion
        individual_entries = extract_individual_discussion(discussion_url, headers)
        print("Extracted individual discussion entries")
        save_messages(individual_entries)
|
170 |
+
|
171 |
+
|
172 |
+
def create_vector_store():
    """Placeholder: nothing is built here and ``None`` is always returned."""
    store = None
    return store
|
requirements.in
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
lanarky
|
2 |
+
langchain
|
3 |
+
openai
|
4 |
+
tiktoken
|
5 |
+
faiss-cpu
|
6 |
+
gradio
|
7 |
+
fastapi
|
8 |
+
uvicorn[standard]
|
9 |
+
bs4
|
10 |
+
pathvalidate
|
11 |
+
unstructured
|
utils.py
ADDED
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from langchain import FAISS
|
4 |
+
from langchain.chains import ConversationalRetrievalChain
|
5 |
+
from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader, TextLoader, CSVLoader
|
6 |
+
from langchain.memory import ConversationSummaryBufferMemory
|
7 |
+
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
|
8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
|
9 |
+
|
10 |
+
from grader import Grader
|
11 |
+
|
12 |
+
|
13 |
+
def search_index_from_docs(source_chunks, embeddings):
    """Build a FAISS search index from document chunks.

    Args:
        source_chunks: Documents to index, passed straight to
            ``FAISS.from_documents``.
        embeddings: Embedding model handed to ``FAISS.from_documents``.

    Returns:
        The index object returned by ``FAISS.from_documents``.
    """
    return FAISS.from_documents(source_chunks, embeddings)
|
18 |
+
|
19 |
+
|
20 |
+
def get_chat_history(inputs) -> str:
    """Render conversation turns as a single newline-separated transcript.

    Args:
        inputs: Iterable of ``(human, ai)`` pairs, one per exchange.

    Returns:
        One ``Human:...`` line followed by one ``AI:...`` line per pair,
        joined with newlines; an empty string when there are no pairs.
    """
    return "\n".join(
        f"Human:{user_turn}\nAI:{bot_turn}" for user_turn, bot_turn in inputs
    )
|
25 |
+
|
26 |
+
|
27 |
+
class GraderQA():
    """Conversational question answering over a grader's results.

    Loads (or builds from the grader's CSV) a FAISS index and wires it into a
    ``ConversationalRetrievalChain`` whose system prompt embeds the grader's
    rubric text.
    """

    def __init__(self, grader, embeddings):
        """Set up the index, prompt and retrieval chain.

        Args:
            grader: Object exposing ``llm``, ``rubric_text`` and ``csv``
                (the path handed to ``CSVLoader``).
            embeddings: Embedding model used to build or load the FAISS index.
        """
        self.grader = grader
        self.llm = self.grader.llm
        # File names FAISS writes for index_name="canvas-discussions".
        self.index_file = "vector_stores/canvas-discussions.faiss"
        self.pickle_file = "vector_stores/canvas-discussions.pkl"
        self.rubric_text = grader.rubric_text
        # Documents retrieved for the most recent question (see run_qa_chain).
        self.docs = []
        # Prompt used by get_tokens to estimate prompt size per document.
        self.prompt = self.create_prompt()
        self.search_index = self.get_search_index(embeddings)
        self.chain = self.create_chain(embeddings)
        # Token estimate for the most recent question; None until one is asked.
        self.tokens = None
        self.question = None

    def get_search_index(self, embeddings):
        """Return the saved index if both files exist, else build a new one."""
        saved_index_usable = (
            os.path.isfile(self.pickle_file)
            and os.path.isfile(self.index_file)
            and os.path.getsize(self.pickle_file) > 0
        )
        if saved_index_usable:
            # Reuse the index persisted by a previous run.
            search_index = self.load_index(embeddings)
        else:
            search_index = self.create_index(embeddings)
            print("Created index")
        return search_index

    def load_index(self, embeddings):
        """Load the persisted FAISS index from ``vector_stores/``."""
        # NOTE(review): FAISS.load_local reads a pickle file; only load index
        # files this application wrote itself.
        db = FAISS.load_local(
            folder_path="vector_stores/",
            index_name="canvas-discussions",
            embeddings=embeddings,
        )
        print("Loaded index")
        return db

    def create_index(self, embeddings):
        """Build the index from the grader's CSV and persist it to disk."""
        source_chunks = self.create_chunk_documents()
        search_index = search_index_from_docs(source_chunks, embeddings)
        search_index.save_local(folder_path="vector_stores/", index_name="canvas-discussions")
        return search_index

    def create_chunk_documents(self):
        """Split the source documents into chunks of at most 500 characters."""
        sources = self.fetch_data_for_embeddings()

        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        source_chunks = splitter.split_documents(sources)

        print("chunks: " + str(len(source_chunks)))
        print("sources: " + str(len(sources)))

        return source_chunks

    def fetch_data_for_embeddings(self):
        """Return the documents that should be embedded (the CSV rows)."""
        document_list = self.get_csv_files()
        print("document list: " + str(len(document_list)))
        return document_list

    def get_csv_files(self):
        """Load the grader's CSV, using ``student_name`` as each row's source."""
        loader = CSVLoader(file_path=self.grader.csv, source_column="student_name")
        return loader.load()

    def create_chain(self, embeddings):
        """Create the conversational retrieval chain over the search index."""
        if not self.search_index:
            self.search_index = self.load_index(embeddings)
        retriever = self.search_index.as_retriever(
            search_type='mmr',
            search_kwargs={'lambda_mult': 1, 'fetch_k': 50, 'k': 30},
        )
        memory = ConversationSummaryBufferMemory(
            memory_key='chat_history',
            llm=self.llm,
            max_token_limit=40,
            return_messages=True,
            # The chain returns both 'answer' and 'source_documents'; memory
            # needs to be told which one to record.
            output_key='answer',
        )
        chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever,
            return_source_documents=True,
            verbose=True,
            memory=memory,
            get_chat_history=get_chat_history,
            combine_docs_chain_kwargs={"prompt": self.create_prompt()},
        )
        return chain

    def create_prompt(self):
        """Build the chat prompt: rubric-bearing system message plus the question."""
        # The template is parsed as a format string, so literal braces in the
        # rubric must be doubled or they would be read as template variables.
        rubric = str(self.rubric_text).replace("{", "{{").replace("}", "}}")
        system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
Use the following pieces of context to answer the users question.
----------------
{rubric}
----------------
{{context}}"""
        messages = [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        return ChatPromptTemplate.from_messages(messages)

    def get_tokens(self):
        """Estimate prompt tokens for the current question across ``self.docs``.

        Each document is formatted into the prompt on its own and the per-prompt
        token counts are summed.

        Returns:
            Total token count; ``0`` when no documents have been retrieved.
        """
        total_tokens = 0
        for doc in self.docs:
            # Retrieved documents carry their text in page_content; fall back
            # to the object itself for plain strings.
            context = getattr(doc, "page_content", doc)
            chat_prompt = self.prompt.format(context=context, question=self.question)
            total_tokens += self.llm.get_num_tokens(chat_prompt)
        return total_tokens

    def run_qa_chain(self, question):
        """Answer ``question`` with the retrieval chain.

        Also records the retrieved documents in ``self.docs`` and the
        corresponding token estimate in ``self.tokens``.

        Returns:
            The chain's result mapping (includes ``answer`` and
            ``source_documents``).
        """
        self.question = question
        answer = self.chain(question)
        # Token counting needs the retrieved documents, which only exist once
        # the chain has run.
        self.docs = answer.get("source_documents", [])
        self.tokens = self.get_tokens()
        return answer
|
137 |
+
|
138 |
+
# system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
|
139 |
+
# You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
|
140 |
+
# Use the following pieces of context to answer the users question.
|
141 |
+
# ----------------
|
142 |
+
# {context}"""
|
143 |
+
#
|
144 |
+
# messages = [
|
145 |
+
# SystemMessagePromptTemplate.from_template(system_template),
|
146 |
+
# HumanMessagePromptTemplate.from_template("{question}"),
|
147 |
+
# ]
|
148 |
+
# CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
|
149 |
+
#
|
150 |
+
#
|
151 |
+
# def get_search_index(embeddings):
|
152 |
+
# global vectorstore_index
|
153 |
+
# if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
|
154 |
+
# # Load index from pickle file
|
155 |
+
# search_index = load_index(embeddings)
|
156 |
+
# else:
|
157 |
+
# search_index = create_index(model)
|
158 |
+
# print("Created index")
|
159 |
+
#
|
160 |
+
# vectorstore_index = search_index
|
161 |
+
# return search_index
|
162 |
+
#
|
163 |
+
#
|
164 |
+
# def create_index(embeddings):
|
165 |
+
# source_chunks = create_chunk_documents()
|
166 |
+
# search_index = search_index_from_docs(source_chunks, embeddings)
|
167 |
+
# # search_index.persist()
|
168 |
+
# FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
|
169 |
+
# # Save index to pickle file
|
170 |
+
# # with open(pickle_file, "wb") as f:
|
171 |
+
# # pickle.dump(search_index, f)
|
172 |
+
# return search_index
|
173 |
+
#
|
174 |
+
#
|
175 |
+
# def search_index_from_docs(source_chunks, embeddings):
|
176 |
+
# # print("source chunks: " + str(len(source_chunks)))
|
177 |
+
# # print("embeddings: " + str(embeddings))
|
178 |
+
# search_index = FAISS.from_documents(source_chunks, embeddings)
|
179 |
+
# return search_index
|
180 |
+
#
|
181 |
+
#
|
182 |
+
# def get_html_files():
|
183 |
+
# loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
|
184 |
+
# document_list = loader.load()
|
185 |
+
# for document in document_list:
|
186 |
+
# document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
|
187 |
+
# return document_list
|
188 |
+
#
|
189 |
+
#
|
190 |
+
# def get_text_files():
|
191 |
+
# loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
|
192 |
+
# document_list = loader.load()
|
193 |
+
# return document_list
|
194 |
+
#
|
195 |
+
#
|
196 |
+
# def create_chunk_documents():
|
197 |
+
# sources = fetch_data_for_embeddings()
|
198 |
+
#
|
199 |
+
# splitter = RecursiveCharacterTextSplitter.from_language(
|
200 |
+
# language=Language.HTML, chunk_size=500, chunk_overlap=0
|
201 |
+
# )
|
202 |
+
#
|
203 |
+
# source_chunks = splitter.split_documents(sources)
|
204 |
+
#
|
205 |
+
# print("chunks: " + str(len(source_chunks)))
|
206 |
+
# print("sources: " + str(len(sources)))
|
207 |
+
#
|
208 |
+
# return source_chunks
|
209 |
+
#
|
210 |
+
#
|
211 |
+
# def create_chain(question, llm, embeddings):
|
212 |
+
# db = load_index(embeddings)
|
213 |
+
#
|
214 |
+
# # Create chain
|
215 |
+
# chain = ConversationalRetrievalChain.from_llm(llm, db.as_retriever(search_type='mmr',
|
216 |
+
# search_kwargs={'lambda_mult': 1, 'fetch_k': 50,
|
217 |
+
# 'k': 30}),
|
218 |
+
# return_source_documents=True,
|
219 |
+
# verbose=True,
|
220 |
+
# memory=ConversationSummaryBufferMemory(memory_key='chat_history',
|
221 |
+
# llm=llm, max_token_limit=40,
|
222 |
+
# return_messages=True,
|
223 |
+
# output_key='answer'),
|
224 |
+
# get_chat_history=get_chat_history,
|
225 |
+
# combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
|
226 |
+
#
|
227 |
+
# result = chain({"question": question})
|
228 |
+
#
|
229 |
+
# sources = []
|
230 |
+
# print(result)
|
231 |
+
#
|
232 |
+
# for document in result['source_documents']:
|
233 |
+
# sources.append("\n" + str(document.metadata))
|
234 |
+
# print(sources)
|
235 |
+
#
|
236 |
+
# source = ',\n'.join(set(sources))
|
237 |
+
# return result['answer'] + '\nSOURCES: ' + source
|
238 |
+
#
|
239 |
+
#
|
240 |
+
# def load_index(embeddings):
|
241 |
+
# # Load index
|
242 |
+
# db = FAISS.load_local(
|
243 |
+
# folder_path="vector_stores/",
|
244 |
+
# index_name="canvas-discussions", embeddings=embeddings,
|
245 |
+
# )
|
246 |
+
# return db
|
247 |
+
#
|
248 |
+
#
|
249 |
+
# def get_chat_history(inputs) -> str:
|
250 |
+
# res = []
|
251 |
+
# for human, ai in inputs:
|
252 |
+
# res.append(f"Human:{human}\nAI:{ai}")
|
253 |
+
# return "\n".join(res)
|