Grading changes

- .gitattributes +1 -0
- app.py +96 -0
- custom_faiss.py +125 -0
- discussion.py +50 -0
- discussion_.py +110 -0
- discussion_1.py +39 -0
- main.py +11 -0
- models/openai_vs.index +3 -0
- models/openai_vs.pkl +0 -0
- requirements.txt +13 -0
- schema.py +30 -0
- utils.py +515 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
*.index filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,96 @@
import gradio as gr
from main import index, run
from gtts import gTTS
import time

from transformers import pipeline

# Speech-to-text pipeline for the microphone input
p = pipeline("automatic-speech-recognition")

"""Use text to call chat method from main.py"""

def add_text(history, text):
    print("Question asked: " + text)
    response = run_model(text)
    history = history + [(text, response)]
    print(history)
    return history, ""


def run_model(text):
    start_time = time.time()
    print("start time:" + str(start_time))
    response = run(text)
    end_time = time.time()
    # If the response contains the string `SOURCES:`, add a \n before `SOURCES`
    if "SOURCES:" in response:
        response = response.replace("SOURCES:", "\nSOURCES:")
    # response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
    print(response)
    print("Time taken: " + str(end_time - start_time))
    return response


def get_output(history, audio):
    txt = p(audio)["text"]
    # history.append(( (audio, ) , txt))
    audio_path = 'response.wav'
    response = run_model(txt)
    # Remove all text from SOURCES: to the end of the string
    trimmed_response = response.split("SOURCES:")[0]
    myobj = gTTS(text=trimmed_response, lang='en', slow=False)
    myobj.save(audio_path)
    # split audio by / and keep the last element
    # audio = audio.split("/")[-1]
    # audio = audio + ".wav"
    history.append(((audio,), (audio_path,)))
    print(history)
    return history


def set_model(history):
    history = get_first_message(history)
    index()
    return history


def get_first_message(history):
    history = [(None,
                'Get your Canvas discussion graded. Add your discussion URL and get your discussions graded instantly.')]
    return history


def bot(history):
    return history


with gr.Blocks() as demo:

    chatbot = gr.Chatbot(get_first_message([]), elem_id="chatbot").style(height=600)

    with gr.Row():
        with gr.Column(scale=0.75):
            txt = gr.Textbox(
                label="8 Nouns Grading Bot",
                placeholder="Enter text and press enter, or ask your question with the microphone", lines=1
            ).style(container=False)

        with gr.Column(scale=0.25):
            audio = gr.Audio(source="microphone", type="filepath").style(container=False)

    txt.submit(add_text, [chatbot, txt], [chatbot, txt], postprocess=False).then(
        bot, chatbot, chatbot
    )

    audio.change(fn=get_output, inputs=[chatbot, audio], outputs=[chatbot]).then(
        bot, chatbot, chatbot
    )

    # Clear the audio widget after each recording is processed
    audio.change(lambda: None, None, audio)

    set_model(chatbot)

if __name__ == "__main__":
    demo.queue(concurrency_count=5)
    demo.launch(debug=True)
custom_faiss.py
ADDED
@@ -0,0 +1,125 @@
from langchain.vectorstores import FAISS
import math
import os
import pickle
import uuid
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

import numpy as np

from langchain.docstore.base import AddableMixin, Docstore
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.embeddings.base import Embeddings
from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance


class MyFAISS(FAISS):

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        _, indices = self.index.search(
            np.array([embedding], dtype=np.float32),
            fetch_k if filter is None else fetch_k * 2,
        )
        if filter is not None:
            filtered_indices = []
            for i in indices[0]:
                if i == -1:
                    # This happens when not enough docs are returned.
                    continue
                _id = self.index_to_docstore_id[i]
                doc = self.docstore.search(_id)
                if not isinstance(doc, Document):
                    raise ValueError(f"Could not find document for id {_id}, got {doc}")

                print("metadata: " + str(doc.metadata))
                print("filter: " + str(filter))
                if any(filter_word in doc.metadata.get(key, '') for key, value in filter.items() for filter_word in
                       value.split()):
                    filtered_indices.append(i)
            indices = np.array([filtered_indices])
        # -1 happens when not enough docs are returned.
        embeddings = [self.index.reconstruct(int(i)) for i in indices[0] if i != -1]
        mmr_selected = maximal_marginal_relevance(
            np.array([embedding], dtype=np.float32),
            embeddings,
            k=k,
            lambda_mult=lambda_mult,
        )
        selected_indices = [indices[0][i] for i in mmr_selected]
        docs = []
        for i in selected_indices:
            if i == -1:
                # This happens when not enough docs are returned.
                continue
            _id = self.index_to_docstore_id[i]
            doc = self.docstore.search(_id)
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            docs.append(doc)
        return docs

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering (if needed) to
                pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        print("MMR search")
        embedding = self.embedding_function(query)
        docs = self.max_marginal_relevance_search_by_vector(
            embedding,
            k,
            fetch_k,
            lambda_mult=lambda_mult,
            filter=filter,
            **kwargs,
        )
        return docs
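Note: a minimal standalone sketch of how this subclass behaves; the tiny corpus, the student names, and the `name` metadata key are illustrative (utils.py sets `name` the same way when loading the HTML files), and the snippet is not part of this commit.

# Illustrative only; assumes OPENAI_API_KEY is set.
from custom_faiss import MyFAISS
from langchain.embeddings import OpenAIEmbeddings

emb = OpenAIEmbeddings(model='text-embedding-ada-002')
store = MyFAISS.from_texts(
    ["I like robotics and chess", "I enjoy painting and hiking"],
    emb,
    metadatas=[{"name": "Alice"}, {"name": "Bob"}],  # hypothetical students
)
# Only documents whose metadata value contains a whitespace-separated word
# from the filter value survive; MMR then re-ranks the survivors.
docs = store.max_marginal_relevance_search("robotics", k=1, fetch_k=10,
                                           filter={"name": "Alice"})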
discussion.py
ADDED
@@ -0,0 +1,50 @@
import requests
import json
import time

# Replace these variables with your own information
access_token = 'YOUR_ACCESS_TOKEN'
course_id = '36263'
discussion_topic_id = '421517'
base_url = 'https://canvas.illinois.edu'

headers = {
    'Authorization': f'Bearer {access_token}'
}

# Create a content export
export_url = f'{base_url}/api/v1/courses/{course_id}/content_exports'
export_params = {
    'export_type': 'common_cartridge',
    'skip_notifications': True,
    'select': {
        'discussion_topics': [discussion_topic_id]
    }
}

export_response = requests.post(export_url, headers=headers, params=export_params)

if export_response.ok:
    export_data = export_response.json()
    export_id = export_data['id']

    # Check the progress of the content export
    # (Canvas also returns a ready-made 'progress_url' in the export response,
    # which may be the more reliable URL to poll.)
    progress_url = f'{base_url}/api/v1/progress/{export_id}'
    progress_response = requests.get(progress_url, headers=headers)

    if progress_response.ok:
        progress_data = progress_response.json()
        while progress_data['workflow_state'] not in ['completed', 'failed']:
            time.sleep(1)  # avoid hammering the API while the export runs
            progress_response = requests.get(progress_url, headers=headers)
            progress_data = progress_response.json()

        if progress_data['workflow_state'] == 'completed':
            # Download the exported content
            download_url = progress_data['url']
            download_response = requests.get(download_url)

            if download_response.ok:
                # Save the exported content to a file
                with open('discussion_topic_export.imscc', 'wb') as f:
                    f.write(download_response.content)
else:
    print(f'Error: {export_response.text}')
discussion_.py
ADDED
@@ -0,0 +1,110 @@
import requests
import json
import os
from typing import List

class DiscussionEntry:
    def __init__(self, id: int, parent_id: int, name: str, message: str, replies: List):
        self.id = id
        self.parent_id = parent_id
        self.name = name
        self.message = message
        self.replies = replies

    def to_json(self):
        return {
            'id': self.id,
            'parent_id': self.parent_id,
            'name': self.name,
            'message': self.message,
            'replies': [reply.to_json() for reply in self.replies]
        }

def extract_entries(entries, participants):
    result = []
    for entry in entries:
        if 'message' in entry and 'deleted' not in entry:
            id = entry['id']
            parent_id = entry['parent_id']
            user_id = entry['user_id']
            name = next((p['display_name'] for p in participants if p['id'] == user_id), None)
            message = entry['message']
            replies = []
            if 'replies' in entry:
                replies = extract_entries(entry['replies'], participants)
            result.append(DiscussionEntry(id, parent_id, name, message, replies))
    return result

def save_messages(entries):
    for entry in entries:
        # Save the message as an HTML file
        filename = f'docs/{entry.name}.html'

        # Open file in write/append mode
        with open(filename, 'a+') as f:
            if entry.parent_id is None:
                f.write(f'<p><b>Student Post: {entry.name}</b></p>')
                f.write(entry.message)
                f.write('<hr>')
            else:
                f.write(f'<p><b>Reply to: {entry.parent_id}</b></p>')
                f.write(entry.message)
                f.write('<hr>')

    # Save the messages of the replies
    for entry in entries:
        save_messages(entry.replies)

# Replace these variables with your own information
access_token = ''
course_id = '36263'
discussion_topic_id = '421517'
base_url = 'https://canvas.illinois.edu'

headers = {
    'Authorization': f'Bearer {access_token}'
}

# Retrieve the full discussion topic data
discussion_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
discussion_response = requests.get(discussion_url, headers=headers)

if discussion_response.ok:
    discussion_data = discussion_response.json()
    with open('discussion_data.json', 'w') as f:
        json.dump(discussion_data, f)

    # Extract the desired fields from the replies and responses
    entries = extract_entries(discussion_data['view'], discussion_data['participants'])

    # Save the extracted data to a file
    with open('discussion_entries.json', 'w') as f:
        json.dump([entry.to_json() for entry in entries], f)

    # Create the /docs directory if it does not exist
    os.makedirs('docs', exist_ok=True)

    # Save the messages as HTML files under the /docs directory
    save_messages(entries)

    # Extract the rubric and save it to a file
    if 'rubric' in discussion_data:
        rubric = discussion_data['rubric']
        with open('rubric.json', 'w') as f:
            json.dump(rubric, f)
else:
    print(f'Error: {discussion_response.text}')

rubric_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
rubric_response = requests.get(rubric_url, headers=headers)

if rubric_response.ok:
    rubric_data = rubric_response.json()
    # print(rubric_data)
    if 'rubric' in rubric_data['assignment']:
        rubric = rubric_data['assignment']['rubric']
        with open('rubric_data.json', 'w') as f:
            json.dump(rubric, f)
discussion_1.py
ADDED
@@ -0,0 +1,39 @@
import requests
import json

# Replace these variables with your own information
access_token = ''
course_id = '36263'
discussion_topic_id = '421517'
base_url = 'https://canvas.illinois.edu'

headers = {
    'Authorization': f'Bearer {access_token}'
}

# Retrieve the full discussion topic data
discussion_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
discussion_response = requests.get(discussion_url, headers=headers)

if discussion_response.ok:
    discussion_data = discussion_response.json()

    with open('discussion_data.json', 'w') as f:
        json.dump(discussion_data, f)

    # Extract the replies and responses
    discussions = []
    replies = []
    for entry in discussion_data['view']:
        # append the entry dict itself; extend() would add the dict's keys one by one
        discussions.append(entry)
        if 'replies' in entry:
            replies.extend(entry['replies'])

    with open('discussions.json', 'w') as f:
        json.dump(discussions, f)

    # Save the replies and responses to a file
    with open('discussion_replies.json', 'w') as f:
        json.dump(replies, f)
else:
    print(f'Error: {discussion_response.text}')
main.py
ADDED
@@ -0,0 +1,11 @@
from utils import get_search_index, generate_answer, set_model_and_embeddings, get_question_type

def index():
    set_model_and_embeddings()
    get_search_index()
    return True

def run(question):
    index()
    # return generate_answer(question)
    return get_question_type(question)
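Note: the two entry points above can also be driven without the Gradio UI; a minimal sketch, assuming OPENAI_API_KEY is set and either the docs/ folder or the pickled index under models/ is present. The question text is made up.

from main import index, run

index()  # loads the pickled FAISS index, or builds one from docs/
print(run("What are Alice's interests?"))  # hypothetical question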
models/openai_vs.index
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:76c918e13944c7b3a409671e5d5ec4f94a4b260b82bd197b2bc3a39c433e1f9d
size 196653
models/openai_vs.pkl
ADDED
Binary file (277 kB).
requirements.txt
ADDED
@@ -0,0 +1,13 @@
langchain
openai
faiss-cpu==1.7.3
unstructured==0.5.8
ffmpeg-python
transformers
gtts
torch
tiktoken
huggingface-hub
google-generativeai
gradio
jq
schema.py
ADDED
@@ -0,0 +1,30 @@
import json

from pydantic import BaseModel
from typing import List, Optional

class ForumUser(BaseModel):
    id: int
    anonymous_id: str
    display_name: str
    avatar_image_url: str
    html_url: str
    pronouns: Optional[str]

class ForumPost(BaseModel):
    id: int
    user_id: int
    parent_id: Optional[int]
    created_at: str
    updated_at: str
    rating_count: Optional[int]
    rating_sum: Optional[int]
    user_name: str
    message: str
    user: ForumUser
    read_state: str
    forced_read_state: bool

def get_data_from_json(file_path):
    with open(file_path, "r") as f:
        json_data = json.load(f)
    data = [ForumPost(**item) for item in json_data]
    return data
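Note: a hedged usage sketch for get_data_from_json. `forum_posts.json` is a hypothetical file whose items must carry every ForumPost field; the discussion_entries.json written by discussion_.py has a different shape and would not validate.

from schema import get_data_from_json

posts = get_data_from_json("forum_posts.json")  # hypothetical input file
for post in posts:
    print(post.user.display_name, "->", post.message[:60])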
utils.py
ADDED
@@ -0,0 +1,515 @@
import os
import pickle
import langchain

import faiss
from langchain import HuggingFaceHub, PromptTemplate
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredHTMLLoader
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    StringPromptTemplate
)
from langchain.output_parsers import PydanticOutputParser
from langchain.tools.json.tool import JsonSpec

from typing import List, Union, Callable
from langchain.schema import AgentAction, AgentFinish
import re
from langchain.text_splitter import CharacterTextSplitter
from custom_faiss import MyFAISS
from langchain.cache import InMemoryCache
from langchain.chat_models import ChatGooglePalm
from langchain.document_loaders import JSONLoader
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, BaseMultiActionAgent
from langchain.tools import StructuredTool
from langchain.chains import create_tagging_chain
from typing import List, Tuple, Any, Union
from pydantic import BaseModel, Field
from typing import Optional

class ToolArgsSchema(BaseModel):
    student_name: Optional[str] = Field(description="The name of the student")
    question: str = Field(description="The question being asked")
    question_type: str = Field(description="The type of question being asked")
    interest: Optional[str] = Field(description="The interest of the student")

    class Config:
        schema_extra = {
            "required": ["question", "question_type"]
        }


langchain.llm_cache = InMemoryCache()

model_name = "GPT-4"

pickle_file = "_vs.pkl"
index_file = "_vs.index"
models_folder = "models/"
os.environ["LANGCHAIN_TRACING"] = "true"
discussions_file_path = "discussion_entries.json"

llm = OpenAI(model_name="gpt-3.5-turbo-16k", temperature=0, verbose=True)

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

chat_history = []

memory = ConversationBufferWindowMemory(memory_key="chat_history", k=10)

vectorstore_index = None

agent_prompt = """
I am the LLM AI canvas discussion grading assistant.
I can answer two types of questions: grade-based questions and interest-based questions.
Grade-based questions are about the grades of a certain student or a group of students based on the rubric below for the canvas discussion on the topic 8 nouns.
Interest-based questions are about the interests or skills of a certain student or a group of students based on their discussion posts.
You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what type of question it is
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
{agent_scratchpad}
"""

# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    # The template to use
    template: str
    ############## NEW ######################
    # The list of tools available
    tools_getter: Callable

    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        ############## NEW ######################
        tools = self.tools_getter(kwargs["input"])
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join(
            [f"{tool.name}: {tool.description}" for tool in tools]
        )
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in tools])
        return self.template.format(**kwargs)

class CustomOutputParser(AgentOutputParser):

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        print("llm_output")
        print(llm_output)
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)

system_template = """
I am the LLM AI canvas discussion grading assistant.
I can answer two types of questions: grade-based questions and interest-based questions.
Grade-based questions are about the grades of a certain student or a group of students based on the rubric below for the canvas discussion on the topic 8 nouns.
Interest-based questions are about the interests or skills of a certain student or a group of students based on their discussion posts.
To grade student discussions, I will follow the rubric below.

Student Post

3 points: Post includes 8 nouns and text describing how these nouns relate to the student.
2 points: Student's post includes 8 nouns but does not offer how those nouns relate to the student.
1 point: Student's post has significant missing details.
0 points: The student does not provide an initial post, or otherwise does not follow assignment instructions.


Response to Others

3 points: Student responds to at least 3 other student discussion threads AND responds to questions asked of them. Student posts insightful comments that prompt on-target discussion. These posts also avoid throwaway comments such as I agree, Me too, Good idea.
2 points: Student was notably lacking in one criterion.
1 point: Student was notably lacking in two criteria.
0 points: The student does not interact in the threads of other students.
I will be able to identify each student by name, and I will be able to share their likings, interests, and other characteristics. I will also be able to filter out students based on their interests.

I will not deviate from the grading scheme. I will grade each discussion entry and reply carefully, and I will share the grades of all individuals by name on the basis of the rubric with the final score.

The discussions and their replies are in the following format:
Student Post: Student Name
Reply to: Another Student Discussion ID

Following are the relevant discussions to grade or answer the interest-based questions
----------------
Discussions:
{context}"""

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)


def set_model_and_embeddings():
    global chat_history
    # set_model(model)
    # set_embeddings(model)
    chat_history = []

def set_embeddings(model):
    global embeddings
    if model == "GPT-3.5" or model == "GPT-4":
        print("Loading OpenAI embeddings")
        embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
    elif model == "Flan UL2" or model == "Flan T5":
        print("Loading Hugging Face embeddings")
        embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")


def get_search_index():
    global vectorstore_index, model_name
    if os.path.isfile(get_file_path(model_name, pickle_file)) and os.path.isfile(
            get_file_path(model_name, index_file)) and os.path.getsize(get_file_path(model_name, pickle_file)) > 0:
        # Load index from pickle file
        with open(get_file_path(model_name, pickle_file), "rb") as f:
            # search_index = Chroma(persist_directory=models_folder, embedding_function=embeddings)
            search_index = pickle.load(f)
            print("Loaded index")
    else:
        search_index = create_index(model_name)
        print("Created index")

    vectorstore_index = search_index
    return search_index


def create_index(model):
    source_chunks = create_chunk_documents()
    search_index = search_index_from_docs(source_chunks)
    # search_index.persist()
    faiss.write_index(search_index.index, get_file_path(model, index_file))
    # Save index to pickle file
    with open(get_file_path(model, pickle_file), "wb") as f:
        pickle.dump(search_index, f)
    return search_index


def get_file_path(model, file):
    # If model is GPT3.5 or GPT4 return models_folder + openai + file else return models_folder + hf + file
    if model == "GPT-3.5" or model == "GPT-4":
        return models_folder + "openai" + file
    else:
        return models_folder + "hf" + file


def search_index_from_docs(source_chunks):
    # print("source chunks: " + str(len(source_chunks)))
    # print("embeddings: " + str(embeddings))

    search_index = MyFAISS.from_documents(source_chunks, embeddings)
    return search_index


def get_html_files():
    loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
    document_list = loader.load()
    for document in document_list:
        document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
    return document_list

def metadata_func(record: dict, metadata: dict) -> dict:
    metadata["name"] = record.get("name")
    return metadata

def get_json_file():
    global discussions_file_path
    loader = JSONLoader(
        file_path=discussions_file_path,
        jq_schema='.[]', metadata_func=metadata_func, content_key="message")
    return loader.load()

def fetch_data_for_embeddings():
    # document_list = get_text_files()
    document_list = get_html_files()
    # document_list = get_json_file()
    print("document list: " + str(len(document_list)))
    return document_list


def get_text_files():
    loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
    document_list = loader.load()
    return document_list


def create_chunk_documents():
    sources = fetch_data_for_embeddings()

    splitter = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)

    source_chunks = splitter.split_documents(sources)

    print("chunks: " + str(len(source_chunks)))

    return source_chunks


def get_qa_chain(vectorstore_index, question, metadata):
    global llm, model_name
    print(llm)
    filter_dict = {"name": metadata.student_name}
    # embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
    # compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=gpt_3_5_index.as_retriever())
    retriever = get_retriever(filter_dict, vectorstore_index, metadata)

    print(retriever.get_relevant_documents(question))

    chain = ConversationalRetrievalChain.from_llm(llm, retriever, return_source_documents=True,
                                                  verbose=True, get_chat_history=get_chat_history,
                                                  combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
    return chain


def get_retriever(filter_dict, vectorstore_index, metadata):
    if metadata.question_type == "grade-based":
        retriever = vectorstore_index.as_retriever(search_type='mmr',
                                                   search_kwargs={'lambda_mult': 1, 'fetch_k': 20, 'k': 10,
                                                                  'filter': filter_dict})
    else:
        retriever = vectorstore_index.as_retriever(search_type='mmr',
                                                   search_kwargs={'lambda_mult': 1, 'fetch_k': 20, 'k': 10})

    return retriever


def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Human:{human}\nAI:{ai}")
    return "\n".join(res)


def generate_answer(question, metadata: ToolArgsSchema) -> str:
    # print("filter: " + filter)
    global chat_history, vectorstore_index
    chain = get_qa_chain(vectorstore_index, question, metadata)

    result = chain(
        {"question": question, "chat_history": chat_history})
    chat_history.extend([(question, result["answer"])])
    sources = []
    print(result)

    for document in result['source_documents']:
        source = document.metadata['source']
        sources.append(source.split('/')[-1].split('.')[0])
    print(sources)

    source = ',\n'.join(set(sources))
    # return result['answer'] + '\nSOURCES: ' + source
    return result['answer']

def get_question_type(question):

    parser = PydanticOutputParser(pydantic_object=ToolArgsSchema)
    prompt_template = """I can answer two types of questions: grade-based questions and interest-based questions.
Grade-based questions are about the grades of a certain student or a group of students based on the rubric below for the canvas discussion on the topic 8 nouns.
Interest-based questions are about the interests or skills of a certain student or a group of students based on their discussion posts.
Question: {question}
Find the following information about the question asked. Return Optional empty if the information is not available:
Format instructions: {format_instructions}"""

    llm = OpenAI(temperature=0)
    prompt = PromptTemplate(template=prompt_template, input_variables=["question"], output_parser=parser,
                            partial_variables={"format_instructions": parser.get_format_instructions()})
    llm_chain = LLMChain(
        llm=llm,
        prompt=prompt,
    )
    output = llm_chain.run(question)
    output = parser.parse(output)
    output = generate_answer(question, output)
    return output


# class FakeAgent(BaseMultiActionAgent):
#     """Fake Custom Agent."""
#
#     @property
#     def input_keys(self):
#         return ["input"]
#
#     def plan(
#             self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
#     ) -> Union[List[AgentAction], AgentFinish]:
#         print("input keys")
#         print(self.input_keys)
#         print("intermediate steps")
#         print(intermediate_steps)
#         print("kwargs")
#         print(kwargs)
#
#         """Given input, decided what to do.
#
#         Args:
#             intermediate_steps: Steps the LLM has taken to date,
#                 along with observations
#             **kwargs: User inputs.
#
#         Returns:
#             Action specifying what tool to use.
#         """
#         if len(intermediate_steps) == 0:
#             first_action = AgentAction(tool="question type", tool_input=kwargs["input"], log="")
#             print("first action")
#             print(first_action)
#             second_action = AgentAction(tool="Grade", tool_input=kwargs["input"], log="")
#             print("second action")
#             print(second_action)
#             return [
#                 first_action,
#                 second_action,
#             ]
#         else:
#             return AgentFinish(return_values={"output": "bar"}, log="")
#
#     async def aplan(
#             self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
#     ) -> Union[List[AgentAction], AgentFinish]:
#         """Given input, decided what to do.
#
#         Args:
#             intermediate_steps: Steps the LLM has taken to date,
#                 along with observations
#             **kwargs: User inputs.
#
#         Returns:
#             Action specifying what tool to use.
#         """
#         if len(intermediate_steps) == 0:
#             return [
#                 AgentAction(tool="question type", tool_input=kwargs["input"], log=""),
#                 AgentAction(tool="Grade",
#                             tool_input={
#                                 "student_name": kwargs["student_name"],
#                                 "question": kwargs["question"],
#                                 "question_type": kwargs["question_type"],
#                                 "interest": kwargs["interest"]
#                             }, log=""),
#             ]
#         else:
#             return AgentFinish(return_values={"output": "bar"}, log="")
#
#
# schema = {
#     "properties": {
#         "student_name": {"type": "string", "description": "The name of the student"},
#         "question": {"type": "string", "description": "The question being asked"},
#         "question type": {"type": "string",
#                           "enum": ["student grades", "student specific", "interest specific"],
#                           "description": "The type of question being asked"},
#         "interest": {"type": "string", "description": "The interest of the student"},
#     },
#     "required": ["question", "question type"]
# }


# def get_tagging_chain(question) -> str:
#     global schema
#     chain = create_tagging_chain(schema, llm)
#     first_answer = chain.run(question)
#     print("first answer:")
#     print(first_answer)
#     return first_answer
#
#
# def get_grading_agent():
#
#     tools = [
#         Tool(
#             name="question type",
#             func=get_tagging_chain,
#             description="Useful when you need to understand the type of the input."
#         ),
#         StructuredTool(
#             name="Grade",
#             func=generate_answer,
#             description="Useful when you need to answer questions about students, grades, interests, etc from the context of canvas discussion posts. If the question is student specific, student name is required.",
#             args_schema=ToolArgsSchema
#         )
#     ]
#     # agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
#
#     agent = FakeAgent(output_parser=CustomOutputParser())
#     # prompt = CustomPromptTemplate(template=agent_prompt, tools=tools, input_variables=["input", "intermediate_steps"])
#     # output_parser = CustomOutputParser()
#     # tool_names = [tool.name for tool in tools]
#     # llm_chain = LLMChain(llm=llm, prompt=prompt)
#     # agent = LLMSingleActionAgent(
#     #     llm_chain=llm_chain,
#     #     output_parser=output_parser,
#     #     stop=["\nObservation:"],
#     #     allowed_tools=tool_names,
#     # )
#     agent_executor = AgentExecutor.from_agent_and_tools(
#         agent=agent, tools=tools, verbose=True
#     )
#
#     # return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
#     return agent_executor
#
#
# def grade_answer(question) -> str:
#     global chat_history, vectorstore_index
#     agent = get_grading_agent()
#     return agent.run(question)
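Note: get_question_type leans on PydanticOutputParser to turn the tagging LLM's reply into a ToolArgsSchema before routing it to generate_answer. A standalone sketch of that round-trip, with a made-up model reply:

from langchain.output_parsers import PydanticOutputParser
from utils import ToolArgsSchema

parser = PydanticOutputParser(pydantic_object=ToolArgsSchema)
# A made-up reply in the shape parser.get_format_instructions() asks for:
raw = ('{"student_name": "Alice", "question": "What grade did Alice get?", '
       '"question_type": "grade-based", "interest": null}')
metadata = parser.parse(raw)
print(metadata.question_type)  # "grade-based" -> routed to the name-filtered MMR retriever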