rohan13 committed on
Commit
cc93546
·
1 Parent(s): ab0e4d5

Grading changes

Files changed (12)
  1. .gitattributes +1 -0
  2. app.py +96 -0
  3. custom_faiss.py +125 -0
  4. discussion.py +50 -0
  5. discussion_.py +110 -0
  6. discussion_1.py +39 -0
  7. main.py +11 -0
  8. models/openai_vs.index +3 -0
  9. models/openai_vs.pkl +0 -0
  10. requirements.txt +13 -0
  11. schema.py +30 -0
  12. utils.py +515 -0
.gitattributes ADDED
@@ -0,0 +1 @@
+ *.index filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,96 @@
+ import gradio as gr
+ from main import index, run
+ from gtts import gTTS
+ import os, time
+
+ from transformers import pipeline
+
+ p = pipeline("automatic-speech-recognition")
+
+ """Use text to call chat method from main.py"""
+
+ def add_text(history, text):
+     print("Question asked: " + text)
+     response = run_model(text)
+     history = history + [(text, response)]
+     print(history)
+     return history, ""
+
+
+ def run_model(text):
+     start_time = time.time()
+     print("start time: " + str(start_time))
+     response = run(text)
+     end_time = time.time()
+     # If the response contains the string `SOURCES:`, add a newline before it
+     if "SOURCES:" in response:
+         response = response.replace("SOURCES:", "\nSOURCES:")
+     # response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
+     print(response)
+     print("Time taken: " + str(end_time - start_time))
+     return response
+
+
+ def get_output(history, audio):
+     txt = p(audio)["text"]
+     # history.append(( (audio, ) , txt))
+     audio_path = 'response.wav'
+     response = run_model(txt)
+     # Remove all text from SOURCES: to the end of the string
+     trimmed_response = response.split("SOURCES:")[0]
+     myobj = gTTS(text=trimmed_response, lang='en', slow=False)
+     myobj.save(audio_path)
+     # split audio by / and keep the last element
+     # audio = audio.split("/")[-1]
+     # audio = audio + ".wav"
+     history.append(((audio,), (audio_path,)))
+     print(history)
+     return history
+
+ def set_model(history):
+     history = get_first_message(history)
+     index()
+     return history
+
+
+ def get_first_message(history):
+     history = [(None,
+                 'Get your Canvas discussion graded. Add your discussion URL and get your discussions graded instantly.')]
+     return history
+
+
+ def bot(history):
+     return history
+
+ with gr.Blocks() as demo:
+     chatbot = gr.Chatbot(get_first_message([]), elem_id="chatbot").style(height=600)
+
+     with gr.Row():
+         with gr.Column(scale=0.75):
+             txt = gr.Textbox(
+                 label="8 Nouns Grading Bot",
+                 placeholder="Enter text and press enter, or upload an image", lines=1
+             ).style(container=False)
+
+         with gr.Column(scale=0.25):
+             audio = gr.Audio(source="microphone", type="filepath").style(container=False)
+
+     txt.submit(add_text, [chatbot, txt], [chatbot, txt], postprocess=False).then(
+         bot, chatbot, chatbot
+     )
+
+     audio.change(fn=get_output, inputs=[chatbot, audio], outputs=[chatbot]).then(
+         bot, chatbot, chatbot
+     )
+
+     audio.change(lambda: None, None, audio)
+
+     set_model(chatbot)
+
+ if __name__ == "__main__":
+     demo.queue(concurrency_count=5)
+     demo.launch(debug=True)
custom_faiss.py ADDED
@@ -0,0 +1,125 @@
+ from langchain.vectorstores import FAISS
+ import math
+ import os
+ import pickle
+ import uuid
+ from pathlib import Path
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+
+ import numpy as np
+
+ from langchain.docstore.base import AddableMixin, Docstore
+ from langchain.docstore.document import Document
+ from langchain.docstore.in_memory import InMemoryDocstore
+ from langchain.embeddings.base import Embeddings
+ from langchain.vectorstores.base import VectorStore
+ from langchain.vectorstores.utils import maximal_marginal_relevance
+
+
+ class MyFAISS(FAISS):
+
+     def max_marginal_relevance_search_by_vector(
+         self,
+         embedding: List[float],
+         k: int = 4,
+         fetch_k: int = 20,
+         lambda_mult: float = 0.5,
+         filter: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> List[Document]:
+         """Return docs selected using the maximal marginal relevance.
+
+         Maximal marginal relevance optimizes for similarity to query AND diversity
+         among selected documents.
+
+         Args:
+             embedding: Embedding to look up documents similar to.
+             k: Number of Documents to return. Defaults to 4.
+             fetch_k: Number of Documents to fetch before filtering to
+                 pass to the MMR algorithm.
+             lambda_mult: Number between 0 and 1 that determines the degree
+                 of diversity among the results, with 0 corresponding
+                 to maximum diversity and 1 to minimum diversity.
+                 Defaults to 0.5.
+         Returns:
+             List of Documents selected by maximal marginal relevance.
+         """
+         _, indices = self.index.search(
+             np.array([embedding], dtype=np.float32),
+             fetch_k if filter is None else fetch_k * 2,
+         )
+         if filter is not None:
+             filtered_indices = []
+             for i in indices[0]:
+                 if i == -1:
+                     # This happens when not enough docs are returned.
+                     continue
+                 _id = self.index_to_docstore_id[i]
+                 doc = self.docstore.search(_id)
+                 if not isinstance(doc, Document):
+                     raise ValueError(f"Could not find document for id {_id}, got {doc}")
+                 print("metadata: " + str(doc.metadata))
+                 print("filter: " + str(filter))
+                 # Keep the doc if any single word of any filter value appears
+                 # in the corresponding metadata field.
+                 if any(filter_word in doc.metadata.get(key, '')
+                        for key, value in filter.items()
+                        for filter_word in value.split()):
+                     filtered_indices.append(i)
+             indices = np.array([filtered_indices])
+         # -1 happens when not enough docs are returned.
+         embeddings = [self.index.reconstruct(int(i)) for i in indices[0] if i != -1]
+         mmr_selected = maximal_marginal_relevance(
+             np.array([embedding], dtype=np.float32),
+             embeddings,
+             k=k,
+             lambda_mult=lambda_mult,
+         )
+         selected_indices = [indices[0][i] for i in mmr_selected]
+         docs = []
+         for i in selected_indices:
+             if i == -1:
+                 # This happens when not enough docs are returned.
+                 continue
+             _id = self.index_to_docstore_id[i]
+             doc = self.docstore.search(_id)
+             if not isinstance(doc, Document):
+                 raise ValueError(f"Could not find document for id {_id}, got {doc}")
+             docs.append(doc)
+         return docs
+
+     def max_marginal_relevance_search(
+         self,
+         query: str,
+         k: int = 4,
+         fetch_k: int = 20,
+         lambda_mult: float = 0.5,
+         filter: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> List[Document]:
+         """Return docs selected using the maximal marginal relevance.
+
+         Maximal marginal relevance optimizes for similarity to query AND diversity
+         among selected documents.
+
+         Args:
+             query: Text to look up documents similar to.
+             k: Number of Documents to return. Defaults to 4.
+             fetch_k: Number of Documents to fetch before filtering (if needed) to
+                 pass to the MMR algorithm.
+             lambda_mult: Number between 0 and 1 that determines the degree
+                 of diversity among the results, with 0 corresponding
+                 to maximum diversity and 1 to minimum diversity.
+                 Defaults to 0.5.
+         Returns:
+             List of Documents selected by maximal marginal relevance.
+         """
+         print("MMR search")
+         embedding = self.embedding_function(query)
+         docs = self.max_marginal_relevance_search_by_vector(
+             embedding,
+             k,
+             fetch_k,
+             lambda_mult=lambda_mult,
+             filter=filter,
+             **kwargs,
+         )
+         return docs
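
The `filter` hook above is what `utils.py` exercises through `as_retriever(search_type='mmr', search_kwargs={'filter': ...})`. A minimal sketch of driving it directly, with invented documents and metadata (assumes the langchain/faiss versions pinned in requirements.txt and an OPENAI_API_KEY in the environment):

# Hedged usage sketch for MyFAISS; document contents and names are invented.
from custom_faiss import MyFAISS
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings

docs = [
    Document(page_content="My 8 nouns: robot, guitar, ...", metadata={"name": "Alice Smith"}),
    Document(page_content="My 8 nouns: paint, brush, ...", metadata={"name": "Bob Jones"}),
]
vs = MyFAISS.from_documents(docs, OpenAIEmbeddings(model='text-embedding-ada-002'))
# Every word of each filter value is matched against doc.metadata[key],
# so "Alice Smith" keeps docs whose name contains "Alice" or "Smith".
hits = vs.max_marginal_relevance_search("what are Alice's interests?", k=1,
                                        filter={"name": "Alice Smith"})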
discussion.py ADDED
@@ -0,0 +1,50 @@
+ import requests
+ import json
+ import time
+
+ # Replace these variables with your own information
+ access_token = 'YOUR_ACCESS_TOKEN'
+ course_id = '36263'
+ discussion_topic_id = '421517'
+ base_url = 'https://canvas.illinois.edu'
+
+ headers = {
+     'Authorization': f'Bearer {access_token}'
+ }
+
+ # Create a content export
+ export_url = f'{base_url}/api/v1/courses/{course_id}/content_exports'
+ export_params = {
+     'export_type': 'common_cartridge',
+     'skip_notifications': True,
+     # Rails-style nested params, as the Canvas API expects
+     'select[discussion_topics][]': [discussion_topic_id]
+ }
+
+ export_response = requests.post(export_url, headers=headers, params=export_params)
+
+ if export_response.ok:
+     export_data = export_response.json()
+     # The content export response carries a progress_url to poll
+     progress_url = export_data['progress_url']
+
+     # Check the progress of the content export
+     progress_response = requests.get(progress_url, headers=headers)
+
+     if progress_response.ok:
+         progress_data = progress_response.json()
+         while progress_data['workflow_state'] not in ['completed', 'failed']:
+             time.sleep(2)  # avoid hammering the API while polling
+             progress_response = requests.get(progress_url, headers=headers)
+             progress_data = progress_response.json()
+
+         if progress_data['workflow_state'] == 'completed':
+             # Download the exported content
+             download_url = progress_data['url']
+             download_response = requests.get(download_url)
+
+             if download_response.ok:
+                 # Save the exported content to a file
+                 with open('discussion_topic_export.imscc', 'wb') as f:
+                     f.write(download_response.content)
+ else:
+     print(f'Error: {export_response.text}')
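
The script polls the progress record in a simple fixed-interval loop; for long-running exports, a bounded exponential backoff with a timeout is gentler on the API. A hypothetical helper along those lines (not part of this commit):

import time
import requests

def wait_for_progress(progress_url, headers, max_wait=300):
    # Hypothetical: poll a Canvas Progress record until it settles or we give up.
    delay, waited = 2, 0
    while waited < max_wait:
        data = requests.get(progress_url, headers=headers).json()
        if data['workflow_state'] in ('completed', 'failed'):
            return data
        time.sleep(delay)
        waited += delay
        delay = min(delay * 2, 30)  # back off, capped at 30s between polls
    raise TimeoutError('Content export did not finish in time')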
discussion_.py ADDED
@@ -0,0 +1,110 @@
+ import requests
+ import json
+ import os
+ from typing import List
+
+ class DiscussionEntry:
+     def __init__(self, id: int, parent_id: int, name: str, message: str, replies: List):
+         self.id = id
+         self.parent_id = parent_id
+         self.name = name
+         self.message = message
+         self.replies = replies
+
+     def to_json(self):
+         return {
+             'id': self.id,
+             'parent_id': self.parent_id,
+             'name': self.name,
+             'message': self.message,
+             'replies': [reply.to_json() for reply in self.replies]
+         }
+
+ def extract_entries(entries, participants):
+     result = []
+     for entry in entries:
+         if 'message' in entry and 'deleted' not in entry:
+             id = entry['id']
+             parent_id = entry['parent_id']
+             user_id = entry['user_id']
+             name = next((p['display_name'] for p in participants if p['id'] == user_id), None)
+             message = entry['message']
+             replies = []
+             if 'replies' in entry:
+                 replies = extract_entries(entry['replies'], participants)
+             result.append(DiscussionEntry(id, parent_id, name, message, replies))
+     return result
+
+ def save_messages(entries):
+     for entry in entries:
+         # Save the message as an HTML file
+         filename = f'docs/{entry.name}.html'
+
+         # Open file in write/append mode
+         with open(filename, 'a+') as f:
+             if entry.parent_id is None:
+                 f.write(f'<p><b>Student Post: {entry.name}</b></p>')
+                 f.write(entry.message)
+                 f.write('<hr>')
+             else:
+                 f.write(f'<p><b>Reply to: {entry.parent_id}</b></p>')
+                 f.write(entry.message)
+                 f.write('<hr>')
+
+     # Save the messages of the replies
+     for entry in entries:
+         save_messages(entry.replies)
+
+ # Replace these variables with your own information
+ access_token = ''
+ course_id = '36263'
+ discussion_topic_id = '421517'
+ base_url = 'https://canvas.illinois.edu'
+
+ headers = {
+     'Authorization': f'Bearer {access_token}'
+ }
+
+ # Retrieve the full discussion topic data
+ discussion_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
+ discussion_response = requests.get(discussion_url, headers=headers)
+
+ if discussion_response.ok:
+     discussion_data = discussion_response.json()
+     with open('discussion_data.json', 'w') as f:
+         json.dump(discussion_data, f)
+
+     # Extract the desired fields from the replies and responses
+     entries = extract_entries(discussion_data['view'], discussion_data['participants'])
+
+     # Save the extracted data to a file
+     with open('discussion_entries.json', 'w') as f:
+         json.dump([entry.to_json() for entry in entries], f)
+
+     # Create the docs/ directory if it does not exist
+     os.makedirs('docs', exist_ok=True)
+
+     # Save the messages as HTML files under the docs/ directory
+     save_messages(entries)
+
+     # Extract the rubric and save it to a file
+     if 'rubric' in discussion_data:
+         rubric = discussion_data['rubric']
+         with open('rubric.json', 'w') as f:
+             json.dump(rubric, f)
+ else:
+     print(f'Error: {discussion_response.text}')
+
+ rubric_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
+ rubric_response = requests.get(rubric_url, headers=headers)
+
+ if rubric_response.ok:
+     rubric_data = rubric_response.json()
+     # print(rubric_data)
+     if 'rubric' in rubric_data['assignment']:
+         rubric = rubric_data['assignment']['rubric']
+         with open('rubric_data.json', 'w') as f:
+             json.dump(rubric, f)
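
For reference, `DiscussionEntry.to_json` nests replies under their parent, so `discussion_entries.json` ends up with records shaped like this (ids and names invented for illustration):

# Hypothetical shape of one record in discussion_entries.json:
example_entry = {
    'id': 101,
    'parent_id': None,
    'name': 'Alice Smith',
    'message': '<p>My 8 nouns are ...</p>',
    'replies': [
        {'id': 102, 'parent_id': 101, 'name': 'Bob Jones',
         'message': '<p>Great list!</p>', 'replies': []},
    ],
}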
discussion_1.py ADDED
@@ -0,0 +1,39 @@
+ import requests
+ import json
+
+ # Replace these variables with your own information
+ access_token = ''
+ course_id = '36263'
+ discussion_topic_id = '421517'
+ base_url = 'https://canvas.illinois.edu'
+
+ headers = {
+     'Authorization': f'Bearer {access_token}'
+ }
+
+ # Retrieve the full discussion topic data
+ discussion_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
+ discussion_response = requests.get(discussion_url, headers=headers)
+
+ if discussion_response.ok:
+     discussion_data = discussion_response.json()
+
+     with open('discussion_data.json', 'w') as f:
+         json.dump(discussion_data, f)
+
+     # Extract the replies and responses
+     discussions = []
+     replies = []
+     for entry in discussion_data['view']:
+         discussions.append(entry)  # append the entry itself, not its keys
+         if 'replies' in entry:
+             replies.extend(entry['replies'])
+
+     with open('discussions.json', 'w') as f:
+         json.dump(discussions, f)
+
+     # Save the replies and responses to a file
+     with open('discussion_replies.json', 'w') as f:
+         json.dump(replies, f)
+ else:
+     print(f'Error: {discussion_response.text}')
main.py ADDED
@@ -0,0 +1,11 @@
+ from utils import get_search_index, generate_answer, set_model_and_embeddings, get_question_type
+
+ def index():
+     set_model_and_embeddings()
+     get_search_index()
+     return True
+
+ def run(question):
+     index()
+     # return generate_answer(question)
+     return get_question_type(question)
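
A minimal driver for these two entry points might look as follows (the question is invented; assumes OPENAI_API_KEY is set and the docs/ folder produced by discussion_.py exists):

# Hedged smoke test for main.py, not part of the commit.
from main import index, run

index()  # builds or loads the FAISS index under models/
print(run("How did Alice Smith do on the 8 nouns discussion?"))  # hypothetical question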
models/openai_vs.index ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76c918e13944c7b3a409671e5d5ec4f94a4b260b82bd197b2bc3a39c433e1f9d
+ size 196653
models/openai_vs.pkl ADDED
Binary file (277 kB)
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ langchain
+ openai
+ faiss-cpu==1.7.3
+ unstructured==0.5.8
+ ffmpeg-python
+ transformers
+ gtts
+ torch
+ tiktoken
+ huggingface-hub
+ google-generativeai
+ gradio
+ jq
schema.py ADDED
@@ -0,0 +1,30 @@
+ import json
+
+ from pydantic import BaseModel
+ from typing import List, Optional
+
+ class ForumUser(BaseModel):
+     id: int
+     anonymous_id: str
+     display_name: str
+     avatar_image_url: str
+     html_url: str
+     pronouns: Optional[str]
+
+ class ForumPost(BaseModel):
+     id: int
+     user_id: int
+     parent_id: Optional[int]
+     created_at: str
+     updated_at: str
+     rating_count: Optional[int]
+     rating_sum: Optional[int]
+     user_name: str
+     message: str
+     user: ForumUser
+     read_state: str
+     forced_read_state: bool
+
+ def get_data_from_json(file_path):
+     with open(file_path, "r") as f:
+         json_data = json.load(f)
+     data = [ForumPost(**item) for item in json_data]
+     return data
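
A short usage sketch (the file name is hypothetical; the JSON must carry exactly the ForumPost fields):

# Hedged example: parse a dump of Canvas posts into typed ForumPost models.
from schema import get_data_from_json

posts = get_data_from_json("discussion_posts.json")  # hypothetical file
print(posts[0].user.display_name, posts[0].message[:80])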
utils.py ADDED
@@ -0,0 +1,515 @@
+ import os
+ import pickle
+ import re
+ from typing import Any, Callable, List, Optional, Tuple, Union
+
+ import faiss
+ import langchain
+ from langchain import HuggingFaceHub, PromptTemplate
+ from langchain.agents import (AgentExecutor, AgentOutputParser, AgentType, BaseMultiActionAgent,
+                               LLMSingleActionAgent, Tool, initialize_agent)
+ from langchain.cache import InMemoryCache
+ from langchain.chains import ConversationalRetrievalChain, LLMChain, create_tagging_chain
+ from langchain.chat_models import ChatGooglePalm, ChatOpenAI
+ from langchain.document_loaders import DirectoryLoader, JSONLoader, TextLoader, UnstructuredHTMLLoader
+ from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
+ from langchain.llms import OpenAI
+ from langchain.memory import ConversationBufferWindowMemory
+ from langchain.output_parsers import PydanticOutputParser
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     HumanMessagePromptTemplate,
+     StringPromptTemplate,
+     SystemMessagePromptTemplate,
+ )
+ from langchain.schema import AgentAction, AgentFinish
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.tools import StructuredTool
+ from langchain.tools.json.tool import JsonSpec
+ from custom_faiss import MyFAISS
+ from pydantic import BaseModel, Field
+
+ class ToolArgsSchema(BaseModel):
+     student_name: Optional[str] = Field(description="The name of the student")
+     question: str = Field(description="The question being asked")
+     question_type: str = Field(description="The type of question being asked")
+     interest: Optional[str] = Field(description="The interest of the student")
+
+     class Config:
+         schema_extra = {
+             "required": ["question", "question_type"]
+         }
+
+
+ langchain.llm_cache = InMemoryCache()
+
+ model_name = "GPT-4"
+
+ pickle_file = "_vs.pkl"
+ index_file = "_vs.index"
+ models_folder = "models/"
+ os.environ["LANGCHAIN_TRACING"] = "true"
+ discussions_file_path = "discussion_entries.json"
+
+ llm = OpenAI(model_name="gpt-3.5-turbo-16k", temperature=0, verbose=True)
+
+ embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
+
+ chat_history = []
+
+ memory = ConversationBufferWindowMemory(memory_key="chat_history", k=10)
+
+ vectorstore_index = None
+
+ agent_prompt = """
+ I am the LLM AI canvas discussion grading assistant.
+ I can answer two types of questions: grade-based questions and interest-based questions.
+ Grade-based questions are about the grades of a certain student or a group of students, based on the rubric below, for the canvas discussion on the topic 8 nouns.
+ Interest-based questions are about the interests or skills of a certain student or a group of students based on their discussion posts.
+ You have access to the following tools:
+
+ {tools}
+
+ Use the following format:
+
+ Question: the input question you must answer
+ Thought: you should always think about what type of question it is
+ Action: the action to take, should be one of [{tool_names}]
+ Action Input: the input to the action
+ Observation: the result of the action
+ ... (this Thought/Action/Action Input/Observation can repeat N times)
+ Thought: I now know the final answer
+ Final Answer: the final answer to the original input question
+
+ Begin!
+
+ Question: {input}
+ {agent_scratchpad}
+ """
+
+ # Set up a prompt template
+ class CustomPromptTemplate(StringPromptTemplate):
+     # The template to use
+     template: str
+     # The list of tools available
+     tools_getter: Callable
+
+     def format(self, **kwargs) -> str:
+         # Get the intermediate steps (AgentAction, Observation tuples)
+         # and format them in a particular way
+         intermediate_steps = kwargs.pop("intermediate_steps")
+         thoughts = ""
+         for action, observation in intermediate_steps:
+             thoughts += action.log
+             thoughts += f"\nObservation: {observation}\nThought: "
+         # Set the agent_scratchpad variable to that value
+         kwargs["agent_scratchpad"] = thoughts
+         tools = self.tools_getter(kwargs["input"])
+         # Create a tools variable from the list of tools provided
+         kwargs["tools"] = "\n".join(
+             [f"{tool.name}: {tool.description}" for tool in tools]
+         )
+         # Create a list of tool names for the tools provided
+         kwargs["tool_names"] = ", ".join([tool.name for tool in tools])
+         return self.template.format(**kwargs)
+
+ class CustomOutputParser(AgentOutputParser):
+
+     def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
+         print("llm_output")
+         print(llm_output)
+         # Check if agent should finish
+         if "Final Answer:" in llm_output:
+             return AgentFinish(
+                 # Return values is generally always a dictionary with a single `output` key
+                 # It is not recommended to try anything else at the moment :)
+                 return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
+                 log=llm_output,
+             )
+         # Parse out the action and action input
+         regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
+         match = re.search(regex, llm_output, re.DOTALL)
+         if not match:
+             raise ValueError(f"Could not parse LLM output: `{llm_output}`")
+         action = match.group(1).strip()
+         action_input = match.group(2)
+         # Return the action and action input
+         return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
+
+ system_template = """
+ I am the LLM AI canvas discussion grading assistant.
+ I can answer two types of questions: grade-based questions and interest-based questions.
+ Grade-based questions are about the grades of a certain student or a group of students, based on the rubric below, for the canvas discussion on the topic 8 nouns.
+ Interest-based questions are about the interests or skills of a certain student or a group of students based on their discussion posts.
+ To grade student discussions, I will follow the rubric below.
+
+ Student Post
+
+ 3 points: Post includes 8 nouns and text describing how these nouns relate to the student.
+ 2 points: Student's post includes 8 nouns but does not offer how those nouns relate to the student.
+ 1 point: Student's post has significant missing details.
+ 0 points: The student does not provide an initial post, or otherwise does not follow assignment instructions.
+
+ Response to Others
+
+ 3 points: Student responds to at least 3 other student discussion threads AND responds to questions asked of them. Student posts insightful comments that prompt on-target discussion. These posts also avoid throwaway comments such as "I agree", "Me too", "Good idea".
+ 2 points: Student was notably lacking in one criterion.
+ 1 point: Student was notably lacking in two criteria.
+ 0 points: The student does not interact in the threads of other students.
+
+ I will be able to identify each student by name, and I will be able to share their likings, interests, and other characteristics. I will also be able to filter out students based on their interests.
+
+ I will not deviate from the grading scheme. I will grade each discussion entry and reply carefully, and I will share the grades of all individuals by name on the basis of the rubric, with a final score.
+
+ The discussions and their replies are in the following format:
+ Student Post: Student Name
+ Reply to: Another Student Discussion ID
+
+ The following are the relevant discussions to grade or to answer the interest-based questions
+ ----------------
+ Discussions:
+ {context}"""
+
+ messages = [
+     SystemMessagePromptTemplate.from_template(system_template),
+     HumanMessagePromptTemplate.from_template("{question}"),
+ ]
+ CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
+
+
+ def set_model_and_embeddings():
+     global chat_history
+     # set_model(model)
+     # set_embeddings(model)
+     chat_history = []
+
+
+ def set_embeddings(model):
+     global embeddings
+     if model == "GPT-3.5" or model == "GPT-4":
+         print("Loading OpenAI embeddings")
+         embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
+     elif model == "Flan UL2" or model == "Flan T5":
+         print("Loading Hugging Face embeddings")
+         embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")
+
+
+ def get_search_index():
+     global vectorstore_index, model_name
+     if os.path.isfile(get_file_path(model_name, pickle_file)) and os.path.isfile(
+             get_file_path(model_name, index_file)) and os.path.getsize(get_file_path(model_name, pickle_file)) > 0:
+         # Load index from pickle file
+         with open(get_file_path(model_name, pickle_file), "rb") as f:
+             # search_index = Chroma(persist_directory=models_folder, embedding_function=embeddings)
+             search_index = pickle.load(f)
+         print("Loaded index")
+     else:
+         search_index = create_index(model_name)
+         print("Created index")
+
+     vectorstore_index = search_index
+     return search_index
+
+
+ def create_index(model):
+     source_chunks = create_chunk_documents()
+     search_index = search_index_from_docs(source_chunks)
+     # search_index.persist()
+     faiss.write_index(search_index.index, get_file_path(model, index_file))
+     # Save index to pickle file
+     with open(get_file_path(model, pickle_file), "wb") as f:
+         pickle.dump(search_index, f)
+     return search_index
+
+
+ def get_file_path(model, file):
+     # If model is GPT-3.5 or GPT-4, return models_folder + "openai" + file;
+     # else return models_folder + "hf" + file
+     if model == "GPT-3.5" or model == "GPT-4":
+         return models_folder + "openai" + file
+     else:
+         return models_folder + "hf" + file
+
+
+ def search_index_from_docs(source_chunks):
+     # print("source chunks: " + str(len(source_chunks)))
+     # print("embeddings: " + str(embeddings))
+     search_index = MyFAISS.from_documents(source_chunks, embeddings)
+     return search_index
+
+
+ def get_html_files():
+     loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
+     document_list = loader.load()
+     for document in document_list:
+         document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
+     return document_list
+
+
+ def metadata_func(record: dict, metadata: dict) -> dict:
+     metadata["name"] = record.get("name")
+     return metadata
+
+
+ def get_json_file():
+     global discussions_file_path
+     loader = JSONLoader(
+         file_path=discussions_file_path,
+         jq_schema='.[]', metadata_func=metadata_func, content_key="message")
+     return loader.load()
+
+
+ def fetch_data_for_embeddings():
+     # document_list = get_text_files()
+     document_list = get_html_files()
+     # document_list = get_json_file()
+     print("document list: " + str(len(document_list)))
+     return document_list
+
+
+ def get_text_files():
+     loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
+     document_list = loader.load()
+     return document_list
+
+
+ def create_chunk_documents():
+     sources = fetch_data_for_embeddings()
+
+     splitter = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)
+
+     source_chunks = splitter.split_documents(sources)
+
+     print("chunks: " + str(len(source_chunks)))
+
+     return source_chunks
+
+
+ def get_qa_chain(vectorstore_index, question, metadata):
+     global llm, model_name
+     print(llm)
+     filter_dict = {"name": metadata.student_name}
+     # embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
+     # compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=gpt_3_5_index.as_retriever())
+     retriever = get_retriever(filter_dict, vectorstore_index, metadata)
+
+     print(retriever.get_relevant_documents(question))
+
+     chain = ConversationalRetrievalChain.from_llm(llm, retriever, return_source_documents=True,
+                                                   verbose=True, get_chat_history=get_chat_history,
+                                                   combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
+     return chain
+
+
+ def get_retriever(filter_dict, vectorstore_index, metadata):
+     if metadata.question_type == "grade-based":
+         retriever = vectorstore_index.as_retriever(search_type='mmr',
+                                                    search_kwargs={'lambda_mult': 1, 'fetch_k': 20, 'k': 10,
+                                                                   'filter': filter_dict})
+     else:
+         retriever = vectorstore_index.as_retriever(search_type='mmr',
+                                                    search_kwargs={'lambda_mult': 1, 'fetch_k': 20, 'k': 10})
+
+     return retriever
+
+
+ def get_chat_history(inputs) -> str:
+     res = []
+     for human, ai in inputs:
+         res.append(f"Human:{human}\nAI:{ai}")
+     return "\n".join(res)
+
+
+ def generate_answer(question, metadata: ToolArgsSchema) -> str:
+     global chat_history, vectorstore_index
+     chain = get_qa_chain(vectorstore_index, question, metadata)
+
+     result = chain(
+         {"question": question, "chat_history": chat_history})
+     chat_history.extend([(question, result["answer"])])
+     sources = []
+     print(result)
+
+     for document in result['source_documents']:
+         source = document.metadata['source']
+         sources.append(source.split('/')[-1].split('.')[0])
+     print(sources)
+
+     source = ',\n'.join(set(sources))
+     # return result['answer'] + '\nSOURCES: ' + source
+     return result['answer']
+
+
+ def get_question_type(question):
+     parser = PydanticOutputParser(pydantic_object=ToolArgsSchema)
+     prompt_template = """I can answer two types of questions: grade-based questions and interest-based questions.
+     Grade-based questions are about the grades of a certain student or a group of students, based on the rubric below, for the canvas discussion on the topic 8 nouns.
+     Interest-based questions are about the interests or skills of a certain student or a group of students based on their discussion posts.
+     Question: {question}
+     Find the following information about the question asked. Return an empty Optional if the information is not available:
+     Format instructions: {format_instructions}"""
+
+     llm = OpenAI(temperature=0)
+     prompt = PromptTemplate(template=prompt_template, input_variables=["question"], output_parser=parser,
+                             partial_variables={"format_instructions": parser.get_format_instructions()})
+     llm_chain = LLMChain(
+         llm=llm,
+         prompt=prompt,
+     )
+     output = llm_chain.run(question)
+     output = parser.parse(output)
+     output = generate_answer(question, output)
+     return output
+
+
+ # class FakeAgent(BaseMultiActionAgent):
+ #     """Fake Custom Agent."""
+ #
+ #     @property
+ #     def input_keys(self):
+ #         return ["input"]
+ #
+ #     def plan(
+ #         self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
+ #     ) -> Union[List[AgentAction], AgentFinish]:
+ #         print("input keys")
+ #         print(self.input_keys)
+ #         print("intermediate steps")
+ #         print(intermediate_steps)
+ #         print("kwargs")
+ #         print(kwargs)
+ #
+ #         """Given input, decide what to do.
+ #
+ #         Args:
+ #             intermediate_steps: Steps the LLM has taken to date,
+ #                 along with observations
+ #             **kwargs: User inputs.
+ #
+ #         Returns:
+ #             Action specifying what tool to use.
+ #         """
+ #         if len(intermediate_steps) == 0:
+ #             first_action = AgentAction(tool="question type", tool_input=kwargs["input"], log="")
+ #             print("first action")
+ #             print(first_action)
+ #             second_action = AgentAction(tool="Grade", tool_input=kwargs["input"], log="")
+ #             print("second action")
+ #             print(second_action)
+ #             return [
+ #                 first_action,
+ #                 second_action,
+ #             ]
+ #         else:
+ #             return AgentFinish(return_values={"output": "bar"}, log="")
+ #
+ #     async def aplan(
+ #         self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
+ #     ) -> Union[List[AgentAction], AgentFinish]:
+ #         """Given input, decide what to do.
+ #
+ #         Args:
+ #             intermediate_steps: Steps the LLM has taken to date,
+ #                 along with observations
+ #             **kwargs: User inputs.
+ #
+ #         Returns:
+ #             Action specifying what tool to use.
+ #         """
+ #         if len(intermediate_steps) == 0:
+ #             return [
+ #                 AgentAction(tool="question type", tool_input=kwargs["input"], log=""),
+ #                 AgentAction(tool="Grade",
+ #                             tool_input={
+ #                                 "student_name": kwargs["student_name"],
+ #                                 "question": kwargs["question"],
+ #                                 "question_type": kwargs["question_type"],
+ #                                 "interest": kwargs["interest"]
+ #                             }, log=""),
+ #             ]
+ #         else:
+ #             return AgentFinish(return_values={"output": "bar"}, log="")
+ #
+ #
+ # schema = {
+ #     "properties": {
+ #         "student_name": {"type": "string", "description": "The name of the student"},
+ #         "question": {"type": "string", "description": "The question being asked"},
+ #         "question type": {"type": "string",
+ #                           "enum": ["student grades", "student specific", "interest specific"],
+ #                           "description": "The type of question being asked"},
+ #         "interest": {"type": "string", "description": "The interest of the student"},
+ #     },
+ #     "required": ["question", "question type"]
+ # }
+
+
+ # def get_tagging_chain(question) -> str:
+ #     global schema
+ #     chain = create_tagging_chain(schema, llm)
+ #     first_answer = chain.run(question)
+ #     print("first answer:")
+ #     print(first_answer)
+ #     return first_answer
+ #
+ #
+ # def get_grading_agent():
+ #     tools = [
+ #         Tool(
+ #             name="question type",
+ #             func=get_tagging_chain,
+ #             description="Useful when you need to understand the type of the input."
+ #         ),
+ #         StructuredTool(
+ #             name="Grade",
+ #             func=generate_answer,
+ #             description="Useful when you need to answer questions about students, grades, interests, etc. from the context of canvas discussion posts. If the question is student specific, the student name is required.",
+ #             args_schema=ToolArgsSchema
+ #         )
+ #     ]
+ #     # agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+ #
+ #     agent = FakeAgent(output_parser=CustomOutputParser())
+ #     # prompt = CustomPromptTemplate(template=agent_prompt, tools=tools, input_variables=["input", "intermediate_steps"])
+ #     # output_parser = CustomOutputParser()
+ #     # tool_names = [tool.name for tool in tools]
+ #     # llm_chain = LLMChain(llm=llm, prompt=prompt)
+ #     # agent = LLMSingleActionAgent(
+ #     #     llm_chain=llm_chain,
+ #     #     output_parser=output_parser,
+ #     #     stop=["\nObservation:"],
+ #     #     allowed_tools=tool_names,
+ #     # )
+ #     agent_executor = AgentExecutor.from_agent_and_tools(
+ #         agent=agent, tools=tools, verbose=True
+ #     )
+ #
+ #     # return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
+ #     return agent_executor
+ #
+ #
+ # def grade_answer(question) -> str:
+ #     global chat_history, vectorstore_index
+ #     agent = get_grading_agent()
+ #     return agent.run(question)
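
To see what `get_question_type` hands to `generate_answer`, the PydanticOutputParser round-trip can be exercised in isolation (the raw JSON below stands in for an LLM response; importing utils requires an OPENAI_API_KEY, since module import constructs the OpenAI client):

# Hedged illustration of the ToolArgsSchema parsing step.
from langchain.output_parsers import PydanticOutputParser
from utils import ToolArgsSchema

parser = PydanticOutputParser(pydantic_object=ToolArgsSchema)
raw = '{"student_name": "Alice Smith", "question": "What is her grade?", "question_type": "grade-based", "interest": null}'
args = parser.parse(raw)
print(args.student_name, args.question_type)  # -> Alice Smith grade-based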