# -*- coding: utf-8 -*-
"""AIE3final.py

Automated Grading System for AIE3 Final Project
"""
# Import necessary libraries
import logging
import sys
import os
import asyncio
import shutil
import json
import openai
import numpy as np
import chainlit as cl
from typing import List, Dict, Tuple
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from docx import Document as DocxDocument
from readfile import prepare_files, USER_FILES_DIR
from promptsplitembed import create_prompt, split_documents, generate_embeddings, create_qamodel
from extractjson import extract_json
from calcscore import compute_cosine_similarity, llm_similarity
from prompt_templates import ref_prompt, student_prompt, llm_score_prompt_template
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared chat model used by both the reference and student chains
qa_chat_model = create_qamodel(model="gpt-4o-mini", temperature=0)
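
# For reference, a minimal sketch of what promptsplitembed.create_qamodel is
# assumed to look like (an assumption based on the ChatOpenAI import above;
# the real implementation lives in that module):
#
#     def create_qamodel(model: str, temperature: float) -> ChatOpenAI:
#         return ChatOpenAI(model=model, temperature=temperature)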

def process_reference(reference_document, ref_prompt):
    """Generate reference question/answer pairs from the solution document."""
    ref_prompt_template = create_prompt(ref_prompt)
    ref_generation_chain = ref_prompt_template | qa_chat_model
    result = ref_generation_chain.invoke({"source": reference_document})
    ref_gen_tokens = result.usage_metadata["total_tokens"]
    reference = dict(extract_json(result)[0])
    # Map each 'Question #N' key to its corresponding 'Answer #N' value
    answers = {}
    for key in reference:
        if key.startswith('Question'):
            question_number = key.split('#')[1]
            answer_key = f'Answer #{question_number}'
            answers[key] = reference[answer_key]
    logger.info("Processed reference document")
    return reference, answers, ref_gen_tokens
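
# Illustrative shape of the JSON that process_reference() expects from the
# model (hypothetical content; the actual questions come from ref_prompt):
#
#     {
#         "Question #1": "What is gradient descent?",
#         "Answer #1": "An iterative optimization algorithm that ...",
#         "Question #2": "...",
#         "Answer #2": "..."
#     }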

def process_student(documents, reference, student_prompt):
    """Extract the student's answers to the reference questions."""
    test_doc = documents[0]
    student_prompt_template = create_prompt(student_prompt)
    student_response_chain = student_prompt_template | qa_chat_model
    student_result = student_response_chain.invoke({"source": reference.keys(), "student": test_doc})
    student_gen_tokens = student_result.usage_metadata["total_tokens"]
    student_result = dict(extract_json(student_result)[0])
    logger.info("Processed student document")
    return student_result, student_gen_tokens
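
# A rough sketch of what extractjson.extract_json is assumed to do: pull JSON
# objects out of the model's reply text. This is an assumption for
# illustration only; the module's actual parsing may differ.
#
#     import re
#     def extract_json(message: AIMessage) -> list:
#         candidates = re.findall(r"\{.*\}", message.content, re.DOTALL)
#         return [json.loads(c) for c in candidates]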

def compare_docs(answers, student_result):
    """Split each answer into chunks and embed them for comparison."""
    split_reference_docs, ref_tokens = {}, 0
    split_student_docs, student_tokens = {}, 0
    for key, value in answers.items():
        chunks, tokens = split_documents([Document(page_content=value)])
        split_reference_docs[key] = chunks
        ref_tokens += tokens
    for key, value in student_result.items():
        chunks, tokens = split_documents([Document(page_content=value)])
        split_student_docs[key] = chunks
        student_tokens += tokens
    reference_embeddings = {key: generate_embeddings(value)[0] for key, value in split_reference_docs.items()}
    student_embeddings = {key: generate_embeddings(value)[0] for key, value in split_student_docs.items()}
    logger.info("Completed comparing student and solution answers.")
    return reference_embeddings, student_embeddings, ref_tokens, student_tokens
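
# A minimal sketch of the scoring that calcscore.compute_cosine_similarity is
# assumed to perform on the embeddings built above (assumption: both dicts are
# keyed by the same 'Question #N' strings):
#
#     def compute_cosine_similarity(ref_emb, student_emb):
#         scores = [cosine_similarity([ref_emb[k]], [student_emb[k]])[0][0]
#                   for k in ref_emb]
#         return float(np.mean(scores))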

def process_data(zip_file_name: str, prompt_template) -> Tuple[float, float, int, int, int]:
    """Run the full grading pipeline for one uploaded ZIP file."""
    documents, reference_document = prepare_files(zip_file_name)
    reference, answers, ref_gen_tokens = process_reference(reference_document, ref_prompt)
    student_result, student_gen_tokens = process_student(documents, reference, student_prompt)
    reference_embeddings, student_embeddings, ref_tokens, student_tokens = compare_docs(answers, student_result)
    student_total_tokens = student_gen_tokens + student_tokens
    ref_total_tokens = ref_gen_tokens + ref_tokens
    average_similarity = compute_cosine_similarity(reference_embeddings, student_embeddings)
    average_score, llm_score_tokens = llm_similarity(answers, student_result, prompt_template)
    llm_total_tokens = ref_gen_tokens + student_gen_tokens + llm_score_tokens
    return average_similarity, average_score, ref_total_tokens, student_total_tokens, llm_total_tokens
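
# Example usage (hypothetical archive name):
#
#     sim, score, ref_tok, stu_tok, llm_tok = process_data(
#         "submissions.zip", llm_score_prompt_template)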

user_wants_to_continue = False
uploaded_file_name = None


@cl.on_chat_start
async def start():
    global uploaded_file_name
    files = None
    # Wait for the user to upload a file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a zip file to begin!", accept={"application/zip": [".zip"]}
        ).send()
    zip_file = files[0]  # Assuming only one file is uploaded
    file_path = os.path.join(USER_FILES_DIR, zip_file.name)
    uploaded_file_name = zip_file.name
    # Move the uploaded file to the user files directory
    shutil.move(zip_file.path, file_path)
    # Let the user know that the system is ready
    await cl.Message(content=f"`{zip_file.name}` uploaded successfully!").send()
    # Ask if the user wants to proceed with grading
    await cl.Message(content="Do you want to proceed with the grading? (yes/no)").send()

async def process_grading():
    global uploaded_file_name
    if uploaded_file_name:
        try:
            # Process the uploaded ZIP file
            average_similarity, average_score, ref_total_tokens, student_total_tokens, llm_total_tokens = process_data(
                uploaded_file_name, llm_score_prompt_template
            )
            # Send results
            await cl.Message(content=f"Processing complete. Results:\n"
                                     f"Average Similarity: {average_similarity:.2f}\n"
                                     f"Average Score: {average_score:.2f}\n"
                                     f"Reference Total Tokens: {ref_total_tokens}\n"
                                     f"Student Total Tokens: {student_total_tokens}\n"
                                     f"LLM Total Tokens: {llm_total_tokens}").send()
        except Exception as e:
            await cl.Message(content=f"An error occurred while processing the zip file: {str(e)}").send()
    else:
        await cl.Message(content="No file has been uploaded yet. Please upload a ZIP file first.").send()

@cl.on_message
async def on_message(message: cl.Message):
    global user_wants_to_continue, uploaded_file_name
    if message.content.lower() == 'yes' and not user_wants_to_continue:
        if uploaded_file_name:
            # Start processing
            processing_message = cl.Message(content="Processing files...")
            await processing_message.send()
            await asyncio.sleep(0.5)
            await process_grading()
            # Ask user if they want to continue after processing is done
            user_wants_to_continue = True
            await cl.Message(content="Do you want to continue? (yes/no)").send()
    # ... rest of the function ...

# Ensure the user files directory exists at import time (Chainlit imports this
# module rather than executing it as __main__, so a __main__ guard would never
# run under the launcher)
os.makedirs(USER_FILES_DIR, exist_ok=True)

# Launch the app with: chainlit run AIE3final.py