Spaces:

shukdevdatta123
/

Educational-Assistant

Sleeping

File size: 8,962 Bytes

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
import openai
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.chat_models import ChatOpenAI
from fpdf import FPDF
import os
import spacy
from collections import Counter

# spaCy pipeline used by the NLP helpers in this file. The 'spacy' package and
# the 'en_core_web_sm' model must be installed beforehand.
nlp = spacy.load("en_core_web_sm")

# Main page heading
st.title('Educational Assistant')
st.header('Summary, Quiz Generator, Q&A, Study Plan, Vocabulary Builder & Topic Extraction')

# Modes offered in the sidebar radio selector, in display order
_MODE_CHOICES = (
    'Generate Summary',
    'Generate Quiz',
    'Ask a Question',
    'Generate Study Plan',
    'Vocabulary Builder',
    'Topic Extraction',
)

# Sidebar widgets, created top to bottom: title, API key field (masked),
# PDF uploader, mode selector
st.sidebar.title('Drop your PDF here')
openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")
user_file_upload = st.sidebar.file_uploader(label='', type='pdf')
option = st.sidebar.radio("Choose an option", _MODE_CHOICES)

# The question box is rendered only in "Ask a Question" mode; in every other
# mode there is no question and the variable stays None
question_input = (
    st.text_input("Enter your question about the document:")
    if option == 'Ask a Question'
    else None
)

# Function to generate a PDF and allow download
def generate_pdf(response, filename="response.pdf", font_path="arialuni.ttf"):
    """
    Render text into a one-column PDF and save it to disk.

    Args:
        response: Text to write into the PDF.
        filename: Path of the PDF file to create.
        font_path: TrueType font file used to render Unicode text. Defaults to
            the 'arialuni.ttf' file this app has always looked for.

    Returns:
        The ``filename`` that was written, so callers can reopen the file.
    """
    pdf = FPDF()
    pdf.add_page()

    try:
        # A Unicode-capable TrueType font lets the PDF hold any character.
        pdf.add_font('ArialUnicode', '', font_path, uni=True)
    except (RuntimeError, OSError):
        # The font file is not available on this machine. Depending on the
        # fpdf release this surfaces as RuntimeError or FileNotFoundError
        # (an OSError). Fall back to a built-in font instead of failing the
        # whole export; built-in fonts only cover Latin-1, so characters
        # outside that range are replaced with '?'.
        pdf.set_font('Helvetica', '', 12)
        response = response.encode('latin-1', 'replace').decode('latin-1')
    else:
        pdf.set_font('ArialUnicode', '', 12)

    # Full-width cell (w=0) with automatic line wrapping, 10 units per line
    pdf.multi_cell(0, 10, response)

    pdf.output(filename)

    return filename

def extract_key_terms(text, top_n=10):
    """
    Extract the most frequent key terms (nouns) from the text using spaCy.

    Args:
        text: Raw text to analyse with the module-level ``nlp`` pipeline.
        top_n: How many of the most frequent terms to return. Defaults to 10;
            pass ``None`` to get every term, ordered by frequency.

    Returns:
        A list of ``(term, count)`` tuples, most frequent first.
    """
    doc = nlp(text)

    # Keep tokens tagged as nouns that are not stop words, and count them as
    # they stream by instead of building an intermediate list first.
    term_frequency = Counter(
        token.text
        for token in doc
        if token.pos_ == "NOUN" and not token.is_stop
    )

    return term_frequency.most_common(top_n)

def extract_topics(text):
    """
    Collect the distinct named entities spaCy's NER finds in the text.

    The text is run through the module-level ``nlp`` pipeline and the surface
    text of every recognised entity is gathered; duplicates collapse because
    the result is a set.
    """
    parsed = nlp(text)

    unique_entities = set()
    for entity in parsed.ents:
        unique_entities.add(entity.text)

    return unique_entities

# Options that send the whole document to the LLM and display the reply,
# keyed by sidebar option:
#   (system prompt, download-button label, output PDF filename)
_DOCUMENT_TASKS = {
    'Generate Summary': (
        "You are a smart assistant. Give a summary of the user's PDF. Be polite.",
        "Download Summary as PDF",
        "summary_response.pdf",
    ),
    'Generate Quiz': (
        "You are a smart assistant. Generate 10 multiple-choice quiz questions with 4 options each (including correct and incorrect options) from the user's PDF. Please also include the correct answer in your response. Be polite.",
        "Download Quiz as PDF",
        "quiz_response.pdf",
    ),
    'Generate Study Plan': (
        "You are a smart assistant. Based on the content of the user's PDF, generate a 7-day study plan. Divide the content into 7 topics and assign each topic to a day. Please make it logical and balanced.",
        "Download Study Plan as PDF",
        "study_plan_response.pdf",
    ),
}

# Prompt pieces for the question-answering option
_QA_SYSTEM_PROMPT = "You are a smart assistant. Answer the user's question based on the content of the PDF. Be polite."
_QA_USER_TEMPLATE = "{data}\n\nUser's question: {question}"


def _load_pdf_pages(uploaded_file):
    """Write the upload to a private temporary file and return its split pages.

    PyPDFLoader is given a filesystem path, so the uploaded bytes are written
    to disk first. A unique temporary file is used (instead of one fixed name
    in the working directory) so concurrent sessions cannot overwrite each
    other's upload, and the file is removed once loading finishes.
    """
    import tempfile  # local import: only this helper needs it

    # delete=False + explicit close: the loader reopens the file by path,
    # which some platforms do not allow while it is still open here.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        # getvalue() returns the full buffer regardless of the read position.
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name
    try:
        return PyPDFLoader(tmp_path).load_and_split()
    finally:
        os.remove(tmp_path)


def _build_chain(llm, system_prompt, user_template="{data}"):
    """Compose a prompt -> model -> plain-string chain for one task."""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("user", user_template),
        ]
    )
    return prompt | llm | StrOutputParser()


def _offer_pdf_download(label, text, filename):
    """Render *text* to a PDF with generate_pdf and show a download button."""
    pdf_filename = generate_pdf(text, filename=filename)
    # Read through a context manager so the file handle is always closed.
    with open(pdf_filename, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    st.download_button(label, data=pdf_bytes, file_name=pdf_filename, mime="application/pdf")


if openai_api_key:
    # Set OpenAI API key
    openai.api_key = openai_api_key

    if not user_file_upload:
        # Every option needs a document; say so instead of running the
        # option handlers against variables that were never created.
        st.info("Please upload a PDF file to proceed.")
    else:
        data = _load_pdf_pages(user_file_upload)

        # Work with the plain page text: it is what spaCy expects and it keeps
        # Document metadata and Python repr noise out of the LLM prompt.
        document_text = ' '.join(page.page_content for page in data)

        if not document_text.strip():
            st.warning("No extractable text was found in the uploaded PDF.")

        elif option in _DOCUMENT_TASKS:
            # Summary, quiz and study plan share one flow and differ only in
            # the system prompt and the download label/filename.
            system_prompt, download_label, pdf_name = _DOCUMENT_TASKS[option]
            # Pass the OpenAI API key explicitly to the ChatOpenAI instance
            llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=openai_api_key)
            task_response = _build_chain(llm, system_prompt).invoke({'data': document_text})
            st.write(task_response)
            _offer_pdf_download(download_label, task_response, pdf_name)

        elif option == 'Ask a Question' and question_input:
            # Only call the model once the user confirms the question
            if st.button("Generate Answer"):
                llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=openai_api_key)
                qa_chain = _build_chain(llm, _QA_SYSTEM_PROMPT, _QA_USER_TEMPLATE)
                question_answer_response = qa_chain.invoke(
                    {'data': document_text, 'question': question_input}
                )
                st.write(question_answer_response)
                _offer_pdf_download(
                    "Download Answer as PDF",
                    question_answer_response,
                    "question_answer_response.pdf",
                )

        elif option == 'Vocabulary Builder':
            # Most frequent nouns in the document, with their counts
            key_terms = extract_key_terms(document_text)
            st.write("Key Terms and Definitions:")
            for term, frequency in key_terms:
                st.write(f"{term} - Frequency: {frequency}")
            _offer_pdf_download("Download Vocabulary Builder PDF", str(key_terms), "key_terms.pdf")

        elif option == 'Topic Extraction':
            # Distinct named entities found in the document
            topics = extract_topics(document_text)
            st.write("Extracted Topics:")
            for topic in topics:
                st.write(topic)
            _offer_pdf_download("Download Topics PDF", str(topics), "topics.pdf")

else:
    st.sidebar.warning("Please enter your OpenAI API Key to proceed.")