# NOTE: Hugging Face Spaces page header and git-blame gutter (scraping
# artifacts) removed here — they were not part of the source file.
import openai
import os
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
# Set Streamlit page configuration (must run before any other st.* call).
st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")
# Load environment variables from .env file into the process environment.
load_dotenv()
# Retrieve OpenAI API key from environment; fail fast at import time if absent,
# since every request below depends on it.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OpenAI API key not found. Set it in the .env file or environment variables.")
openai.api_key = OPENAI_API_KEY
# Function to generate response from OpenAI API
def generate_openai_response(instruction, context=None):
    """Send *instruction* (and optional *context*) to the chat model; return its reply.

    Failures are not raised: any exception is rendered as an ``"Error: ..."``
    string so the Streamlit UI can display it directly.

    NOTE(review): ``openai.ChatCompletion`` is the legacy 0.x client API and
    was removed in openai>=1.0 — confirm the pinned library version.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
    ]
    if context:
        # Retrieved notes ride along as an extra user message.
        conversation.append({"role": "user", "content": f"Context: {context}"})
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1200,
            temperature=0.7,
        )
        return completion["choices"][0]["message"]["content"]
    except Exception as exc:
        return f"Error: {str(exc)}"
# Extract text from .txt files in a folder
def get_text_files_content(folder):
    """Concatenate the contents of every ``.txt`` file directly inside *folder*.

    Each file's text is followed by a newline separator. Files are read in
    sorted filename order so the result is deterministic (``os.listdir``
    order is OS-dependent). Returns an empty string when the folder holds
    no ``.txt`` files.

    Args:
        folder: Path to a directory containing plain-text note files.

    Returns:
        The combined text of all ``.txt`` files, UTF-8 decoded.
    """
    parts = []
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.txt'):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as fh:
                # Join once at the end instead of quadratic string +=.
                parts.append(fh.read() + "\n")
    return "".join(parts)
# Convert raw text into manageable chunks
def get_chunks(raw_text):
    """Split *raw_text* into overlapping pieces suitable for embedding.

    Chunks are newline-separated, roughly 1000 characters each with a
    200-character overlap — small sizes chosen to keep retrieval fast.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_text(raw_text)
    return chunks
# Create a FAISS vectorstore using OpenAI embeddings
def get_vectorstore(chunks):
    """Embed *chunks* with OpenAI embeddings and index them in a FAISS store."""
    return FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())
# Handle user queries by fetching relevant context and generating responses
def handle_question(question, vectorstore=None):
    """Answer *question*, grounding it in *vectorstore* content when available."""
    if not vectorstore:
        # No notes loaded — fall back to an instruction-only prompt.
        return generate_openai_response(question)
    # Fetch the two most similar chunks, then cap the combined context at
    # 1000 characters to keep the request small and fast.
    docs = vectorstore.similarity_search(question, k=2)
    context = "\n".join(doc.page_content for doc in docs)[:1000]
    return generate_openai_response(question, context)
# Main function for the Streamlit app
def main():
    """Streamlit entry point: pick notes, preview them, and chat over them.

    Flow per rerun: choose content type and subject in the sidebar, load the
    matching text from disk, rebuild the FAISS vectorstore (stored in session
    state), then answer the user's question against it.
    """
    st.title("Chat with Notes :books:")
    # Initialize session state for vectorstore so it survives Streamlit reruns.
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    # Define folders for Current Affairs and Essays.
    data_folder = "data"  # Folder of per-subject subfolders of .txt notes
    essay_folder = "essays"  # Folder of individual .txt essay files
    # Content type selection.
    content_type = st.sidebar.radio("Select Content Type:", ["Current Affairs", "Essays"])
    # Populate subject list based on selected content type.
    # Current Affairs subjects are subdirectories; Essays are bare filenames
    # with the .txt suffix stripped. Either list is empty if the folder is missing.
    if content_type == "Current Affairs":
        subjects = [f for f in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, f))] if os.path.exists(data_folder) else []
    elif content_type == "Essays":
        subjects = [f.replace(".txt", "") for f in os.listdir(essay_folder) if f.endswith('.txt')] if os.path.exists(essay_folder) else []
    # Subject selection.
    selected_subject = st.sidebar.selectbox("Select a Subject:", subjects)
    # Load and process the selected subject.
    raw_text = ""
    if content_type == "Current Affairs" and selected_subject:
        subject_folder = os.path.join(data_folder, selected_subject)
        raw_text = get_text_files_content(subject_folder)
    elif content_type == "Essays" and selected_subject:
        subject_file = os.path.join(essay_folder, selected_subject + ".txt")
        if os.path.exists(subject_file):
            with open(subject_file, "r", encoding="utf-8") as file:
                raw_text = file.read()
    # Display notes preview (first 2000 chars only, read-only widget).
    if raw_text:
        st.subheader("Preview of Notes")
        st.text_area("Preview Content:", value=raw_text[:2000], height=300, disabled=True)
        # Generate vectorstore for the selected notes.
        # NOTE(review): this re-embeds the notes on every rerun (every widget
        # interaction) — consider caching keyed on the selected subject.
        text_chunks = get_chunks(raw_text)
        vectorstore = get_vectorstore(text_chunks)
        st.session_state.vectorstore = vectorstore
    else:
        st.warning("No content available for the selected subject.")
    # Chat interface.
    st.subheader("Ask Your Question")
    question = st.text_input("Ask a question about your selected subject:")
    if question:
        if st.session_state.vectorstore:
            response = handle_question(question, st.session_state.vectorstore)
            st.subheader("Answer:")
            st.write(response)
        else:
            st.warning("Please load the content for the selected subject before asking a question.")
# Run the app
# Script entry point: launch the Streamlit app.
if __name__ == '__main__':
    main()