# NOTE: Hugging Face Spaces page header and git-blame gutter (scraping
# artifacts) removed here — they were not part of the source file.
import openai
import os
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
# Set Streamlit page configuration (must run before any other st.* call).
st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")
# Load environment variables from .env file into the process environment.
load_dotenv()
# Retrieve OpenAI API key from environment; fail fast at import time if absent,
# since every request below depends on it.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OpenAI API key not found. Set it in the .env file or environment variables.")
openai.api_key = OPENAI_API_KEY
# Function to generate response from OpenAI API
def generate_openai_response(instruction, context=None):
    """Send *instruction* (and optional *context*) to the chat model; return its reply.

    Failures are not raised: any exception is rendered as an ``"Error: ..."``
    string so the Streamlit UI can display it directly.

    NOTE(review): ``openai.ChatCompletion`` is the legacy 0.x client API and
    was removed in openai>=1.0 — confirm the pinned library version.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
    ]
    if context:
        # Retrieved notes ride along as an extra user message.
        conversation.append({"role": "user", "content": f"Context: {context}"})
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1200,
            temperature=0.7,
        )
        return completion["choices"][0]["message"]["content"]
    except Exception as exc:
        return f"Error: {str(exc)}"
# Extract text from .txt files in a folder
def get_text_files_content(folder):
    """Concatenate the contents of every ``.txt`` file directly inside *folder*.

    Each file's text is followed by a newline separator. Files are read in
    sorted filename order so the result is deterministic (``os.listdir``
    order is OS-dependent). Returns an empty string when the folder holds
    no ``.txt`` files.

    Args:
        folder: Path to a directory containing plain-text note files.

    Returns:
        The combined text of all ``.txt`` files, UTF-8 decoded.
    """
    parts = []
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.txt'):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as fh:
                # Join once at the end instead of quadratic string +=.
                parts.append(fh.read() + "\n")
    return "".join(parts)
# Convert raw text into manageable chunks
def get_chunks(raw_text):
    """Split *raw_text* into overlapping pieces suitable for embedding.

    Chunks are newline-separated, roughly 1000 characters each with a
    200-character overlap — small sizes chosen to keep retrieval fast.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_text(raw_text)
    return chunks
# Create a FAISS vectorstore using OpenAI embeddings
def get_vectorstore(chunks):
    """Embed *chunks* with OpenAI embeddings and index them in a FAISS store."""
    return FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())
# Handle user queries by fetching relevant context and generating responses
def handle_question(question, vectorstore=None):
    """Answer *question*, grounding it in *vectorstore* content when available."""
    if not vectorstore:
        # No notes loaded — fall back to an instruction-only prompt.
        return generate_openai_response(question)
    # Fetch the two most similar chunks, then cap the combined context at
    # 1000 characters to keep the request small and fast.
    docs = vectorstore.similarity_search(question, k=2)
    context = "\n".join(doc.page_content for doc in docs)[:1000]
    return generate_openai_response(question, context)
# Main function for the Streamlit app
def main():
    """Streamlit entry point: pick notes, preview them, and chat over them.

    Flow per rerun: choose content type and subject in the sidebar, load the
    matching text from disk, rebuild the FAISS vectorstore (stored in session
    state), then answer the user's question against it.
    """
    st.title("Chat with Notes :books:")
    # Initialize session state for vectorstore so it survives Streamlit reruns.
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    # Define folders for Current Affairs and Essays.
    data_folder = "data"  # Folder of per-subject subfolders of .txt notes
    essay_folder = "essays"  # Folder of individual .txt essay files
    # Content type selection.
    content_type = st.sidebar.radio("Select Content Type:", ["Current Affairs", "Essays"])
    # Populate subject list based on selected content type.
    # Current Affairs subjects are subdirectories; Essays are bare filenames
    # with the .txt suffix stripped. Either list is empty if the folder is missing.
    if content_type == "Current Affairs":
        subjects = [f for f in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, f))] if os.path.exists(data_folder) else []
    elif content_type == "Essays":
        subjects = [f.replace(".txt", "") for f in os.listdir(essay_folder) if f.endswith('.txt')] if os.path.exists(essay_folder) else []
    # Subject selection.
    selected_subject = st.sidebar.selectbox("Select a Subject:", subjects)
    # Load and process the selected subject.
    raw_text = ""
    if content_type == "Current Affairs" and selected_subject:
        subject_folder = os.path.join(data_folder, selected_subject)
        raw_text = get_text_files_content(subject_folder)
    elif content_type == "Essays" and selected_subject:
        subject_file = os.path.join(essay_folder, selected_subject + ".txt")
        if os.path.exists(subject_file):
            with open(subject_file, "r", encoding="utf-8") as file:
                raw_text = file.read()
    # Display notes preview (first 2000 chars only, read-only widget).
    if raw_text:
        st.subheader("Preview of Notes")
        st.text_area("Preview Content:", value=raw_text[:2000], height=300, disabled=True)
        # Generate vectorstore for the selected notes.
        # NOTE(review): this re-embeds the notes on every rerun (every widget
        # interaction) — consider caching keyed on the selected subject.
        text_chunks = get_chunks(raw_text)
        vectorstore = get_vectorstore(text_chunks)
        st.session_state.vectorstore = vectorstore
    else:
        st.warning("No content available for the selected subject.")
    # Chat interface.
    st.subheader("Ask Your Question")
    question = st.text_input("Ask a question about your selected subject:")
    if question:
        if st.session_state.vectorstore:
            response = handle_question(question, st.session_state.vectorstore)
            st.subheader("Answer:")
            st.write(response)
        else:
            st.warning("Please load the content for the selected subject before asking a question.")
# Run the app
# Script entry point: launch the Streamlit app.
if __name__ == '__main__':
    main()