JohnsonMLEngineer committed on
Commit
479a9a1
·
verified ·
1 Parent(s): abb99ad

Upload 6 files

Browse files
chatbot.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
def chatbot_response(query, rag_chain):
    """Answer ``query`` with the retrieval chain, never raising to the caller.

    Args:
        query: The user's question; sent to the chain under the ``"input"`` key.
        rag_chain: Object exposing ``invoke(dict)`` whose result holds the
            reply under the ``"answer"`` key. May be ``None`` when the chain
            could not be built.

    Returns:
        The chain's answer, or a string starting with
        ``"Error processing query: "`` when the chain is missing or fails.
    """
    # The chain initializer signals failure by returning None; report that
    # plainly instead of leaking "'NoneType' object has no attribute 'invoke'".
    if rag_chain is None:
        return "Error processing query: RAG chain is not initialized"

    try:
        return rag_chain.invoke({"input": query})["answer"]
    except Exception as e:
        # Deliberate catch-all: this is the UI boundary, so any failure
        # (network, quota, missing "answer" key) becomes a readable message.
        return f"Error processing query: {e}"
config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
def setup_environment():
    """Run ``load_dotenv()`` and return the configured Google API key.

    Returns:
        The value of the ``GOOGLE_API_KEY`` environment variable, or ``None``
        when it is not set.
    """
    load_dotenv()
    api_key = os.environ.get("GOOGLE_API_KEY")
    return api_key
document_processor.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+
4
# NOTE(review): this module-level loader is not used by the functions below
# (the ``directory`` parameter of read_documents shadows it). It is kept only
# so that any external ``from document_processor import directory`` still works.
directory = PyPDFDirectoryLoader("documents/")


def read_documents(directory):
    """Load PDF content from a directory of PDFs or from a single PDF file.

    Args:
        directory: Path to a folder containing PDF files, or the path of one
            PDF file (the name is kept for backward compatibility).

    Returns:
        The list of documents produced by the loader's ``load()`` call.
    """
    # Function-scope imports keep this fix self-contained within the block.
    import os

    from langchain_community.document_loaders import PyPDFLoader

    # A directory loader only globs *inside* a folder, so a single-file path
    # such as "Englishfirm.pdf" must go through the single-file loader.
    if os.path.isfile(directory):
        return PyPDFLoader(directory).load()
    return PyPDFDirectoryLoader(directory).load()
7
+
8
def chunk_data(docs, chunk_size=800, chunk_overlap=40):
    """Split ``docs`` into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split, as accepted by ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for ``docs``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)
embeddings_store.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
2
+ from langchain_chroma import Chroma
3
+ from langchain.chains import create_retrieval_chain
4
+ from langchain.chains.combine_documents import create_stuff_documents_chain
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+
7
+ def create_embeddings_and_store(doc_chunks):
8
+ vectorstore = Chroma.from_documents(
9
+ documents=doc_chunks,
10
+ embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001")
11
+ )
12
+
13
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})
14
+
15
+ system_prompt = """
16
+ You are an AI assistant for question-answering tasks about Englishfirm. Englishfirm is one of the leading PTE coaching academies in Sydney, distinguished for providing 100% one-on-one coaching, a unique offering among the 52 PTE institutes in Sydney.
17
+ Englishfirm operates 7 days a week from two branches: Sydney CBD (Pitt Street) and Parramatta.
18
+ The key team members include Nimisha James (Head Trainer), Avanti (Associate Trainer), Vandana (Trainer), and Kaspin (Student Counsellor for University Admissions).alyze the provided context and answer the user's question concisely. Follow these guidelines:
19
+
20
+ 1. Utilize only the information provided in the context above to formulate your responses.
21
+ 2. If the context doesn't contain sufficient information to answer a question, respond with: "I don't have enough information to answer this question."
22
+ 3. Craft clear, direct answers limited to a maximum of seven sentences.
23
+ 4. Maintain a professional and informative tone in all interactions.
24
+ 5. Highlight Englishfirm's unique features when relevant, such as the exclusive one-on-one coaching and convenient locations.
25
+ Context:
26
+ {context}
27
+ """
28
+
29
+ prompt = ChatPromptTemplate.from_messages([
30
+ ("system", system_prompt),
31
+ ("human", "{input}"),
32
+ ])
33
+
34
+ llm_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.3, max_tokens=250)
35
+
36
+ question_answer_chain = create_stuff_documents_chain(llm_model, prompt)
37
+ return create_retrieval_chain(retriever, question_answer_chain)
rag_initializer.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from document_processor import read_documents, chunk_data
3
+ from embeddings_store import create_embeddings_and_store
4
+
5
@st.cache_resource
def _build_rag_chain(pdf_path):
    """Load, chunk and index ``pdf_path``; exceptions propagate to the caller."""
    docs = read_documents(pdf_path)
    doc_chunks = chunk_data(docs)
    return create_embeddings_and_store(doc_chunks)


def initialize_rag_chain(pdf_path="Englishfirm.pdf"):
    """Return the RAG chain for ``pdf_path``, or ``None`` if it cannot be built.

    Only the builder is cached. If the try/except lived inside the cached
    function, a failed attempt would cache ``None`` and never be retried.

    Args:
        pdf_path: Document source passed to ``read_documents``. Defaults to
            the previously hard-coded ``"Englishfirm.pdf"``.

    Returns:
        The chain from ``create_embeddings_and_store``, or ``None`` after
        showing the error with ``st.error``.
    """
    try:
        return _build_rag_chain(pdf_path)
    except Exception as e:
        # UI boundary: show the failure in the app instead of crashing it.
        st.error(f"Error initializing RAG chain: {str(e)}")
        return None


# Keep ``initialize_rag_chain.clear()`` working for callers that relied on the
# attribute added by the cache decorator.
initialize_rag_chain.clear = _build_rag_chain.clear
requirements.txt ADDED
File without changes