# VoiceChat / generate_answer.py
# shukdevdatta123's picture
# Update generate_answer.py
# 564ef30 verified
import os
from glob import glob
import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
# Load variables from a local .env file (if present) before the key is read below.
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is unset; no validation happens here.
api_key = os.getenv("OPENAI_API_KEY")
# Set the module-level key on the openai package for the ChatCompletion calls in this file.
openai.api_key = api_key
# Helper function to validate response completeness
def is_response_complete(response: str) -> bool:
    """Return True when *response* ends with sentence-final punctuation.

    Surrounding whitespace is ignored. An empty, whitespace-only, or ``None``
    response is reported as incomplete rather than raising ``IndexError``.

    Args:
        response: The text produced by the model.

    Returns:
        ``True`` if the stripped text ends with ``.``, ``!`` or ``?``;
        ``False`` otherwise.
    """
    # str.endswith accepts a tuple of suffixes and is safe on the empty
    # string, unlike indexing with [-1].
    return bool(response) and response.strip().endswith((".", "!", "?"))
# Retry mechanism for incomplete responses
def retry_response(messages):
    """Request one more completion and pad it if it still looks unfinished.

    Args:
        messages: Chat messages forwarded unchanged to
            ``openai.ChatCompletion.create``.

    Returns:
        The new reply text. When ``is_response_complete`` rejects it, a fixed
        closing sentence is appended; no further request is made.
    """
    completion = openai.ChatCompletion.create(
        messages=messages,
        model="gpt-3.5-turbo",
    )
    answer = completion.choices[0].message['content']
    if is_response_complete(answer):
        return answer
    # Still unfinished: close it off with a canned sentence.
    return answer + " This is the end of the response. Please let me know if you need further clarification."
def base_model_chatbot(messages):
    """Answer a conversation with the plain chat model (no document retrieval).

    Args:
        messages: List of chat message dicts; a system message is placed in
            front of them before the request is sent. The list passed in is
            not modified.

    Returns:
        The first reply when ``is_response_complete`` accepts it, otherwise
        whatever ``retry_response`` returns for the same conversation.
    """
    system_turn = {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences and complete the thought or idea."}
    conversation = [system_turn] + messages
    completion = openai.ChatCompletion.create(
        messages=conversation,
        model="gpt-3.5-turbo",
    )
    first_reply = completion.choices[0].message['content']
    if is_response_complete(first_reply):
        return first_reply
    # The reply did not end in sentence punctuation; hand off to the retry helper.
    return retry_response(conversation)
class VectorDB:
    """Loads the PDFs of one directory and indexes them in a Chroma store."""

    def __init__(self, docs_directory: str):
        """Remember the directory that will be searched for ``*.pdf`` files."""
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Build a Chroma vector store from every PDF in ``docs_directory``.

        Only files matching ``*.pdf`` directly inside the directory are read.
        The loaded documents are split into chunks of 1000 characters with an
        overlap of 100 and embedded with ``OpenAIEmbeddings``.

        Returns:
            The object returned by ``Chroma.from_documents``.
        """
        pattern = os.path.join(self.docs_directory, "*.pdf")
        # Construct every loader first, then load, as two separate phases.
        loaders = [PyPDFLoader(path) for path in glob(pattern)]
        pages = [page for loader in loaders for page in loader.load()]
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        return Chroma.from_documents(splitter.split_documents(pages), OpenAIEmbeddings())
class ConversationalRetrievalChain:
    """Configure and build a ``RetrievalQA`` chain over the PDFs in a directory.

    Args:
        model_name: Chat model identifier passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        docs_directory: Directory whose ``*.pdf`` files are indexed. Defaults
            to ``"docs/"``, the path that was previously hard-coded.
    """

    # Instruction for the model. ``ChatOpenAI`` has no ``system_prompt``
    # parameter, so the text is delivered through the chain's prompt instead
    # of being passed to the model constructor.
    SYSTEM_INSTRUCTION = "You are a knowledgeable AI that answers questions based on provided documents. Always give responses in clear, complete sentences."

    # A "stuff" QA prompt must expose ``{context}`` (retrieved chunks) and
    # ``{question}`` (the user's query).
    PROMPT_TEMPLATE = (
        SYSTEM_INSTRUCTION
        + "\n\nUse the following pieces of context to answer the question at the end. "
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer."
        "\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"
    )

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0, docs_directory="docs/"):
        self.model_name = model_name
        self.temperature = temperature
        self.docs_directory = docs_directory

    def create_chain(self):
        """Create the QA chain: chat model + similarity retriever + memory.

        Returns:
            The chain produced by ``RetrievalQA.from_chain_type``; it is
            called with ``{"query": ...}`` and exposes the answer under
            ``"result"``.
        """
        # Local import keeps this block self-contained; langchain is already
        # a dependency of this file.
        from langchain.prompts import PromptTemplate

        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature,
        )
        prompt = PromptTemplate(
            template=self.PROMPT_TEMPLATE,
            input_variables=["context", "question"],
        )
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB(self.docs_directory)
        # Retrieve the two most similar chunks for each query.
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity",
            search_kwargs={"k": 2},
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
            chain_type_kwargs={"prompt": prompt},
        )
def with_pdf_chatbot(messages):
    """Answer the latest message using the PDF-backed retrieval chain.

    Args:
        messages: List of chat message dicts; only the ``'content'`` of the
            last entry is used, with surrounding whitespace stripped.

    Returns:
        The chain's ``'result'`` text. When ``is_response_complete`` rejects
        it, a fixed closing sentence is appended.
    """
    latest_question = messages[-1]['content'].strip()
    # A fresh chain is built on every call.
    chain = ConversationalRetrievalChain().create_chain()
    answer = chain({"query": latest_question})['result']
    if is_response_complete(answer):
        return answer
    return answer + " This is the end of the response. Let me know if you need further clarification."