Spaces:
Sleeping
Sleeping
File size: 3,621 Bytes
0eb6419 564ef30 0eb6419 564ef30 0eb6419 78959e0 0eb6419 78959e0 315655d 0eb6419 315655d e0db20e 6427929 78959e0 0eb6419 315655d 0eb6419 564ef30 0eb6419 908fcb9 0eb6419 315655d e0db20e 315655d 0eb6419 315655d 0eb6419 315655d 564ef30 0eb6419 78959e0 564ef30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import os
from glob import glob
import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
# Load variables from a local .env file (if one exists) into the process
# environment *before* reading the key, so OPENAI_API_KEY may come from either
# the real environment or the .env file.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")  # None when the variable is not set
# Set the module-level key on the openai package for the openai.* calls below.
openai.api_key = api_key
# Helper function to validate response completeness
def is_response_complete(response: str) -> bool:
    """Return True when *response* ends with sentence-final punctuation.

    Surrounding whitespace is ignored. A response counts as complete only if
    its last visible character is one of ``.``, ``!`` or ``?``.

    Args:
        response: Text produced by the model. ``None``, an empty string, or a
            whitespace-only string is treated as incomplete instead of
            raising.

    Returns:
        True if the trimmed text ends with ``.``, ``!`` or ``?``; otherwise
        False.
    """
    # Guard first: indexing the last character of an empty string would raise
    # IndexError, and None has no string methods at all.
    if not response:
        return False
    return response.rstrip().endswith((".", "!", "?"))
# Second attempt for replies that did not end in a complete sentence
def retry_response(messages):
    """Request one fresh completion for *messages* and return its text.

    Despite the name, this sends exactly one request and does not loop. If
    the new reply still fails ``is_response_complete``, a fixed closing
    sentence is appended so the returned text ends cleanly.

    Args:
        messages: Chat messages passed unchanged to the chat-completion call.

    Returns:
        The reply text, with the closing sentence appended when the reply
        was judged incomplete.
    """
    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface --
    # confirm the pinned openai version still provides it.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    answer = completion.choices[0].message['content']

    if is_response_complete(answer):
        return answer
    return answer + " This is the end of the response. Please let me know if you need further clarification."
def base_model_chatbot(messages):
    """Answer a conversation with the plain chat model (no document lookup).

    A fixed system instruction is placed ahead of *messages* before the
    request is sent. If the reply does not pass ``is_response_complete``,
    the same conversation (system instruction included) is handed to
    ``retry_response`` and that result is returned instead.

    Args:
        messages: List of chat message dicts; it is concatenated onto a list
            holding the system message.

    Returns:
        The reply text from the first request, or from ``retry_response``
        when the first reply was judged incomplete.
    """
    instruction = {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences and complete the thought or idea."}
    conversation = [instruction] + messages

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    answer = completion.choices[0].message['content']

    # Validate response completeness; fall back to a second request if needed
    if is_response_complete(answer):
        return answer
    return retry_response(conversation)
class VectorDB:
    """Loads the PDFs found in a directory and indexes them in Chroma."""

    def __init__(self, docs_directory: str):
        # Directory whose immediate ``*.pdf`` files are indexed.
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Build a Chroma vector store from the directory's PDF files.

        Every ``*.pdf`` directly inside ``docs_directory`` is loaded with
        ``PyPDFLoader``. The loaded documents are split into chunks of 1000
        characters with 100 characters of overlap, then embedded with
        ``OpenAIEmbeddings``.

        Returns:
            The object returned by ``Chroma.from_documents`` for the chunks.
        """
        pdf_pattern = os.path.join(self.docs_directory, "*.pdf")
        # Construct every loader before loading any of them.
        loaders = [PyPDFLoader(pdf_path) for pdf_path in glob(pdf_pattern)]
        documents = [doc for loader in loaders for doc in loader.load()]

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = splitter.split_documents(documents)
        return Chroma.from_documents(chunks, OpenAIEmbeddings())
class ConversationalRetrievalChain:
    """Class to manage the QA chain setup.

    Holds the model settings, the directory of PDFs to index, and the system
    instruction, and builds a ``RetrievalQA`` chain from them on request.
    """

    # System instruction used when the caller does not supply one.
    DEFAULT_SYSTEM_PROMPT = (
        "You are a knowledgeable AI that answers questions based on provided documents. "
        "Always give responses in clear, complete sentences."
    )

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0,
                 docs_directory="docs/", system_prompt=None):
        """Store the chain configuration.

        Args:
            model_name: Chat model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
            docs_directory: Directory of PDF files handed to ``VectorDB``.
            system_prompt: System instruction for the QA prompt; ``None``
                selects ``DEFAULT_SYSTEM_PROMPT``.
        """
        self.model_name = model_name
        self.temperature = temperature
        self.docs_directory = docs_directory
        self.system_prompt = (
            self.DEFAULT_SYSTEM_PROMPT if system_prompt is None else system_prompt
        )

    def create_chain(self):
        """Build a ``RetrievalQA`` chain over the PDFs in ``docs_directory``.

        The vector store is created on each call, the two most similar
        chunks are retrieved per query, and the system instruction is placed
        in the system message of the QA prompt.

        Returns:
            The chain produced by ``RetrievalQA.from_chain_type``; it takes
            ``{"query": ...}`` as input.
        """
        # Imported locally so the module's top-level import block is untouched.
        from langchain.prompts import ChatPromptTemplate

        # ``system_prompt`` is not a ChatOpenAI parameter, so the instruction
        # is supplied through the QA prompt instead of the model constructor.
        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature,
        )

        # Escape braces so instruction text is never parsed as a template
        # variable; only {context} and {question} remain placeholders.
        instruction = self.system_prompt.replace("{", "{{").replace("}", "}}")
        qa_prompt = ChatPromptTemplate.from_messages([
            (
                "system",
                instruction
                + "\n\nUse the following pieces of context to answer the user's question. "
                "If you don't know the answer, just say that you don't know, "
                "don't try to make up an answer.\n----------------\n{context}",
            ),
            ("human", "{question}"),
        ])

        # NOTE(review): the memory is attached to the chain, but the prompt
        # has no chat_history slot, so earlier turns are presumably recorded
        # without being shown to the model -- confirm intended behaviour.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        vector_db = VectorDB(self.docs_directory)
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity",
            search_kwargs={"k": 2},
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
            chain_type_kwargs={"prompt": qa_prompt},
        )
def with_pdf_chatbot(messages):
    """Answer the latest user message using the indexed PDF documents.

    Only the ``content`` of the last entry in *messages* is used as the
    query. If the chain's answer is empty or does not end in sentence-final
    punctuation, a fixed closing sentence is appended.

    Args:
        messages: Non-empty sequence of chat message dicts; the last one
            must have a ``'content'`` string.

    Returns:
        The answer text, with the closing sentence appended when the answer
        was empty or judged incomplete.
    """
    closing = " This is the end of the response. Let me know if you need further clarification."

    query = messages[-1]['content'].strip()
    # NOTE(review): a new chain (and therefore a new vector index) is built on
    # every call -- confirm this per-query cost is intended.
    qa_chain = ConversationalRetrievalChain().create_chain()
    result = qa_chain({"query": query})

    # Normalise a missing answer to "" so the checks below cannot raise.
    answer = result['result'] or ""
    # Check for blank text first: the completeness helper indexes the last
    # character and would fail on an empty answer.
    if not answer.strip() or not is_response_complete(answer):
        answer += closing
    return answer