File size: 3,621 Bytes
0eb6419
 
 
 
564ef30
0eb6419
 
 
 
564ef30
0eb6419
 
 
 
 
 
 
 
78959e0
 
 
 
 
 
 
 
 
 
 
 
 
 
0eb6419
 
78959e0
315655d
0eb6419
315655d
e0db20e
6427929
78959e0
 
 
 
 
0eb6419
 
 
 
315655d
0eb6419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564ef30
0eb6419
 
 
908fcb9
0eb6419
 
 
315655d
e0db20e
 
 
 
 
315655d
0eb6419
315655d
0eb6419
 
 
 
315655d
564ef30
0eb6419
 
 
 
78959e0
 
564ef30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
from glob import glob
import openai
from dotenv import load_dotenv

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key

# Helper function to validate response completeness
def is_response_complete(response: str) -> bool:
    """Return True if *response* ends with sentence-final punctuation.

    A response is considered complete when, after stripping surrounding
    whitespace, it ends in '.', '!' or '?'.

    BUG FIX: the original indexed ``response.strip()[-1]``, which raised
    IndexError on an empty or whitespace-only response. ``str.endswith``
    with a tuple handles the empty case safely (returns False).
    """
    return response.strip().endswith((".", "!", "?"))

# Retry mechanism for incomplete responses
def retry_response(messages):
    """Ask the model again and pad the reply if it still looks truncated.

    NOTE(review): *messages* is sent unchanged, so the "retry" is simply a
    second identical completion request; only the post-processing differs.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    reply = completion.choices[0].message['content']
    if is_response_complete(reply):
        return reply
    # Still incomplete: append a closing sentence so the user gets a
    # grammatically finished answer.
    return reply + " This is the end of the response. Please let me know if you need further clarification."

def base_model_chatbot(messages):
    """Answer *messages* with gpt-3.5-turbo after prepending a system prompt.

    If the first reply does not end in sentence-final punctuation, one
    retry is attempted via retry_response().
    """
    system_prompt = {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences and complete the thought or idea."}
    full_history = [system_prompt] + messages

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=full_history,
    )
    answer = completion.choices[0].message['content']

    # Validate response completeness; retry once if it looks cut off.
    if is_response_complete(answer):
        return answer
    return retry_response(full_history)

class VectorDB:
    """Class to manage document loading and vector database creation."""

    def __init__(self, docs_directory: str):
        # Directory scanned (non-recursively) for *.pdf files.
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Load every PDF in docs_directory, chunk it, and build a Chroma store.

        Returns:
            A Chroma vector store containing all document chunks, embedded
            with OpenAIEmbeddings.
        """
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

        pdf_files = glob(os.path.join(self.docs_directory, "*.pdf"))

        # Flatten per-file page documents into one list. (Replaces the
        # original camelCase `loadPDFs` intermediate list of loaders.)
        pdf_docs = []
        for pdf_file in pdf_files:
            pdf_docs.extend(PyPDFLoader(pdf_file).load())

        chunks = text_splitter.split_documents(pdf_docs)
        return Chroma.from_documents(chunks, OpenAIEmbeddings())
    
class ConversationalRetrievalChain:
    """Class to manage the QA chain setup.

    NOTE(review): this name shadows langchain's own
    ConversationalRetrievalChain; kept unchanged because callers may
    reference it by this name.
    """

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0):
        self.model_name = model_name
        self.temperature = temperature

    def create_chain(self):
        """Build a RetrievalQA chain over the PDFs in 'docs/'.

        Returns:
            A RetrievalQA chain with conversation memory and a similarity
            retriever that returns the top 2 matching chunks.
        """
        # BUG FIX: ChatOpenAI accepts no `system_prompt` keyword; passing
        # it raises a pydantic validation error (extra fields forbidden).
        # System-level instructions must go through the chain's prompt
        # instead, so the invalid kwarg is removed.
        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature,
        )
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB('docs/')
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity", search_kwargs={"k": 2}
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
        )
    
def with_pdf_chatbot(messages):
    """Answer the latest user message via the PDF-backed RetrievalQA chain.

    A fresh chain (and underlying vector DB) is built on every call; the
    answer is padded with a closing sentence if it looks truncated.
    """
    latest_query = messages[-1]['content'].strip()
    chain = ConversationalRetrievalChain().create_chain()
    outcome = chain({"query": latest_query})

    answer = outcome['result']
    if not is_response_complete(answer):
        answer += " This is the end of the response. Let me know if you need further clarification."
    return answer