Spaces:
Sleeping
Sleeping
File size: 2,653 Bytes
0eb6419 315655d 0eb6419 e021db4 315655d 0eb6419 315655d e021db4 d8ae64a 0eb6419 315655d 0eb6419 315655d 0eb6419 908fcb9 0eb6419 315655d 0eb6419 315655d 0eb6419 315655d d8ae64a 0eb6419 315655d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import os
from glob import glob
import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
# Let python-dotenv populate the process environment (e.g. from a local
# .env file) before the key is looked up.
load_dotenv()

# Read the key once and hand it to the openai module; `api_key` is None
# when OPENAI_API_KEY is not set in the environment.
api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = api_key
def base_model_chatbot(messages, model="gpt-3.5-turbo", max_tokens=1500):
    """Answer a conversation with a plain OpenAI chat model (no retrieval).

    A fixed system prompt is placed in front of ``messages`` and the combined
    list is sent to the chat-completions endpoint.

    Args:
        messages: Conversation so far; expected to be a list of
            ``{"role": ..., "content": ...}`` dicts in the OpenAI chat format.
            The caller's list is not modified.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the number of tokens in the reply.

    Returns:
        The text content of the first completion choice.
    """
    system_message = [
        {"role": "system", "content": "You are a helpful AI chatbot, that answers questions asked by User."}
    ]
    request = {
        "model": model,
        "messages": system_message + list(messages),
        "max_tokens": max_tokens,
    }

    # openai>=1.0 removed `openai.ChatCompletion`; the module-level client
    # there is reached through `openai.chat.completions`. The `OpenAI` class
    # only exists in the 1.x SDK, so its presence tells the two apart.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
    else:
        response = openai.ChatCompletion.create(**request)

    # Attribute access works for both SDK generations, whereas
    # `message['content']` fails on the 1.x response objects.
    return response.choices[0].message.content
class VectorDB:
    """Build a Chroma vector store from the PDF files found in one directory."""

    def __init__(self, docs_directory: str):
        # Directory that is scanned (non-recursively) for ``*.pdf`` files.
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Load every PDF in the directory, chunk it, and embed the chunks.

        Returns:
            The object returned by ``Chroma.from_documents`` for the chunked
            documents, embedded with ``OpenAIEmbeddings``.
        """
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

        pdf_pattern = os.path.join(self.docs_directory, "*.pdf")
        # All loaders are constructed up front, then read in the same order.
        loaders = [PyPDFLoader(path) for path in glob(pdf_pattern)]
        documents = [doc for loader in loaders for doc in loader.load()]

        pieces = splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        return Chroma.from_documents(pieces, embeddings)
class ConversationalRetrievalChain:
    """Configure and build the retrieval question-answering chain.

    Args:
        model_name: Chat model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        docs_directory: Directory whose PDF files are indexed by ``VectorDB``.
            Defaults to ``'docs/'``, the location that used to be hard-coded.
        k: Number of chunks the retriever is asked to return per query.
            Defaults to ``2``, the value that used to be hard-coded.
    """

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0, *, docs_directory="docs/", k=2):
        self.model_name = model_name
        self.temperature = temperature
        self.docs_directory = docs_directory
        self.k = k

    def create_chain(self):
        """Create a ``RetrievalQA`` chain over the configured document directory.

        Each call builds a new vector store from ``docs_directory`` and a new
        conversation memory.

        Returns:
            The chain returned by ``RetrievalQA.from_chain_type``.
        """
        model = ChatOpenAI(model_name=self.model_name, temperature=self.temperature)
        # NOTE(review): the memory is attached to the chain, but whether the
        # default RetrievalQA prompt actually consumes `chat_history` should
        # be confirmed against the LangChain version in use.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB(self.docs_directory)
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity",
            search_kwargs={"k": self.k},
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
        )
def with_pdf_chatbot(messages):
    """Answer the most recent message using the PDF-backed QA chain.

    Only the last entry of ``messages`` is used: its ``'content'`` value is
    stripped of surrounding whitespace and sent as the query. A new chain is
    created through ``ConversationalRetrievalChain().create_chain()`` on
    every call.

    Args:
        messages: Non-empty sequence of mappings that each have a
            ``'content'`` string.

    Returns:
        The ``'result'`` entry of the chain's output.
    """
    latest_message = messages[-1]
    question = latest_message['content'].strip()

    chain = ConversationalRetrievalChain().create_chain()
    output = chain({"query": question})
    return output['result']
|