import os

from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma

load_dotenv()
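# OPENAI_API_KEY is read from a local .env file. A minimal example
# layout (an assumption; adjust to your environment):
#
#   OPENAI_API_KEY=sk-...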

openai_api_key = os.getenv('OPENAI_API_KEY')

def create_conversation(query: str, chat_history: list, collection_name: str) -> tuple:
    """Answer `query` against the given Chroma collection and return
    ('', updated chat_history, formatted source documents)."""
    output = ""  # initialized up front so the except path can return it safely
    try:
        embeddings = OpenAIEmbeddings(
            openai_api_key=openai_api_key
        )
        
        persist_directory = './db_metadata'
        
        # Open the existing Chroma collection persisted on disk.
        db = Chroma(
            collection_name=collection_name,
            persist_directory=persist_directory,
            embedding_function=embeddings
        )
        
        # Buffer the conversation so the chain can condense follow-up
        # questions; keys match ConversationalRetrievalChain's inputs/outputs.
        memory = ConversationBufferMemory(
            memory_key='chat_history',
            return_messages=False,
            input_key='question',
            output_key='answer',
        )
        # 'stuff' chain type: retrieved documents are placed directly
        # into the prompt.
        cqa = ConversationalRetrievalChain.from_llm(
            llm=ChatOpenAI(temperature=0.0,
                           openai_api_key=openai_api_key),
            chain_type='stuff',
            retriever=db.as_retriever(),
            memory=memory,
            get_chat_history=lambda h: h,
            verbose=True,
            return_source_documents=True,
        )
        # Run the chain; the result includes the answer and source documents.
        result = cqa({'question': query, 'chat_history': chat_history})

        source_documents = result.get('source_documents', [])

        # Keep only 'page', 'source' (file name), and 'page_content' per document.
        filtered_documents = [
            {
                'page': doc.metadata.get('page'),
                'source': os.path.basename(doc.metadata.get('source', '')),
                'page_content': doc.page_content,
            }
            for doc in source_documents
        ]
        # Format the filtered documents with a blank line between them.
        output = ""
        for doc in filtered_documents:
            output += f"Page: {doc['page']}\n"
            output += f"Source: {doc['source']}\n"
            output += f"Page Content: {doc['page_content']}\n"
            output += "\n"  # blank line between documents for readability

        chat_history.append((query, result['answer']))
        return '', chat_history, output
    except Exception as e:
        # Surface the error in the chat history; `output` was initialized
        # before the try block, so returning it here cannot raise NameError.
        chat_history.append((query, f"Unexpected Error: {e}"))
        return '', chat_history, output
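

# A minimal usage sketch (an assumption, not part of the original module):
# the query and collection name below are hypothetical, and the
# './db_metadata' Chroma store must already exist and be populated.
if __name__ == '__main__':
    history: list = []
    _, history, sources = create_conversation(
        'What does the document say about pricing?',  # hypothetical query
        history,
        'my_collection',  # hypothetical collection name
    )
    print(history[-1][1])  # latest answer
    print(sources)         # formatted source documents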