import os
import streamlit as st
from openai import OpenAI
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
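# Note: these llama_index imports follow the pre-0.10 package layout; on
# llama-index >= 0.10 the same classes live under llama_index.core.* and
# the separate integration packages instead.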

# Load API keys and the Pinecone index name from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("INDEX_NAME")
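
# Fail fast with a visible error if any required environment variable is
# missing (st.stop() halts the script before any network calls are made)
if not all([openai_api_key, pinecone_api_key, index_name]):
    st.error("Set OPENAI_API_KEY, PINECONE_API_KEY, and INDEX_NAME before running.")
    st.stop()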

# Initialize OpenAI client
client = OpenAI(api_key=openai_api_key)
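# Note: `client` is not referenced again below; the query engine relies on
# LlamaIndex's default OpenAI LLM, which reads OPENAI_API_KEY from the
# environment on its own.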

# Initialize connection to Pinecone
pc = PineconeGRPC(api_key=pinecone_api_key)

# Initialize Pinecone index
pinecone_index = pc.Index(index_name)

# Sidebar dropdown for selecting the report sector (a Pinecone namespace)
st.sidebar.title("Sector Selection")
namespace = st.sidebar.selectbox(
    "Select a Namespace",
    # These values must match the namespace names stored in Pinecone exactly,
    # casing included
    ["cement", "engineering", "food", "Fuel", "IT", "ceramics", "Estate", "paper", "Tannery", "Telecommunication", "miscellaneous", "Textile", "Travel"],
)

# Initialize the vector store scoped to the selected namespace
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace=namespace)
# Quick connectivity check; the returned stats are not used
pinecone_index.describe_index_stats()

# Build an index view over the vector store and a retriever that fetches the
# 5 most similar chunks per query
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)

# Set up LlamaIndex embedding model and pipeline
embed_model = OpenAIEmbedding(api_key=openai_api_key)
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model),
        embed_model,
    ],
)
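
# The pipeline above is configured but never run in this app. A minimal
# ingestion sketch (the "reports" directory name is illustrative) would be:
#
#     from llama_index import SimpleDirectoryReader
#     documents = SimpleDirectoryReader("reports").load_data()
#     nodes = pipeline.run(documents=documents)
#     vector_store.add(nodes)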

def query_annual_report(query):
    """Answer a question using the retriever-backed query engine."""
    response = query_engine.query(query)
    return response.response

# Page header
st.header("BD Annual Reports")

with st.expander("About this App"):
    st.write(
        "This chat app lets users ask questions about annual reports from "
        "BD sectors; pick a sector in the sidebar to scope the answers."
    )

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Clear chat button
if st.sidebar.button("Clear Chat"):
    st.session_state.messages.clear()

# Display chat messages from history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("Ask a question about the selected BD sector report"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        response = query_annual_report(prompt)
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
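
# To run locally (assuming this file is saved as app.py and the three
# environment variables above are set):
#     streamlit run app.py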