import os
import streamlit as st
# Note: these are legacy (pre-0.10) llama_index import paths
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
# Read the API keys and Pinecone index name from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("INDEX_NAME")
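# Optional guard (a minimal sketch, not in the original script): stop the app
# early with a clear message if a required environment variable is missing.
missing = [name for name, value in [
    ("OPENAI_API_KEY", openai_api_key),
    ("PINECONE_API_KEY", pinecone_api_key),
    ("INDEX_NAME", index_name),
] if not value]
if missing:
    st.error(f"Missing environment variables: {', '.join(missing)}")
    st.stop()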
# Initialize connection to Pinecone
pc = PineconeGRPC(api_key=pinecone_api_key)
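# Optional sanity check (a sketch, assuming the Pinecone v3 client API, where
# list_indexes() returns an object with a names() helper): confirm the index
# exists before connecting to it.
if index_name not in pc.list_indexes().names():
    st.error(f"Pinecone index '{index_name}' was not found.")
    st.stop()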
# Initialize Pinecone index
pinecone_index = pc.Index(index_name)
# Sidebar dropdown for selecting the sector namespace
st.sidebar.title("Sector Selection")
namespace = st.sidebar.selectbox(
    "Select a Namespace",
    ["cement", "engineering", "food", "Fuel", "IT", "ceramics", "Estate",
     "paper", "Tannery", "Telecommunication", "miscellaneous", "Textile",
     "Travel"],
)
# Initialize the vector store scoped to the selected namespace
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace=namespace)
# Lightweight connectivity check; the returned stats are not used
pinecone_index.describe_index_stats()
# Build the vector index, retriever, and query engine over the store
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)
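# Optional optimization (a commented-out sketch, not part of the original app):
# Streamlit re-executes this script on every interaction, so the engine could
# be cached per namespace with st.cache_resource instead of being rebuilt:
#
# @st.cache_resource
# def get_query_engine(ns: str) -> RetrieverQueryEngine:
#     store = PineconeVectorStore(pinecone_index=pinecone_index, namespace=ns)
#     index = VectorStoreIndex.from_vector_store(vector_store=store)
#     retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
#     return RetrieverQueryEngine(retriever=retriever)
#
# query_engine = get_query_engine(namespace)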
# Set up the LlamaIndex embedding model and ingestion pipeline
embed_model = OpenAIEmbedding(api_key=openai_api_key)
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)
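# The pipeline above mirrors the ingestion-time configuration and is never run
# in this chat flow. At ingestion time it would be invoked roughly as below,
# where "documents" is a hypothetical list of llama_index Document objects:
#
# nodes = pipeline.run(documents=documents)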
def query_annual_report(query):
    """Run the query through the retriever-backed engine and return the answer text."""
    response = query_engine.query(query)
    return response.response
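# A hedged variant (sketch, not in the original): in production the call could
# be wrapped so a backend failure surfaces as a chat message rather than a
# Streamlit stack trace, e.g.:
#
# def query_annual_report(query):
#     try:
#         return query_engine.query(query).response
#     except Exception as exc:
#         return f"Sorry, the query failed: {exc}"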
# App title and description
st.header("BD Annual Reports")
with st.expander("About this App"):
    st.write(
        "This chat app lets users query custom annual reports from BD sectors."
    )
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
# Clear chat button
if st.sidebar.button("Clear Chat"):
    st.session_state.messages.clear()
# Display chat messages from history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Accept user input, answer it, and record both turns in the history
if prompt := st.chat_input("Ask a question about the selected BD sector's annual reports"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        response = query_annual_report(prompt)
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})