File size: 2,401 Bytes
e24982e 9ae9134 b3dd8dc e24982e 9eeafb7 8d877c4 2db6d7c b9c49de b3dd8dc cd986d0 16fef80 2db6d7c b3dd8dc 2db6d7c cd986d0 8d877c4 cd986d0 8d877c4 a52eb23 2db6d7c 8d877c4 b9c49de b3dd8dc a52eb23 2db6d7c e24982e b3dd8dc 2db6d7c 8c67ed3 b3dd8dc 9ae9134 b3dd8dc 9ae9134 b3dd8dc 9ae9134 b3dd8dc 9ae9134 b3dd8dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import os
import time
from getpass import getpass

import streamlit as st
from llama_index import VectorStoreIndex
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever
from llama_index.vector_stores import PineconeVectorStore
from openai import OpenAI
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC
# Read both API keys from the process environment.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast with a readable message in the app instead of letting a client
# constructor raise on a missing key further down.
_missing_keys = [
    env_name
    for env_name, env_value in (
        ("PINECONE_API_KEY", pinecone_api_key),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not env_value
]
if _missing_keys:
    st.error(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
    )
    st.stop()

# NOTE(review): Streamlit re-executes this script on every interaction, so
# everything below is rebuilt on each rerun; consider st.cache_resource.

# Initialize OpenAI client
client = OpenAI(api_key=openai_api_key)

# Initialize connection to Pinecone
pc = PineconeGRPC(api_key=pinecone_api_key)
# NOTE(review): the name is spelled "anualreport"; it has to match the
# existing Pinecone index exactly, so confirm before "correcting" it.
index_name = "anualreport"

# Handle to the existing Pinecone index
pinecone_index = pc.Index(index_name)

# Wrap the Pinecone index as a LlamaIndex vector store
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# The return value is not used; presumably kept as a connectivity check
# against the index -- TODO confirm it is still wanted.
pinecone_index.describe_index_stats()

# Build the vector index, a top-5 similarity retriever, and the query engine
# that query_annual_report() relies on.
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)

# Set up the LlamaIndex embedding model and ingestion pipeline: semantic
# splitting first, then embedding with the same model.
embed_model = OpenAIEmbedding(api_key=openai_api_key)
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)
def query_annual_report(query):
    """Send *query* to the module-level ``query_engine`` and return its answer.

    Args:
        query: The question to pass to ``query_engine.query``.

    Returns:
        The ``response`` attribute of the object that
        ``query_engine.query`` returns.
    """
    return query_engine.query(query).response
# Page title
st.title("ChatGPT-like Clone with Pinecone Integration")

# Create the chat history the first time the session runs this script.
if "messages" not in st.session_state:
    st.session_state["messages"] = []
chat_history = st.session_state["messages"]

# Replay every stored message so the conversation survives reruns.
for past_message in chat_history:
    st.chat_message(past_message["role"]).markdown(past_message["content"])

# Handle a newly submitted prompt, if any.
prompt = st.chat_input("What is up?")
if prompt:
    # Record and echo the user's message.
    chat_history.append({"role": "user", "content": prompt})
    st.chat_message("user").markdown(prompt)

    # Open the assistant bubble before querying, then fill it with the answer.
    assistant_box = st.chat_message("assistant")
    response = query_annual_report(prompt)
    assistant_box.markdown(response)
    chat_history.append({"role": "assistant", "content": response})
|