import os
from getpass import getpass
import gradio as gr
pinecone_api_key = os.getenv("PINECONE_API_KEY") or getpass("Enter your Pinecone API Key: ")
openai_api_key = os.getenv("OPENAI_API_KEY") or getpass("Enter your OpenAI API Key: ")
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
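# Note: these top-level imports follow the pre-0.10 llama-index package layout;
# in llama-index >= 0.10 the equivalents live under llama_index.core and the
# dedicated integration packages.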
# This embedding model is used both for semantic node parsing and for vectorization
embed_model = OpenAIEmbedding(api_key=openai_api_key)
# Define the initial pipeline
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)
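# A minimal sketch of running the pipeline (assumes a local "data/" folder of
# source documents; pass vector_store=vector_store to IngestionPipeline above
# if the embedded nodes should be upserted into Pinecone automatically):
# from llama_index import SimpleDirectoryReader
# documents = SimpleDirectoryReader("data").load_data()
# nodes = pipeline.run(documents=documents)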
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
# Initialize connection to Pinecone
pc = PineconeGRPC(api_key=pinecone_api_key)
index_name = "anualreport"
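# If the index does not exist yet, it can be created first. A sketch, assuming
# 1536-dimensional OpenAI embeddings and an AWS us-east-1 serverless region:
# if index_name not in pc.list_indexes().names():
#     pc.create_index(
#         name=index_name,
#         dimension=1536,
#         metric="cosine",
#         spec=ServerlessSpec(cloud="aws", region="us-east-1"),
#     )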
# Initialize your index
pinecone_index = pc.Index(index_name)
# Initialize VectorStore
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
# Sanity-check the connection by printing index stats (vector count, dimension, etc.)
print(pinecone_index.describe_index_stats())
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
# Set the OpenAI API key if not already set
if not os.getenv('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = openai_api_key
# Instantiate VectorStoreIndex object from our vector_store object
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
# Configure the retriever to return the 5 most similar nodes per query
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
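# The retriever can also be exercised on its own, e.g. (hypothetical query):
# nodes = retriever.retrieve("What were the key financial highlights?")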
from llama_index.query_engine import RetrieverQueryEngine
# Pass in your retriever from above, which is configured to return the top 5 results
query_engine = RetrieverQueryEngine(retriever=retriever)
def query_annual_report(query):
    response = query_engine.query(query)
    return response.response
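# Example direct call (hypothetical question, for testing outside the UI):
# print(query_annual_report("What was total revenue for the fiscal year?"))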
# Define Gradio Interface
iface = gr.Interface(
    fn=query_annual_report,
    inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
    outputs="text",
    title="Annual Report Query",
    description="Ask questions about the annual report.",
)
iface.launch()