# app.py
"""Gradio QA app over the Paul Graham essay, backed by LlamaIndex + Pinecone.

Running this script (re)creates a serverless Pinecone index, downloads the
essay if needed, embeds it via LlamaIndex, and serves a Gradio question-
answering UI on top of the resulting query engine.
"""

import logging
import os
import urllib.request

import gradio as gr
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

INDEX_NAME = "quickstart"
DIMENSION = 1536  # embedding width; NOTE(review): matches OpenAI ada-002 — confirm embed model
DATA_DIR = "data/paul_graham"
ESSAY_PATH = "data/paul_graham/paul_graham_essay.txt"
ESSAY_URL = (
    "https://raw.githubusercontent.com/run-llama/llama_index/main/"
    "docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
)


def _get_pinecone_index():
    """Create a fresh serverless Pinecone index and return a handle to it.

    Raises:
        RuntimeError: if PINECONE_API_KEY is not set (clearer than the bare
            KeyError the original os.environ[...] lookup produced).
    """
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        raise RuntimeError("PINECONE_API_KEY environment variable is not set")
    pc = Pinecone(api_key=api_key)
    # Delete index if exists (optional) — recreate from scratch so repeated
    # runs don't accumulate stale vectors.
    if INDEX_NAME in [idx["name"] for idx in pc.list_indexes()]:
        pc.delete_index(INDEX_NAME)
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    return pc.Index(INDEX_NAME)


def _ensure_data():
    """Download the essay once if it is not already on disk."""
    os.makedirs(DATA_DIR, exist_ok=True)
    if not os.path.exists(ESSAY_PATH):
        logger.info("Downloading essay to %s", ESSAY_PATH)
        urllib.request.urlretrieve(ESSAY_URL, ESSAY_PATH)


def _build_query_engine(pinecone_index):
    """Embed the local documents into Pinecone and return a query engine."""
    documents = SimpleDirectoryReader("data/paul_graham/").load_data()
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    return index.as_query_engine()


def main():
    """Wire everything together and launch the Gradio interface."""
    _ensure_data()
    query_engine = _build_query_engine(_get_pinecone_index())

    def query_doc(prompt):
        """Gradio callback: answer *prompt* from the indexed essay."""
        try:
            response = query_engine.query(prompt)
            return str(response)
        except Exception as e:  # UI boundary: surface the error, never crash the app
            logger.exception("Query failed")
            return f"Error: {str(e)}"

    gr.Interface(
        fn=query_doc,
        inputs=gr.Textbox(label="Ask a question about the document"),
        outputs=gr.Textbox(label="Answer"),
        title="Paul Graham Document QA (LlamaIndex + Pinecone)",
        description="Ask questions based on the indexed Paul Graham essay. Powered by LlamaIndex & Pinecone.",
    ).launch()


# Guard so importing this module does not delete/recreate the Pinecone index
# or launch a web server as a side effect.
if __name__ == "__main__":
    main()