"""Gradio app that summarizes a PDF, given its URL, using a phidata agent."""

import os
import httpx
from dotenv import load_dotenv
import gradio as gr
from phi.agent import Agent
from phi.model.groq import Groq
from phi.storage.agent.postgres import PgAgentStorage
from phi.knowledge.pdf import PDFUrlKnowledgeBase
from phi.vectordb.pgvector import PgVector, SearchType
from phi.embedder.google import GeminiEmbedder

# Load environment variables
load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises an opaque TypeError when the key is missing. Fail fast with
# an explicit message instead.
_groq_api_key = os.getenv("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ["GROQ_API_KEY"] = _groq_api_key

# HTTP client (60 second timeout). Not referenced elsewhere in this file;
# kept so that any external user of this module-level name keeps working.
client = httpx.Client(timeout=httpx.Timeout(60.0))

# Database connection
db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai?connect_timeout=60"


# Gradio processing function
def summarize_pdf(pdf_url):
    """Summarize the PDF document located at ``pdf_url``.

    Builds a PDF knowledge base backed by PgVector for the given URL, loads
    it, and asks a Groq-hosted model to summarize the document.

    Args:
        pdf_url: URL of the PDF to summarize, as entered in the UI textbox.

    Returns:
        The content of the agent's response, or a string starting with
        "An error occurred:" when the input is blank or any step fails.
        Errors are returned rather than raised so that they are displayed
        in the Gradio output textbox.
    """
    # Reject blank input up front so the knowledge-base load below is never
    # started for a URL that cannot possibly be fetched.
    if not pdf_url or not pdf_url.strip():
        return "An error occurred: no PDF URL was provided."
    pdf_url = pdf_url.strip()

    try:
        # Set up the knowledge base
        knowledge_base = PDFUrlKnowledgeBase(
            urls=[pdf_url],
            vector_db=PgVector(
                # NOTE: table name kept exactly as-is (spelling included),
                # since it identifies an existing database table.
                table_name="recipies1",
                db_url=db_url,
                embedder=GeminiEmbedder(dimensions=768),
                search_type=SearchType.hybrid,
            ),
        )

        # Load and upsert the knowledge base.
        # NOTE(review): recreate=True presumably rebuilds the vector table on
        # every call so earlier PDFs do not leak into this summary -- confirm
        # against the phidata documentation.
        knowledge_base.load(recreate=True, upsert=True)

        # Set up storage and agent
        storage = PgAgentStorage(table_name="pdf-assistant1", db_url=db_url)
        agent = Agent(
            model=Groq(id="llama-3.3-70b-versatile"),
            knowledge=knowledge_base,
            storage=storage,
        )

        # Generate summary
        response = agent.run("Summarize the document within 5000 tokens!")
        return response.content
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"An error occurred: {e}"


# Gradio interface
iface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.Textbox(
        label="PDF URL",
        placeholder="Enter the URL of a PDF document...",
        lines=1,
    ),
    outputs=gr.Textbox(
        label="Output Summary",
        placeholder="The summary will appear here...",
        lines=15,
    ),
    examples=[
        ["https://www.joghat.org/uploads/2024-vol-7-issue-1-full-text-405.pdf"]
    ],
    description="Enter a PDF URL to generate a concise summary of the document.",
    title="PDF Summarizer",
)

# Launch the interface only when run as a script, so importing this module
# (e.g. to reuse summarize_pdf) does not start a server.
if __name__ == "__main__":
    iface.launch(share=True, debug=True)