import os
import time
from datetime import datetime

import gradio as gr
import httpx
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_parse import LlamaParse

# Define the LLM used to answer queries.
llm = HuggingFaceInferenceAPI(model_name="tiiuae/falcon-7b-instruct")

# Read the LlamaParse key from the environment instead of hardcoding it
# (the original snippet embedded a live key, which should not be committed).
parser = LlamaParse(api_key=os.environ["LLAMA_CLOUD_API_KEY"], result_type="markdown")
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader("data/", file_extractor=file_extractor).load_data()

# Embed the documents and build an in-memory vector index.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
query_engine = vector_index.as_query_engine(llm=llm)


# Query the engine, retrying on read timeouts from the inference API.
def query_with_retry(query, max_retries=3, wait_time=5):
    for attempt in range(max_retries):
        try:
            start_time = datetime.now()
            response = query_engine.query(query)
            duration = (datetime.now() - start_time).total_seconds()
            print(f"Query answered in {duration:.2f}s")
            # Return a string so Gradio's Textbox renders it directly.
            return str(response)
        except httpx.ReadTimeout:
            if attempt < max_retries - 1:
                time.sleep(wait_time)
            else:
                raise
        except Exception as e:
            return f"An error occurred: {e}"


# Gradio callback: forward the user's question to the query engine.
def gradio_interface(query):
    return query_with_retry(query)


# Set up the Gradio UI. `live=True` is omitted so the (slow) LLM query
# runs only on submit rather than on every keystroke.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Ask a question", placeholder="Enter your query here...", lines=2),
    outputs=gr.Textbox(label="Response", interactive=False),
    title="Document-based Question Answering",
    description="Ask questions about the documents in the data/ folder. Answers are generated from your uploaded PDFs.",
)

iface.launch()
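# --- Usage notes (an added sketch; the package and file names below are
# assumptions inferred from the imports above, not confirmed by the source) ---
# Install the dependencies:
#   pip install gradio httpx llama-index llama-parse \
#       llama-index-embeddings-huggingface llama-index-llms-huggingface-api
# Provide the LlamaParse key via the environment before launching:
#   export LLAMA_CLOUD_API_KEY="llx-..."
# Then run the script (e.g. `python app.py`, if saved under that name) and
# open the local URL that Gradio prints in a browser.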