import os

import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse

# ────────────────────────────────
# 1. Check environment variables
# ────────────────────────────────
llama_cloud_key = os.getenv("LLAMA_CLOUD_API_KEY")
groq_key = os.getenv("GROQ_API_KEY")
mxbai_key = os.getenv("MXBAI_API_KEY")

if not (llama_cloud_key and groq_key and mxbai_key):
    raise EnvironmentError(
        "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set."
    )

# ────────────────────────────────
# 2. Model / parser setup
# ────────────────────────────────
LLM_MODEL = "llama-3.1-70b-versatile"
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"

parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")
file_extractor = {ext: parser for ext in (
    ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx", ".pptx", ".html",
    ".jpg", ".jpeg", ".png", ".webp", ".svg",
)}

embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=EMBED_MODEL)
llm = Groq(model=LLM_MODEL, api_key=groq_key)

# Global cache for the current document
vector_index = None

# ────────────────────────────────
# 3. Helper functions
# ────────────────────────────────
def load_files(file_path: str) -> str:
    """Parse the uploaded document and build a vector index."""
    global vector_index
    if not file_path:
        return "⚠️ No file selected."
    if not any(file_path.endswith(ext) for ext in file_extractor):
        return ("⚠️ Unsupported file type. "
                f"Allowed: {', '.join(file_extractor.keys())}")
    docs = SimpleDirectoryReader(
        input_files=[file_path], file_extractor=file_extractor
    ).load_data()
    vector_index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    return f"✅ Parsed **{os.path.basename(file_path)}**. Ask away!"


def respond(message: str, history: list):
    """Chat handler. Streams partial tokens back to the UI."""
    if vector_index is None:
        # `return "..."` inside a generator silently discards the message,
        # so yield it instead and then stop.
        yield "➡️ Please upload a document first."
        return
    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    streaming_resp = query_engine.query(message)
    partial = ""
    for chunk in streaming_resp.response_gen:
        partial += chunk
        yield partial  # <- streaming to the frontend


def clear_state():
    """Reset everything."""
    global vector_index
    vector_index = None
    return [None, ""]

# ────────────────────────────────
# 4. Gradio UI
# ────────────────────────────────
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility: hidden}",
) as demo:
    gr.Markdown("# DataCamp Doc Q&A 🤖📃")
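    # Two-column layout: upload controls on the left, chat on the right.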
") with gr.Row(): with gr.Column(scale=1): file_input = gr.File(file_count="single", type="filepath", label="Upload document") with gr.Row(): submit_btn = gr.Button("Submit", variant="primary") clear_btn = gr.Button("Clear") status_box = gr.Markdown() with gr.Column(scale=3): chat = gr.ChatInterface( fn=respond, chatbot=gr.Chatbot(height=300), show_progress="full", # keep the nice progress bar textbox=gr.Textbox( placeholder="Ask a question about the uploaded document…", container=False, ), ) submit_btn.click(load_files, inputs=file_input, outputs=status_box) clear_btn.click(clear_state, outputs=[file_input, status_box]) # Disable OpenAPI generation (avoids the bool/‘const’ bug) … demo.queue(api_open=False) # ──────────────────────────────── # 5. Launch # ──────────────────────────────── if __name__ == "__main__": # …and make a public share link so the container doesn’t choke on localhost demo.launch(share=True, server_name="0.0.0.0", server_port=7860)