# Hugging Face Spaces app: document Q&A over an uploaded file (Gradio UI).
import os

import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse

# ────────────────────────────────
# 1. Check environment variables
# ────────────────────────────────
llama_cloud_key = os.getenv("LLAMA_CLOUD_API_KEY")
groq_key = os.getenv("GROQ_API_KEY")
mxbai_key = os.getenv("MXBAI_API_KEY")

# Fail fast at startup and name exactly which keys are absent — every
# client constructed below requires its own key.
_missing = [name for name, value in (
    ("LLAMA_CLOUD_API_KEY", llama_cloud_key),
    ("GROQ_API_KEY", groq_key),
    ("MXBAI_API_KEY", mxbai_key),
) if not value]
if _missing:
    raise EnvironmentError(
        f"Missing required environment variables: {', '.join(_missing)}"
    )

# ────────────────────────────────
# 2. Model / parser setup
# ────────────────────────────────
# NOTE(review): Groq has been retiring llama-3.1 chat models; confirm this
# model id is still served and bump it if the API starts rejecting it.
LLM_MODEL = "llama-3.1-70b-versatile"
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"

# LlamaParse converts every supported upload to Markdown before indexing;
# one shared parser instance is registered for all supported extensions.
parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")
SUPPORTED_EXTENSIONS = (
    ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
    ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
)
file_extractor = {ext: parser for ext in SUPPORTED_EXTENSIONS}

embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=EMBED_MODEL)
llm = Groq(model=LLM_MODEL, api_key=groq_key)

# Global cache for the index built from the currently uploaded document.
vector_index = None
# ────────────────────────────────
# 3. Helper functions
# ────────────────────────────────
def load_files(file_path: str) -> str:
    """Parse the uploaded document and (re)build the global vector index.

    Parameters
    ----------
    file_path:
        Local filesystem path of the uploaded file, or a falsy value when
        nothing was selected.

    Returns
    -------
    str
        A Markdown status message for the UI's status box.
    """
    global vector_index
    if not file_path:
        return "⚠️ No file selected."
    # Compare the extension case-insensitively so e.g. "REPORT.PDF" is
    # accepted; the original `endswith` check rejected upper-case names.
    extension = os.path.splitext(file_path)[1].lower()
    if extension not in file_extractor:
        return ("⚠️ Unsupported file type. "
                f"Allowed: {', '.join(file_extractor)}")
    documents = SimpleDirectoryReader(
        input_files=[file_path], file_extractor=file_extractor
    ).load_data()
    # Rebuilding replaces any previously indexed document.
    vector_index = VectorStoreIndex.from_documents(
        documents, embed_model=embed_model
    )
    return f"✅ Parsed **{os.path.basename(file_path)}**. Ask away!"
def respond(message: str, history: list):
    """Chat handler: stream the answer for *message* token by token.

    Yields the accumulated partial answer so the Gradio frontend renders
    it incrementally. If no document has been indexed yet, a single
    warning message is yielded instead.
    """
    if vector_index is None:
        # Must *yield* (not `return`) the warning: this function is a
        # generator, and a generator's `return` value becomes the
        # StopIteration payload, which Gradio discards — the user would
        # never see the message.
        yield "➡️ Please upload a document first."
        return
    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    streaming_resp = query_engine.query(message)
    partial = ""
    for chunk in streaming_resp.response_gen:
        partial += chunk
        yield partial  # stream the growing answer to the frontend
def clear_state():
    """Forget the current document and blank out the UI widgets.

    Drops the cached vector index and returns reset values for the two
    output components wired up in the UI: ``None`` clears the file
    picker, ``""`` clears the status box.
    """
    global vector_index
    vector_index = None
    return [None, ""]
# ────────────────────────────────
# 4. Gradio UI
# ────────────────────────────────
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility: hidden}",  # hide the default Gradio footer
) as demo:
    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A 🤖📄</h1>")
    with gr.Row():
        # Left column: upload controls and parse status.
        with gr.Column(scale=1):
            file_input = gr.File(
                file_count="single",
                type="filepath",
                label="Upload document",
            )
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.Button("Clear")
            status_box = gr.Markdown()
        # Right column: the streaming chat interface.
        with gr.Column(scale=3):
            chat = gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(height=300),
                show_progress="full",  # keep the nice progress bar
                textbox=gr.Textbox(
                    placeholder="Ask a question about the uploaded document…",
                    container=False,
                ),
            )
    submit_btn.click(load_files, inputs=file_input, outputs=status_box)
    clear_btn.click(clear_state, outputs=[file_input, status_box])

# Disable OpenAPI generation (avoids the bool/"const" bug) …
demo.queue(api_open=False)

# ────────────────────────────────
# 5. Launch
# ────────────────────────────────
if __name__ == "__main__":
    # …and make a public share link so the container doesn't choke on localhost
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)