kingabzpro committed on
Commit
e4e6dc4
·
verified ·
1 Parent(s): 66b553a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -89
app.py CHANGED
@@ -1,130 +1,125 @@
1
  import os
2
-
3
  import gradio as gr
4
  from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
5
  from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
6
  from llama_index.llms.groq import Groq
7
  from llama_parse import LlamaParse
8
 
9
- # API keys
10
- llama_cloud_key = os.environ.get("LLAMA_CLOUD_API_KEY")
11
- groq_key = os.environ.get("GROQ_API_KEY")
12
- mxbai_key = os.environ.get("MXBAI_API_KEY")
 
 
 
13
  if not (llama_cloud_key and groq_key and mxbai_key):
14
- raise ValueError(
15
- "API Keys not found! Ensure they are passed to the Docker container."
16
  )
17
 
18
- # models name
19
- llm_model_name = "llama-3.1-70b-versatile"
20
- embed_model_name = "mixedbread-ai/mxbai-embed-large-v1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Initialize the parser
23
- parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")
 
24
 
25
- # Define file extractor with various common extensions
26
- file_extractor = {
27
- ".pdf": parser,
28
- ".docx": parser,
29
- ".doc": parser,
30
- ".txt": parser,
31
- ".csv": parser,
32
- ".xlsx": parser,
33
- ".pptx": parser,
34
- ".html": parser,
35
- ".jpg": parser,
36
- ".jpeg": parser,
37
- ".png": parser,
38
- ".webp": parser,
39
- ".svg": parser,
40
- }
41
 
42
- # Initialize the embedding model
43
- embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=embed_model_name)
44
 
45
- # Initialize the LLM
46
 
47
- llm = Groq(model="llama-3.1-70b-versatile", api_key=groq_key)
 
 
 
 
 
 
 
 
 
 
 
48
 
49
 
50
- # File processing function
51
- def load_files(file_path: str):
52
- global vector_index
53
- if not file_path:
54
- return "No file path provided. Please upload a file."
55
-
56
- valid_extensions = ', '.join(file_extractor.keys())
57
- if not any(file_path.endswith(ext) for ext in file_extractor):
58
- return f"The parser can only parse the following file types: {valid_extensions}"
59
-
60
- document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
61
- vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
62
- print(f"Parsing completed for: {file_path}")
63
- filename = os.path.basename(file_path)
64
- return f"Ready to provide responses based on: {filename}"
65
-
66
-
67
- # Respond function
68
- def respond(message, history):
69
- try:
70
- # Use the preloaded LLM
71
- query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
72
- streaming_response = query_engine.query(message)
73
- partial_text = ""
74
- for new_text in streaming_response.response_gen:
75
- partial_text += new_text
76
- # Yield an empty string to cleanup the message textbox and the updated conversation history
77
- yield partial_text
78
- except (AttributeError, NameError):
79
- print("An error occurred while processing your request.")
80
- yield "Please upload the file to begin chat."
81
-
82
-
83
- # Clear function
84
  def clear_state():
 
85
  global vector_index
86
  vector_index = None
87
- return [None, None, None]
88
 
89
 
90
- # UI Setup
 
 
91
  with gr.Blocks(
92
  theme=gr.themes.Default(
93
  primary_hue="green",
94
  secondary_hue="blue",
95
- font=[gr.themes.GoogleFont("Poppins")],
96
  ),
97
- css="footer {visibility: hidden}",
98
  ) as demo:
99
- gr.Markdown("# DataCamp Doc Q&A πŸ€–πŸ“ƒ")
 
100
  with gr.Row():
101
  with gr.Column(scale=1):
102
- file_input = gr.File(
103
- file_count="single", type="filepath", label="Upload Document"
104
- )
105
  with gr.Row():
106
- btn = gr.Button("Submit", variant="primary")
107
- clear = gr.Button("Clear")
108
- output = gr.Textbox(label="Status")
 
109
  with gr.Column(scale=3):
110
- chatbot = gr.ChatInterface(
111
  fn=respond,
112
  chatbot=gr.Chatbot(height=300),
113
- theme="soft",
114
- show_progress="full",
115
  textbox=gr.Textbox(
116
- placeholder="Ask questions about the uploaded document!",
117
  container=False,
118
  ),
119
  )
120
 
121
- # Set up Gradio interactions
122
- btn.click(fn=load_files, inputs=file_input, outputs=output)
123
- clear.click(
124
- fn=clear_state, # Use the clear_state function
125
- outputs=[file_input, output],
126
- )
127
 
128
- # Launch the demo
 
 
129
  if __name__ == "__main__":
130
- demo.launch()
 
 
1
  import os
 
2
  import gradio as gr
3
  from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
4
  from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
5
  from llama_index.llms.groq import Groq
6
  from llama_parse import LlamaParse
7
 
8
# ────────────────────────────────
# 1. Check environment variables
# ────────────────────────────────
# Read the three API keys once, when the module is loaded.
llama_cloud_key = os.getenv("LLAMA_CLOUD_API_KEY")
groq_key = os.getenv("GROQ_API_KEY")
mxbai_key = os.getenv("MXBAI_API_KEY")

# Fail fast if any key is unset or empty: nothing below can work without them.
if not all((llama_cloud_key, groq_key, mxbai_key)):
    raise EnvironmentError(
        "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set."
    )
19
 
20
# ────────────────────────────────
# 2. Model / parser setup
# ────────────────────────────────
# NOTE(review): hosted model ids get retired over time — confirm this one is
# still served by Groq before deploying.
LLM_MODEL = "llama-3.1-70b-versatile"
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"

# A single LlamaParse instance (markdown output) handles every file type.
parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")

# Map each accepted extension to that shared parser instance.
file_extractor = dict.fromkeys(
    (
        ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
        ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
    ),
    parser,
)

embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=EMBED_MODEL)
llm = Groq(model=LLM_MODEL, api_key=groq_key)

# Global cache for the current document; None means nothing is loaded.
vector_index = None
36
+
37
+
38
# ────────────────────────────────
# 3. Helper functions
# ────────────────────────────────
def load_files(file_path: str) -> str:
    """Parse the uploaded document and build a vector index.

    On success the module-level ``vector_index`` is replaced with an index
    built from the parsed document.

    Args:
        file_path: Path of the uploaded file; may be empty or ``None`` when
            nothing was selected.

    Returns:
        A Markdown status message: success, or the reason the file was
        rejected or yielded no content.
    """
    global vector_index
    if not file_path:
        return "⚠️ No file selected."

    # Compare the lowercased path so "REPORT.PDF" is accepted like
    # "report.pdf". ``str.endswith`` takes a tuple of suffixes directly.
    # NOTE(review): presumably the reader also matches suffixes
    # case-insensitively when picking an extractor — verify.
    if not file_path.lower().endswith(tuple(file_extractor)):
        return ("⚠️ Unsupported file type. "
                f"Allowed: {', '.join(file_extractor)}")

    docs = SimpleDirectoryReader(
        input_files=[file_path], file_extractor=file_extractor
    ).load_data()

    # An empty result would otherwise build an index with nothing in it and
    # still report success. Clear the cache so the chat asks for a new upload.
    if not docs:
        vector_index = None
        return "⚠️ Could not extract any content from the file."

    vector_index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    return f"✅ Parsed **{os.path.basename(file_path)}**. Ask away!"
57
 
 
58
 
59
def respond(message: str, history: list):
    """Chat handler: stream the growing answer back to the UI.

    This function is a generator, so everything meant for the UI must be
    yielded. A ``return "<text>"`` inside a generator only ends the iteration
    and the text never reaches the consumer, which is why the "upload first"
    hint is yielded before returning.

    Args:
        message: The user's question.
        history: Earlier conversation turns passed in by the chat interface
            (not used here).

    Yields:
        The accumulated response text after each streamed chunk, or a single
        hint message when no document has been loaded yet.
    """
    if vector_index is None:
        yield "➡️ Please upload a document first."
        return

    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    streaming_resp = query_engine.query(message)

    partial = ""
    for chunk in streaming_resp.response_gen:
        partial += chunk
        yield partial  # stream the text accumulated so far to the frontend
71
 
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
def clear_state():
    """Forget the cached index and return blank values for the UI.

    Returns:
        A two-item list, ``[None, ""]``: one value per output component wired
        to the Clear button (the file input and the status display).
    """
    global vector_index
    vector_index = None  # the chat handler treats None as "no document loaded"

    cleared_file, cleared_status = None, ""
    return [cleared_file, cleared_status]
78
 
79
 
80
# ────────────────────────────────
# 4. Gradio UI
# ────────────────────────────────
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility: hidden}",
) as demo:

    # Centered page title (emoji restored from mis-encoded text).
    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A 🤖📃</h1>")
    with gr.Row():
        # Left column: upload widget, action buttons and status line.
        with gr.Column(scale=1):
            file_input = gr.File(
                file_count="single",
                type="filepath",
                label="Upload document",
            )
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.Button("Clear")
            status_box = gr.Markdown()

        # Right column: the chat itself, backed by the streaming handler.
        with gr.Column(scale=3):
            chat = gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(height=300),
                show_progress="full",  # keep the nice progress bar
                textbox=gr.Textbox(
                    placeholder="Ask a question about the uploaded document…",
                    container=False,
                ),
            )

    # Submit parses the selected file; Clear resets the file input and status.
    submit_btn.click(load_files, inputs=file_input, outputs=status_box)
    clear_btn.click(clear_state, outputs=[file_input, status_box])

    # Disable OpenAPI generation (avoids the bool/‘const’ bug) …
    demo.queue(api_open=False)
 
119
 
120
# ────────────────────────────────
# 5. Launch
# ────────────────────────────────
if __name__ == "__main__":
    # …and make a public share link so the container doesn’t choke on localhost
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)