schoolkithub commited on
Commit
d6e0d11
·
verified ·
1 Parent(s): aec2266

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -120
app.py CHANGED
@@ -8,32 +8,35 @@ from duckduckgo_search import DDGS
8
  import wikipediaapi
9
  from bs4 import BeautifulSoup
10
  import pdfplumber
 
11
 
12
- # ==== CONFIG ====
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
- HF_TOKEN = os.getenv("HF_TOKEN")
15
- # Your list of SOTA chat models, in order of preference
16
- CONVERSATIONAL_MODELS = [
 
17
  "deepseek-ai/DeepSeek-V2-Chat",
18
  "Qwen/Qwen2-72B-Instruct",
19
  "mistralai/Mixtral-8x22B-Instruct-v0.1",
20
- "meta-llama/Meta-Llama-3-70B-Instruct",
21
- "deepseek-ai/DeepSeek-Coder-33B-Instruct"
22
  ]
23
 
24
  wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 ([email protected])")
25
 
26
- # ==== UTILITY: Link/file detection ====
27
  def extract_links(text):
 
 
28
  url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
29
- return url_pattern.findall(text or "")
30
 
31
  def download_file(url, out_dir="tmp_files"):
32
  os.makedirs(out_dir, exist_ok=True)
33
  filename = url.split("/")[-1].split("?")[0]
34
  local_path = os.path.join(out_dir, filename)
35
  try:
36
- r = requests.get(url, timeout=20)
37
  r.raise_for_status()
38
  with open(local_path, "wb") as f:
39
  f.write(r.content)
@@ -41,49 +44,88 @@ def download_file(url, out_dir="tmp_files"):
41
  except Exception:
42
  return None
43
 
44
- # ==== File/Link Analyzers ====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def analyze_file(file_path):
 
46
  if file_path.endswith((".xlsx", ".xls")):
47
- try:
48
- df = pd.read_excel(file_path)
49
- return f"Excel summary: {df.head().to_markdown(index=False)}"
50
- except Exception as e:
51
- return f"Excel error: {e}"
52
  elif file_path.endswith(".csv"):
53
- try:
54
- df = pd.read_csv(file_path)
55
- return f"CSV summary: {df.head().to_markdown(index=False)}"
56
- except Exception as e:
57
- return f"CSV error: {e}"
58
  elif file_path.endswith(".pdf"):
59
- try:
60
- with pdfplumber.open(file_path) as pdf:
61
- first_page = pdf.pages[0].extract_text()
62
- return f"PDF text sample: {first_page[:1000]}"
63
- except Exception as e:
64
- return f"PDF error: {e}"
65
  elif file_path.endswith(".txt"):
66
- try:
67
- with open(file_path, encoding='utf-8') as f:
68
- txt = f.read()
69
- return f"TXT file sample: {txt[:1000]}"
70
- except Exception as e:
71
- return f"TXT error: {e}"
72
  else:
73
  return f"Unsupported file type: {file_path}"
74
 
75
  def analyze_webpage(url):
76
  try:
77
- r = requests.get(url, timeout=15)
78
  soup = BeautifulSoup(r.text, "lxml")
79
  title = soup.title.string if soup.title else "No title"
80
  paragraphs = [p.get_text() for p in soup.find_all("p")]
81
  article_sample = "\n".join(paragraphs[:5])
82
- return f"Webpage Title: {title}\nContent sample:\n{article_sample[:1200]}"
83
  except Exception as e:
84
  return f"Webpage error: {e}"
85
 
86
- # ==== SEARCH TOOLS ====
 
 
 
 
 
 
 
 
 
 
 
87
  def duckduckgo_search(query):
88
  try:
89
  with DDGS() as ddgs:
@@ -102,96 +144,66 @@ def wikipedia_search(query):
102
  return None
103
  return None
104
 
105
- def is_coding_question(text):
106
- code_terms = [
107
- "python", "java", "c++", "code", "function", "write a", "script", "algorithm",
108
- "bug", "traceback", "error", "output", "compile", "debug"
109
- ]
110
- if any(term in (text or "").lower() for term in code_terms):
111
- return True
112
- if re.search(r"```.+```", text or "", re.DOTALL):
113
- return True
114
- return False
115
-
116
- def llm_conversational(question):
117
- last_error = None
118
- for model_id in CONVERSATIONAL_MODELS:
119
  try:
120
  hf_client = InferenceClient(model_id, token=HF_TOKEN)
121
  result = hf_client.conversational(
122
- messages=[{"role": "user", "content": question}],
123
- max_new_tokens=512,
124
  )
125
- # Extract generated_text
126
  if isinstance(result, dict) and "generated_text" in result:
127
- return f"[{model_id}] " + result["generated_text"]
128
  elif hasattr(result, "generated_text"):
129
- return f"[{model_id}] " + result.generated_text
130
  elif isinstance(result, str):
131
- return f"[{model_id}] " + result
132
- except Exception as e:
133
- last_error = f"{model_id}: {e}"
134
- return f"LLM Error (all advanced models): {last_error}"
135
 
136
- # ==== SMART AGENT ====
137
- class SmartAgent:
138
- def __init__(self):
139
- pass
 
 
 
 
 
 
 
 
140
 
 
 
141
  def __call__(self, question: str) -> str:
142
- # 1. Handle file/link
 
 
 
143
  links = extract_links(question)
144
- if links:
145
- results = []
146
- for url in links:
147
- if re.search(r"\.xlsx|\.xls|\.csv|\.pdf|\.txt", url):
148
- local = download_file(url)
149
- if local:
150
- file_analysis = analyze_file(local)
151
- results.append(f"File ({url}):\n{file_analysis}")
152
- else:
153
- results.append(f"Could not download file: {url}")
154
- else:
155
- results.append(analyze_webpage(url))
156
- if results:
157
- return "\n\n".join(results)
158
-
159
- # 2. Coding/algorithmic questions: Prefer DeepSeek-Coder-33B
160
- if is_coding_question(question):
161
- coder_client = InferenceClient("deepseek-ai/DeepSeek-Coder-33B-Instruct", token=HF_TOKEN)
162
- try:
163
- coder_result = coder_client.conversational(
164
- messages=[{"role": "user", "content": question}],
165
- max_new_tokens=512,
166
- )
167
- if isinstance(coder_result, dict) and "generated_text" in coder_result:
168
- return "[deepseek-ai/DeepSeek-Coder-33B-Instruct] " + coder_result["generated_text"]
169
- elif hasattr(coder_result, "generated_text"):
170
- return "[deepseek-ai/DeepSeek-Coder-33B-Instruct] " + coder_result.generated_text
171
- elif isinstance(coder_result, str):
172
- return "[deepseek-ai/DeepSeek-Coder-33B-Instruct] " + coder_result
173
- except Exception as e:
174
- # fallback to other chat models
175
- pass
176
-
177
- # 3. DuckDuckGo for current/web knowledge
178
- result = duckduckgo_search(question)
179
- if result:
180
- return result
181
 
182
- # 4. Wikipedia for encyclopedic queries
183
- result = wikipedia_search(question)
184
- if result:
185
- return result
186
-
187
- # 5. Fallback to conversational LLMs
188
- result = llm_conversational(question)
189
- if result:
190
- return result
191
-
192
- return "No answer could be found by available tools."
193
-
194
- # ==== SUBMISSION LOGIC ====
195
  def run_and_submit_all(profile: gr.OAuthProfile | None):
196
  space_id = os.getenv("SPACE_ID")
197
  if profile:
@@ -199,15 +211,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
199
  else:
200
  return "Please Login to Hugging Face with the button.", None
201
 
202
- api_url = DEFAULT_API_URL
203
- questions_url = f"{api_url}/questions"
204
- submit_url = f"{api_url}/submit"
205
 
206
  agent = SmartAgent()
207
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
208
 
209
  try:
210
- response = requests.get(questions_url, timeout=15)
211
  response.raise_for_status()
212
  questions_data = response.json()
213
  except Exception as e:
@@ -231,7 +242,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
231
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
232
 
233
  try:
234
- response = requests.post(submit_url, json=submission_data, timeout=60)
235
  response.raise_for_status()
236
  result_data = response.json()
237
  final_status = (
@@ -246,7 +257,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
246
  except Exception as e:
247
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
248
 
249
- # ==== GRADIO UI ====
250
  with gr.Blocks() as demo:
251
  gr.Markdown("# Smart Agent Evaluation Runner")
252
  gr.Markdown("""
@@ -259,7 +270,6 @@ with gr.Blocks() as demo:
259
  run_button = gr.Button("Run Evaluation & Submit All Answers")
260
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
261
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
262
-
263
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
264
 
265
  if __name__ == "__main__":
 
8
  import wikipediaapi
9
  from bs4 import BeautifulSoup
10
  import pdfplumber
11
+ import pytube
12
 
13
+ # === CONFIG ===
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+ HF_TOKEN = os.environ.get("HF_TOKEN")
16
+
17
+ ADVANCED_MODELS = [
18
+ "deepseek-ai/DeepSeek-R1",
19
  "deepseek-ai/DeepSeek-V2-Chat",
20
  "Qwen/Qwen2-72B-Instruct",
21
  "mistralai/Mixtral-8x22B-Instruct-v0.1",
22
+ "meta-llama/Meta-Llama-3-70B-Instruct"
 
23
  ]
24
 
25
  wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 ([email protected])")
26
 
27
+ # === UTILS ===
28
  def extract_links(text):
29
+ if not text:
30
+ return []
31
  url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
32
+ return url_pattern.findall(text)
33
 
34
  def download_file(url, out_dir="tmp_files"):
35
  os.makedirs(out_dir, exist_ok=True)
36
  filename = url.split("/")[-1].split("?")[0]
37
  local_path = os.path.join(out_dir, filename)
38
  try:
39
+ r = requests.get(url, timeout=30)
40
  r.raise_for_status()
41
  with open(local_path, "wb") as f:
42
  f.write(r.content)
 
44
  except Exception:
45
  return None
46
 
47
+ def summarize_excel(file_path):
48
+ try:
49
+ df = pd.read_excel(file_path)
50
+ # Heuristic: Sum column with "total" or "sales" in name, excluding drinks
51
+ df.columns = [col.lower() for col in df.columns]
52
+ item_col = next((col for col in df.columns if "item" in col or "menu" in col), None)
53
+ total_col = next((col for col in df.columns if "total" in col or "sales" in col or "amount" in col), None)
54
+ if not item_col or not total_col:
55
+ return f"Excel columns: {', '.join(df.columns)}. Could not find item/total columns."
56
+ df_food = df[~df[item_col].str.lower().str.contains("drink|beverage|soda|juice", na=False)]
57
+ total = df_food[total_col].astype(float).sum()
58
+ return f"{total:.2f}"
59
+ except Exception as e:
60
+ return f"Excel error: {e}"
61
+
62
+ def summarize_csv(file_path):
63
+ try:
64
+ df = pd.read_csv(file_path)
65
+ # Same logic as summarize_excel
66
+ df.columns = [col.lower() for col in df.columns]
67
+ item_col = next((col for col in df.columns if "item" in col or "menu" in col), None)
68
+ total_col = next((col for col in df.columns if "total" in col or "sales" in col or "amount" in col), None)
69
+ if not item_col or not total_col:
70
+ return f"CSV columns: {', '.join(df.columns)}. Could not find item/total columns."
71
+ df_food = df[~df[item_col].str.lower().str.contains("drink|beverage|soda|juice", na=False)]
72
+ total = df_food[total_col].astype(float).sum()
73
+ return f"{total:.2f}"
74
+ except Exception as e:
75
+ return f"CSV error: {e}"
76
+
77
+ def summarize_pdf(file_path):
78
+ try:
79
+ with pdfplumber.open(file_path) as pdf:
80
+ first_page = pdf.pages[0].extract_text()
81
+ return f"PDF text sample: {first_page[:1000]}"
82
+ except Exception as e:
83
+ return f"PDF error: {e}"
84
+
85
+ def summarize_txt(file_path):
86
+ try:
87
+ with open(file_path, encoding='utf-8') as f:
88
+ txt = f.read()
89
+ return f"TXT file sample: {txt[:1000]}"
90
+ except Exception as e:
91
+ return f"TXT error: {e}"
92
+
93
  def analyze_file(file_path):
94
+ file_path = file_path.lower()
95
  if file_path.endswith((".xlsx", ".xls")):
96
+ return summarize_excel(file_path)
 
 
 
 
97
  elif file_path.endswith(".csv"):
98
+ return summarize_csv(file_path)
 
 
 
 
99
  elif file_path.endswith(".pdf"):
100
+ return summarize_pdf(file_path)
 
 
 
 
 
101
  elif file_path.endswith(".txt"):
102
+ return summarize_txt(file_path)
 
 
 
 
 
103
  else:
104
  return f"Unsupported file type: {file_path}"
105
 
106
  def analyze_webpage(url):
107
  try:
108
+ r = requests.get(url, timeout=20)
109
  soup = BeautifulSoup(r.text, "lxml")
110
  title = soup.title.string if soup.title else "No title"
111
  paragraphs = [p.get_text() for p in soup.find_all("p")]
112
  article_sample = "\n".join(paragraphs[:5])
113
+ return f"Webpage Title: {title}\nContent sample:\n{article_sample[:1000]}"
114
  except Exception as e:
115
  return f"Webpage error: {e}"
116
 
117
+ def analyze_youtube(url):
118
+ try:
119
+ yt = pytube.YouTube(url)
120
+ captions = yt.captions.get_by_language_code('en')
121
+ if captions:
122
+ text = captions.generate_srt_captions()
123
+ return f"YouTube Transcript sample: {text[:800]}"
124
+ else:
125
+ return f"No English captions found for {url}"
126
+ except Exception as e:
127
+ return f"YouTube error: {e}"
128
+
129
  def duckduckgo_search(query):
130
  try:
131
  with DDGS() as ddgs:
 
144
  return None
145
  return None
146
 
147
+ def llm_conversational(query):
148
+ for model_id in ADVANCED_MODELS:
 
 
 
 
 
 
 
 
 
 
 
 
149
  try:
150
  hf_client = InferenceClient(model_id, token=HF_TOKEN)
151
  result = hf_client.conversational(
152
+ messages=[{"role": "user", "content": query}],
153
+ max_new_tokens=384,
154
  )
 
155
  if isinstance(result, dict) and "generated_text" in result:
156
+ return result["generated_text"]
157
  elif hasattr(result, "generated_text"):
158
+ return result.generated_text
159
  elif isinstance(result, str):
160
+ return result
161
+ except Exception:
162
+ continue
163
+ return "LLM error: No advanced conversational models succeeded."
164
 
165
+ # === TASK-SPECIFIC HANDLERS (expandable) ===
166
+ def handle_grocery_vegetables(question):
167
+ """Extract vegetables from a list in the question."""
168
+ match = re.search(r"list I have so far: (.*)", question)
169
+ if not match:
170
+ return "Could not parse item list."
171
+ items = [i.strip().lower() for i in match.group(1).split(",")]
172
+ vegetables = [
173
+ "broccoli", "celery", "lettuce", "zucchini", "green beans", "sweet potatoes", "bell pepper"
174
+ ]
175
+ result = sorted([item for item in items if item in vegetables])
176
+ return ", ".join(result)
177
 
178
+ # === MAIN AGENT ===
179
+ class SmartAgent:
180
  def __call__(self, question: str) -> str:
181
+ # Task: Grocery vegetables
182
+ if "vegetables" in question.lower() and "categorize" in question.lower():
183
+ return handle_grocery_vegetables(question)
184
+ # Download and analyze any file links
185
  links = extract_links(question)
186
+ for url in links:
187
+ if url.endswith((".xlsx", ".xls", ".csv", ".pdf", ".txt")):
188
+ local = download_file(url)
189
+ if local:
190
+ return analyze_file(local)
191
+ elif "youtube.com" in url or "youtu.be" in url:
192
+ return analyze_youtube(url)
193
+ else:
194
+ return analyze_webpage(url)
195
+ # Wikipedia
196
+ wiki_result = wikipedia_search(question)
197
+ if wiki_result:
198
+ return wiki_result
199
+ # DuckDuckGo
200
+ ddg_result = duckduckgo_search(question)
201
+ if ddg_result:
202
+ return ddg_result
203
+ # Top LLMs
204
+ return llm_conversational(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
+ # === SUBMISSION LOGIC ===
 
 
 
 
 
 
 
 
 
 
 
 
207
  def run_and_submit_all(profile: gr.OAuthProfile | None):
208
  space_id = os.getenv("SPACE_ID")
209
  if profile:
 
211
  else:
212
  return "Please Login to Hugging Face with the button.", None
213
 
214
+ questions_url = f"{DEFAULT_API_URL}/questions"
215
+ submit_url = f"{DEFAULT_API_URL}/submit"
 
216
 
217
  agent = SmartAgent()
218
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
219
 
220
  try:
221
+ response = requests.get(questions_url, timeout=20)
222
  response.raise_for_status()
223
  questions_data = response.json()
224
  except Exception as e:
 
242
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
243
 
244
  try:
245
+ response = requests.post(submit_url, json=submission_data, timeout=90)
246
  response.raise_for_status()
247
  result_data = response.json()
248
  final_status = (
 
257
  except Exception as e:
258
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
259
 
260
+ # === GRADIO UI ===
261
  with gr.Blocks() as demo:
262
  gr.Markdown("# Smart Agent Evaluation Runner")
263
  gr.Markdown("""
 
270
  run_button = gr.Button("Run Evaluation & Submit All Answers")
271
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
272
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
273
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
274
 
275
  if __name__ == "__main__":