schoolkithub committed
Commit e79359e · verified · 1 Parent(s): d808ec0

Update app.py

Files changed (1)
  1. app.py  +49 -88
app.py CHANGED
@@ -4,107 +4,76 @@ import requests
 import pandas as pd
 from huggingface_hub import InferenceClient
 from duckduckgo_search import DDGS
-from datasets import load_dataset
 import wikipediaapi
-from llama_index.core import VectorStoreIndex, Document, StorageContext, load_index_from_storage
-from llama_index.llms.huggingface import HuggingFaceLLM

+# ==== CONFIG ====
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
-llm_model_id = "deepseek-ai/DeepSeek-V2"
-llm_fallback_id = "mistralai/Mistral-7B-Instruct-v0.2"
-
-# Setup HF LLM client
-hf_client = InferenceClient(llm_model_id, token=HF_TOKEN)
-hf_fallback = InferenceClient(llm_fallback_id, token=HF_TOKEN)
-
-# Wikipedia API with user agent
-wiki_api = wikipediaapi.Wikipedia(
-    language='en',
-    user_agent='SmartAgent/1.0 ([email protected])'
-)
-
-# Build or load LlamaIndex for fast retrieval (optional, for small Wikipedia sample)
-try:
-    wiki_dataset = load_dataset("wikipedia", "20220301.en", split="train[:5000]", trust_remote_code=True)
-    docs = [Document(text=doc['text']) for doc in wiki_dataset]
-    index = VectorStoreIndex.from_documents(docs)
-except Exception as e:
-    index = None
+HF_TOKEN = os.getenv("HF_TOKEN")

+CONVERSATIONAL_MODELS = [
+    "deepseek-ai/DeepSeek-LLM",
+    "HuggingFaceH4/zephyr-7b-beta",
+    "mistralai/Mistral-7B-Instruct-v0.2"
+]
+
+wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 ([email protected])")
+
+# ==== SEARCH TOOLS ====
 def duckduckgo_search(query):
     with DDGS() as ddgs:
         results = [r for r in ddgs.text(query, max_results=3)]
-    return "\n".join([r["body"] for r in results if r.get("body")]) or "No results found."
+    return "\n".join([r.get("body", "") for r in results if r.get("body")]) or "No DuckDuckGo results found."

 def wikipedia_search(query):
     page = wiki_api.page(query)
-    return page.summary if page.exists() else None
+    return page.summary if page.exists() and page.summary else "No Wikipedia page found."
-
-def index_search(query):
-    if index is None:
-        return None
-    res = index.as_query_engine().query(query)
-    return str(res) if res else None
-
-def handle_excel(file_url):
-    # Download and sum food (not drinks)
-    try:
-        fname = "tmp.xlsx"
-        r = requests.get(file_url)
-        with open(fname, "wb") as f:
-            f.write(r.content)
-        df = pd.read_excel(fname)
-        # Assume drinks have 'drink' or 'beverage' in a column called 'Item' or 'Category'
-        if "Item" in df.columns:
-            food_df = df[~df["Item"].str.contains("drink|beverage", case=False, na=False)]
-            total = food_df["Total"].sum()
-            return f"${total:.2f}"
-        if "Category" in df.columns:
-            food_df = df[df["Category"].str.lower() == "food"]
-            total = food_df["Total"].sum()
-            return f"${total:.2f}"
-        return "File parsed but could not find food sales."
-    except Exception as e:
-        return f"Excel error: {e}"

+def hf_chat_model(question):
+    last_error = ""
+    for model_id in CONVERSATIONAL_MODELS:
+        try:
+            hf_client = InferenceClient(model_id, token=HF_TOKEN)
+            result = hf_client.conversational(
+                messages=[{"role": "user", "content": question}],
+                max_new_tokens=384,
+            )
+            # Acceptable result types: dict, obj with 'generated_text', or str
+            if isinstance(result, dict) and "generated_text" in result:
+                return f"[{model_id}] " + result["generated_text"]
+            elif hasattr(result, "generated_text"):
+                return f"[{model_id}] " + result.generated_text
+            elif isinstance(result, str):
+                return f"[{model_id}] " + result
+            else:
+                return f"[{model_id}] " + str(result)
+        except Exception as e:
+            last_error = f"({model_id}) {e}"
+    return f"HF LLM error: {last_error}"
+
+# ==== SMART AGENT ====
 class SmartAgent:
     def __init__(self):
         pass

     def __call__(self, question: str) -> str:
         q_lower = question.lower()
-
-        # DuckDuckGo for current events/recent/live
-        if any(term in q_lower for term in ["current", "latest", "2024", "2025", "who is the president", "recent", "live"]):
-            return duckduckgo_search(question)
-
-        # Wikipedia summary
+        # DuckDuckGo for current/event/internet questions
+        if any(term in q_lower for term in ["current", "latest", "2024", "2025", "who is the president", "recent", "live", "now", "today"]):
+            duck_result = duckduckgo_search(question)
+            if duck_result and "No DuckDuckGo" not in duck_result:
+                return duck_result
+        # Wikipedia for encyclopedic knowledge
         wiki_result = wikipedia_search(question)
-        if wiki_result:
+        if wiki_result and "No Wikipedia page found" not in wiki_result:
             return wiki_result
+        # Fallback to LLMs
+        return hf_chat_model(question)

-        # LlamaIndex retrieval
-        rag_result = index_search(question)
-        if rag_result:
-            return rag_result
-
-        # LLM generation
-        try:
-            resp = hf_client.text_generation(question, max_new_tokens=256)
-            return resp
-        except Exception:
-            try:
-                resp = hf_fallback.text_generation(question, max_new_tokens=256)
-                return resp
-            except Exception as e:
-                return f"HF LLM error: {e}"
-
+# ==== SUBMISSION LOGIC ====
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = profile.username
-        print(f"User logged in: {username}")
     else:
         return "Please Login to Hugging Face with the button.", None

@@ -128,17 +97,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        file_url = item.get("file_url", None)
         if not task_id or not question_text:
             continue
-
-        # Handle Excel task
-        if file_url and ("excel" in question_text.lower() or "file" in question_text.lower()):
-            submitted_answer = handle_excel(file_url)
-        else:
-            submitted_answer = agent(question_text)
-
-        # Final answer extraction/formatting if needed (TODO: Add regex/extract logic)
+        submitted_answer = agent(question_text)
         answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
         results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

@@ -146,7 +107,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")

     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -164,7 +124,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)

-# Gradio interface
+# ==== GRADIO UI ====
 with gr.Blocks() as demo:
     gr.Markdown("# Smart Agent Evaluation Runner")
     gr.Markdown("""
@@ -177,6 +137,7 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

 if __name__ == "__main__":
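
Note on the new hf_chat_model helper: recent huggingface_hub releases deprecate InferenceClient.conversational in favor of chat_completion, so on newer installs the loop above may only ever exercise its except branch. Below is a minimal sketch of the same model-fallback pattern written against chat_completion; it is an illustration under that assumption, not part of this commit, and it reuses the CONVERSATIONAL_MODELS IDs from the diff.

import os
from huggingface_hub import InferenceClient

# Model IDs reused from CONVERSATIONAL_MODELS in the commit above.
CONVERSATIONAL_MODELS = [
    "deepseek-ai/DeepSeek-LLM",
    "HuggingFaceH4/zephyr-7b-beta",
    "mistralai/Mistral-7B-Instruct-v0.2",
]
HF_TOKEN = os.getenv("HF_TOKEN")

def hf_chat_model(question: str) -> str:
    """Try each model in turn and return the first successful chat completion."""
    last_error = ""
    for model_id in CONVERSATIONAL_MODELS:
        try:
            client = InferenceClient(model_id, token=HF_TOKEN)
            result = client.chat_completion(
                messages=[{"role": "user", "content": question}],
                max_tokens=384,
            )
            return f"[{model_id}] " + result.choices[0].message.content
        except Exception as e:
            # Any failure (missing model, quota, API change) moves on to the next model.
            last_error = f"({model_id}) {e}"
    return f"HF LLM error: {last_error}"

Since the signature matches, the SmartAgent.__call__ fallback path in the commit could call this variant unchanged.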