schoolkithub committed on
Commit
ef7e6c0
·
verified ·
1 Parent(s): e3edf78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -92
app.py CHANGED
@@ -12,17 +12,19 @@ import pdfplumber
12
  # ==== CONFIG ====
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
  HF_TOKEN = os.getenv("HF_TOKEN")
15
- GROK_API_KEY = os.getenv("GROK_API_KEY")
16
 
 
17
  CONVERSATIONAL_MODELS = [
18
- "deepseek-ai/DeepSeek-LLM",
19
- "HuggingFaceH4/zephyr-7b-beta",
20
- "mistralai/Mistral-7B-Instruct-v0.2"
 
21
  ]
 
22
 
23
  wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 ([email protected])")
24
 
25
- # ==== UTILITY: Link/file detection ====
26
  def extract_links(text):
27
  url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
28
  return url_pattern.findall(text or "")
@@ -40,36 +42,26 @@ def download_file(url, out_dir="tmp_files"):
40
  except Exception:
41
  return None
42
 
43
- # ==== File/Link Analyzers ====
44
  def analyze_file(file_path):
45
- if file_path.endswith((".xlsx", ".xls")):
46
- try:
47
  df = pd.read_excel(file_path)
48
  return f"Excel summary: {df.head().to_markdown(index=False)}"
49
- except Exception as e:
50
- return f"Excel error: {e}"
51
- elif file_path.endswith(".csv"):
52
- try:
53
  df = pd.read_csv(file_path)
54
  return f"CSV summary: {df.head().to_markdown(index=False)}"
55
- except Exception as e:
56
- return f"CSV error: {e}"
57
- elif file_path.endswith(".pdf"):
58
- try:
59
  with pdfplumber.open(file_path) as pdf:
60
  first_page = pdf.pages[0].extract_text()
61
  return f"PDF text sample: {first_page[:1000]}"
62
- except Exception as e:
63
- return f"PDF error: {e}"
64
- elif file_path.endswith(".txt"):
65
- try:
66
  with open(file_path, encoding='utf-8') as f:
67
  txt = f.read()
68
  return f"TXT file sample: {txt[:1000]}"
69
- except Exception as e:
70
- return f"TXT error: {e}"
71
- else:
72
- return f"Unsupported file type: {file_path}"
73
 
74
  def analyze_webpage(url):
75
  try:
@@ -82,7 +74,6 @@ def analyze_webpage(url):
82
  except Exception as e:
83
  return f"Webpage error: {e}"
84
 
85
- # ==== SEARCH TOOLS ====
86
  def duckduckgo_search(query):
87
  try:
88
  with DDGS() as ddgs:
@@ -101,39 +92,6 @@ def wikipedia_search(query):
101
  return None
102
  return None
103
 
104
- def llm_conversational(query):
105
- last_error = None
106
- for model_id in CONVERSATIONAL_MODELS:
107
- try:
108
- hf_client = InferenceClient(model_id, token=HF_TOKEN)
109
- # Try conversational if available, else fallback to text_generation
110
- if hasattr(hf_client, "conversational"):
111
- try:
112
- result = hf_client.conversational(
113
- messages=[{"role": "user", "content": query}],
114
- max_new_tokens=384,
115
- )
116
- if isinstance(result, dict) and "generated_text" in result:
117
- return result["generated_text"]
118
- elif hasattr(result, "generated_text"):
119
- return result.generated_text
120
- elif isinstance(result, str):
121
- return result
122
- except Exception:
123
- pass
124
- # Fallback to text_generation
125
- try:
126
- result = hf_client.text_generation(query, max_new_tokens=384)
127
- if isinstance(result, dict) and "generated_text" in result:
128
- return result["generated_text"]
129
- elif isinstance(result, str):
130
- return result
131
- except Exception:
132
- pass
133
- except Exception as e:
134
- last_error = f"{model_id}: {e}"
135
- return None
136
-
137
  def is_coding_question(text):
138
  code_terms = [
139
  "python", "java", "c++", "code", "function", "write a", "script", "algorithm",
@@ -145,28 +103,31 @@ def is_coding_question(text):
145
  return True
146
  return False
147
 
148
- def grok_completion(question, system_prompt=None):
149
- url = "https://api.x.ai/v1/chat/completions"
150
- headers = {
151
- "Content-Type": "application/json",
152
- "Authorization": f"Bearer {GROK_API_KEY}"
153
- }
154
- payload = {
155
- "messages": [
156
- {"role": "system", "content": system_prompt or "You are a helpful coding and research assistant."},
157
- {"role": "user", "content": question}
158
- ],
159
- "model": "grok-3-latest",
160
- "stream": False,
161
- "temperature": 0
162
- }
163
  try:
164
- r = requests.post(url, headers=headers, json=payload, timeout=45)
165
- r.raise_for_status()
166
- data = r.json()
167
- return data['choices'][0]['message']['content']
168
- except Exception:
169
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  # ==== SMART AGENT ====
172
  class SmartAgent:
@@ -191,31 +152,28 @@ class SmartAgent:
191
  if results:
192
  return "\n\n".join(results)
193
 
194
- # 2. Coding or algorithmic problems? Try Grok FIRST
195
  if is_coding_question(question):
196
- grok_response = grok_completion(question)
197
- if grok_response:
198
- return f"[Grok] {grok_response}"
199
 
200
- # 3. DuckDuckGo for web knowledge
201
  result = duckduckgo_search(question)
202
  if result:
203
  return result
204
- # 4. Wikipedia for encyclopedic queries
 
205
  result = wikipedia_search(question)
206
  if result:
207
  return result
208
- # 5. Grok again for hard/reasoning/general (if not already tried)
209
- if not is_coding_question(question):
210
- grok_response = grok_completion(question)
211
- if grok_response:
212
- return f"[Grok] {grok_response}"
213
 
214
- # 6. Fallback to LLM conversational
215
  result = llm_conversational(question)
216
  if result:
217
  return result
218
- return "No answer could be found by available tools."
 
219
 
220
  # ==== SUBMISSION LOGIC ====
221
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
12
# ==== CONFIG ====
# Base URL of the scoring service (presumably consumed by the submission
# logic further down the file -- confirm against run_and_submit_all).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Hugging Face access token; None when the environment variable is unset.
# It is handed to every InferenceClient created by the LLM helpers.
HF_TOKEN = os.environ.get("HF_TOKEN")

# SOTA models: for general and code queries.
# llm_conversational() tries these in order and stops at the first success.
CONVERSATIONAL_MODELS = [
    "deepseek-ai/DeepSeek-V2-Chat",
    "Qwen/Qwen2-72B-Instruct",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-70B-Instruct",
]

# Single model used by llm_coder() for programming questions.
CODING_MODEL = "deepseek-ai/DeepSeek-Coder-33B-Instruct"

# Shared English-language Wikipedia client.
# NOTE(review): the contact address inside the user agent looks redacted in
# this copy of the file -- confirm the real value before deploying.
wiki_api = wikipediaapi.Wikipedia(
    language="en",
    user_agent="SmartAgent/1.0 ([email protected])",
)
26
 
27
+ # ==== UTILITIES ====
28
# Compiled once at import time instead of on every call. A URL runs until
# whitespace, ')', ']' or ','.
_URL_PATTERN = re.compile(r'(https?://[^\s\)\],]+)')

# Punctuation that commonly follows a URL in running text ("see http://x.y.")
# but is not part of the address itself.
_URL_TRAILING_PUNCTUATION = ".;:!?'\""


def extract_links(text):
    """Return every http(s) URL found in *text*, in order of appearance.

    Args:
        text: The string to scan. ``None`` or an empty string is accepted
            and yields an empty list.

    Returns:
        A list of URL strings. Sentence punctuation trailing a URL is
        stripped so that extension checks such as ``url.endswith(".pdf")``
        behave as expected on links quoted at the end of a sentence.
    """
    candidates = _URL_PATTERN.findall(text or "")
    return [url.rstrip(_URL_TRAILING_PUNCTUATION) for url in candidates]
 
42
  except Exception:
43
  return None
44
 
 
45
def analyze_file(file_path):
    """Return a short, human-readable preview of a local file.

    The file type is chosen from the path's extension, compared
    case-insensitively so that ``REPORT.PDF`` is handled like ``report.pdf``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A string in one of these forms:

        * ``"Excel summary: ..."`` / ``"CSV summary: ..."`` -- the first rows
          of the table rendered as markdown.
        * ``"PDF text sample: ..."`` -- up to 1000 characters of text from
          the first page (empty when there are no pages or no text).
        * ``"TXT file sample: ..."`` -- the first 1000 characters.
        * ``"Unsupported file type: <path>"`` for any other extension.
        * ``"File analysis error: <reason>"`` when reading or parsing fails.

        The function never raises; every failure is reported in the
        returned string.
    """
    try:
        lowered = file_path.lower()

        if lowered.endswith((".xlsx", ".xls")):
            df = pd.read_excel(file_path)
            return f"Excel summary: {df.head().to_markdown(index=False)}"

        if lowered.endswith(".csv"):
            df = pd.read_csv(file_path)
            return f"CSV summary: {df.head().to_markdown(index=False)}"

        if lowered.endswith(".pdf"):
            with pdfplumber.open(file_path) as pdf:
                pages = pdf.pages
                # A PDF may have no pages, and text extraction may yield
                # nothing for image-only pages; treat both as empty text
                # rather than failing on the slice below.
                first_page = pages[0].extract_text() if pages else None
            sample = (first_page or "")[:1000]
            return f"PDF text sample: {sample}"

        if lowered.endswith(".txt"):
            with open(file_path, encoding='utf-8') as f:
                # Only the preview is needed, so do not load the whole file.
                txt = f.read(1000)
            return f"TXT file sample: {txt}"

        return f"Unsupported file type: {file_path}"
    except Exception as e:
        return f"File analysis error: {e}"
65
 
66
  def analyze_webpage(url):
67
  try:
 
74
  except Exception as e:
75
  return f"Webpage error: {e}"
76
 
 
77
  def duckduckgo_search(query):
78
  try:
79
  with DDGS() as ddgs:
 
92
  return None
93
  return None
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def is_coding_question(text):
96
  code_terms = [
97
  "python", "java", "c++", "code", "function", "write a", "script", "algorithm",
 
103
  return True
104
  return False
105
 
106
def llm_coder(query):
    """Ask the dedicated coding model to answer *query*.

    Args:
        query: The question or instruction sent to the model as the prompt.

    Returns:
        ``"[<CODING_MODEL>] <generated text>"`` when the model produced
        non-empty text, otherwise ``None``. Failures are logged rather than
        returned as text, so callers that test the result for truthiness
        can fall through to their next tool instead of presenting an error
        message as the answer.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        hf_client = InferenceClient(CODING_MODEL, token=HF_TOKEN)
        result = hf_client.text_generation(query, max_new_tokens=1024)
    except Exception as e:
        logger.warning("Coder model %s failed: %s", CODING_MODEL, e)
        return None

    # Accept either a plain string or a mapping carrying "generated_text".
    if isinstance(result, dict):
        result = result.get("generated_text")

    if isinstance(result, str) and result.strip():
        return f"[{CODING_MODEL}] {result}"

    logger.warning("Coder model %s returned no usable text", CODING_MODEL)
    return None
117
+
118
def llm_conversational(query):
    """Answer *query* with the first conversational model that responds.

    The models in ``CONVERSATIONAL_MODELS`` are tried in order; a model that
    raises or returns no usable text is logged and skipped.

    Args:
        query: The question sent to each model as the prompt.

    Returns:
        ``"[<model_id>] <generated text>"`` from the first model that
        produced non-empty text, or ``None`` when every model failed.
        Returning ``None`` (rather than an error message) lets callers that
        test the result for truthiness reach their own "no answer" fallback.
    """
    import logging

    logger = logging.getLogger(__name__)

    for model_id in CONVERSATIONAL_MODELS:
        try:
            hf_client = InferenceClient(model_id, token=HF_TOKEN)
            result = hf_client.text_generation(query, max_new_tokens=512)
        except Exception as e:
            logger.warning("Conversational model %s failed: %s", model_id, e)
            continue

        # Accept either a plain string or a mapping carrying "generated_text".
        if isinstance(result, dict):
            result = result.get("generated_text")

        if isinstance(result, str) and result.strip():
            return f"[{model_id}] {result}"

        logger.warning("Conversational model %s returned no usable text", model_id)

    return None
131
 
132
  # ==== SMART AGENT ====
133
  class SmartAgent:
 
152
  if results:
153
  return "\n\n".join(results)
154
 
155
+ # 2. Code/coding questions: use coder model
156
  if is_coding_question(question):
157
+ result = llm_coder(question)
158
+ if result:
159
+ return result
160
 
161
+ # 3. DuckDuckGo for fresh web results
162
  result = duckduckgo_search(question)
163
  if result:
164
  return result
165
+
166
+ # 4. Wikipedia for encyclopedic facts
167
  result = wikipedia_search(question)
168
  if result:
169
  return result
 
 
 
 
 
170
 
171
+ # 5. General QA, reasoning, or fallback: conversational SOTA models
172
  result = llm_conversational(question)
173
  if result:
174
  return result
175
+
176
+ return "No answer could be found by available models."
177
 
178
  # ==== SUBMISSION LOGIC ====
179
  def run_and_submit_all(profile: gr.OAuthProfile | None):