rdave88 committed on
Commit
7318893
·
verified ·
1 Parent(s): 81917a3

Upload model_tools.py

Browse files

Functions to scrape model and task data from a Hugging Face URL.

Files changed (1) hide show
  1. model_tools.py +66 -0
model_tools.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model_tools.py
2
+
3
+ import ollama
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+
7
+ # ---- LLM Task Extractor ----
8
+
9
def _normalize_task(raw: str) -> str:
    """Return the first non-empty line of *raw*, stripped of decoration.

    Chat models frequently wrap a one-word answer in quotes, backticks,
    bullet markers or a trailing period, or append extra lines. Only
    leading/trailing decoration is removed, so hyphens inside a task name
    such as ``text-classification`` are preserved.
    """
    for line in raw.splitlines():
        candidate = line.strip().strip("`'\"*-. \t").lower()
        if candidate:
            return candidate
    return ""


def extract_task(user_input: str) -> str:
    """
    Use local Ollama LLM to classify user query into Hugging Face task.

    Args:
        user_input: Free-form description of what the developer wants to do.

    Returns:
        The lower-cased task name taken from the model's reply (for example
        ``"summarization"``), with surrounding quotes, backticks, bullet
        markers and trailing punctuation removed. An empty string is
        returned when the reply contains no usable text.
    """
    prompt = f"""
    You are an AI agent helping a developer select the right ML model.
    Given this request: "{user_input}"

    Reply with only the corresponding Hugging Face task like:
    - text-classification
    - summarization
    - translation
    - image-classification
    - etc.
    Only reply with the task name, and nothing else.
    """

    response = ollama.chat(
        model="mistral",  # Replace with llama3, phi3, etc. if needed
        messages=[{"role": "user", "content": prompt}],
    )

    # The reply is later interpolated into a URL, so keep only a clean token
    # rather than trusting the model to follow the "nothing else" instruction.
    return _normalize_task(response['message']['content'])
32
+
33
+ # ---- Hugging Face Scraper ----
34
+
35
def scrape_huggingface_models(task: str, max_results=5, timeout: float = 10.0) -> list[dict]:
    """
    Scrapes Hugging Face for top models for a given task.

    Args:
        task: Pipeline tag used to filter the model listing
            (e.g. ``"summarization"``). It is URL-encoded before sending.
        max_results: Maximum number of model cards to return. Zero or a
            negative value yields an empty list without any network call.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of dicts with the keys ``"model_name"``, ``"task"`` and
        ``"architecture"``. An empty list is returned when the request
        fails or no matching cards are found on the page.
    """
    if max_results <= 0:
        return []

    try:
        # Pass the query via ``params`` so the task is properly URL-encoded,
        # and always set a timeout so a stalled connection cannot hang.
        resp = requests.get(
            "https://huggingface.co/models",
            params={"pipeline_tag": task, "sort": "downloads"},
            timeout=timeout,
        )
        # Avoid parsing an error page as if it were the listing.
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Scraping error: {e}")
        return []

    soup = BeautifulSoup(resp.text, "html.parser")
    # NOTE(review): the CSS class names used below are assumed to match the
    # live page markup — confirm against the current huggingface.co HTML.
    model_cards = soup.find_all("article", class_="model-card")[:max_results]

    results = []
    for card in model_cards:
        name_tag = card.find("a", class_="model-link")
        model_name = name_tag.text.strip() if name_tag else "unknown"

        task_div = card.find("div", class_="task-tag")
        task_name = task_div.text.strip() if task_div else task

        # Rough name-based heuristic: BART and T5 families are
        # encoder-decoder; everything else is left as "unknown".
        lowered = model_name.lower()
        is_seq2seq = any(marker in lowered for marker in ("bart", "t5"))
        arch = "encoder-decoder" if is_seq2seq else "unknown"

        results.append({
            "model_name": model_name,
            "task": task_name,
            "architecture": arch,
        })

    return results