davanstrien HF Staff committed on
Commit
4bdf3bc
·
1 Parent(s): 54dff04

Remove app.py file

Browse files
Files changed (1) hide show
  1. app.py +0 -133
app.py DELETED
@@ -1,133 +0,0 @@
1
- import asyncio
2
- from typing import Dict, List
3
-
4
- import gradio as gr
5
- import httpx
6
-
7
- API_URL = "http://localhost:8000"
8
-
9
-
10
async def _fetch_results(endpoint: str, params: Dict) -> List[Dict]:
    """GET ``API_URL + endpoint`` and return the ``"results"`` list of its JSON body.

    Args:
        endpoint: Path appended to ``API_URL`` (including the leading slash).
        params: Query-string parameters forwarded to the request.

    Returns:
        The value stored under ``"results"`` in the JSON response, or an empty
        list when the status code is not 200 or the payload carries no usable
        ``"results"`` entry.

    Exceptions raised by httpx itself (connection errors, timeouts, ...) are
    not caught here and propagate to the caller.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(f"{API_URL}{endpoint}", params=params)
        if response.status_code != 200:
            return []
        payload = response.json()

    # A 200 response without a "results" field (or with a null one) is treated
    # like any other unsuccessful lookup instead of raising KeyError/TypeError.
    if not isinstance(payload, dict):
        return []
    return payload.get("results") or []


async def fetch_similar_datasets(dataset_id: str, limit: int = 5) -> List[Dict]:
    """Query ``/similarity/datasets`` for datasets similar to ``dataset_id``.

    Args:
        dataset_id: Identifier sent as the ``dataset_id`` query parameter.
        limit: Sent as the ``k`` query parameter.

    Returns:
        The ``"results"`` list of the response, or ``[]`` when the request
        did not succeed.
    """
    return await _fetch_results(
        "/similarity/datasets",
        {"dataset_id": dataset_id, "k": limit},
    )


async def fetch_similar_datasets_by_text(query: str, limit: int = 5) -> List[Dict]:
    """Query ``/search/datasets`` with a free-text ``query``.

    Args:
        query: Text sent as the ``query`` query parameter.
        limit: Sent as the ``k`` query parameter.

    Returns:
        The ``"results"`` list of the response, or ``[]`` when the request
        did not succeed.
    """
    return await _fetch_results(
        "/search/datasets",
        {"query": query, "k": limit},
    )
29
-
30
-
31
def format_results(results: List[Dict]) -> str:
    """Render search results as a Markdown document.

    Each result becomes a level-3 heading linking to the dataset page on
    huggingface.co, followed by its similarity (two decimals), its summary
    and a horizontal rule.

    Args:
        results: Mappings that must contain ``"dataset_id"`` and
            ``"similarity"``; ``"summary"`` is optional.

    Returns:
        The concatenated Markdown, or ``""`` when ``results`` is empty.

    Raises:
        KeyError: If a result lacks ``"dataset_id"`` or ``"similarity"``.
    """
    sections = []

    for result in results:
        hub_id = result["dataset_id"]
        similarity = result["similarity"]
        # `or` (rather than a .get default) so that a summary that is present
        # but null/empty also falls back instead of rendering "None" or nothing.
        summary = result.get("summary") or "No summary available."
        url = f"https://huggingface.co/datasets/{hub_id}"

        sections.append(
            f"### [{hub_id}]({url})\n"
            f"*Similarity: {similarity:.2f}*\n\n"
            f"{summary}\n\n"
            "---\n\n"
        )

    return "".join(sections)
46
-
47
-
48
# Gradio UI: pick a search method, enter a dataset ID or a text query, and
# render the matching datasets as Markdown.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🔍 Dataset Explorer
        Find similar datasets or search by text query
        """,
        elem_classes=["center-text"],
    )

    with gr.Column(variant="panel"):
        search_type = gr.Radio(
            ["Dataset ID", "Text Query"],
            label="Search Method",
            value="Dataset ID",
            container=False,
        )

        with gr.Group():
            dataset_id = gr.Textbox(
                value="airtrain-ai/fineweb-edu-fortified",
                label="Dataset ID",
                container=False,
            )
            # Hidden until "Text Query" is selected (see toggle_input_visibility).
            text_query = gr.Textbox(
                label="Text Query",
                placeholder="Enter at least 3 characters...",
                container=False,
                visible=False,
            )

        with gr.Row():
            search_btn = gr.Button("🔍 Search", size="lg")
            max_results = gr.Slider(
                minimum=1,
                maximum=20,
                step=1,
                value=5,
                label="Number of results",
            )

    results = gr.Markdown(elem_classes=["results-container"])

    def toggle_input_visibility(choice):
        """Return visibility updates for (dataset_id, text_query, search_btn).

        The dataset-ID box and the search button are shown only for
        "Dataset ID"; the text-query box is shown only for "Text Query".
        """
        by_id = choice == "Dataset ID"
        return (
            gr.update(visible=by_id),
            gr.update(visible=choice == "Text Query"),
            gr.update(visible=by_id),
        )

    search_type.change(
        toggle_input_visibility,
        inputs=[search_type],
        outputs=[dataset_id, text_query, search_btn],
    )

    async def search_handler(search_type, dataset_id, text_query, limit):
        """Run the selected search and return Markdown for the results pane."""
        if search_type == "Dataset ID":
            found = await fetch_similar_datasets(dataset_id, limit)
        else:
            found = await fetch_similar_datasets_by_text(text_query, limit)

        if not found:
            return "No similar datasets found."

        return format_results(found)

    async def live_search_handler(search_type, text_query, limit):
        """Search as the user types, but only once 3+ characters are entered.

        Returns ``None`` for shorter queries, as the original lambda did.
        """
        if len(text_query) < 3:
            return None
        return await search_handler(search_type, "", text_query, limit)

    # Coroutine functions are registered directly: Gradio awaits them itself,
    # so there is no need to spin up a fresh event loop with asyncio.run()
    # on every keystroke/click.
    text_query.input(
        live_search_handler,
        inputs=[search_type, text_query, max_results],
        outputs=results,
        api_name=False,
    )

    search_btn.click(
        search_handler,
        inputs=[search_type, dataset_id, text_query, max_results],
        outputs=results,
    )

demo.launch()