davanstrien's picture
davanstrien HF Staff
Refactor dataset search and sorting functionality, add share query button, update findSimilarFromResult function, improve dataset suggestions handling, make the page mobile friendly, and refactor dataset input to include trending dataset suggestions.
899ba6d
raw
history blame
28.6 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Hub Semantic Search</title>
<!--
  These libraries are loaded synchronously (no defer/async) on purpose: the
  inline script at the end of the body calls lucide.createIcons() and
  _.debounce() as soon as it is parsed, so both globals must already exist.
-->
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://unpkg.com/lucide@latest"></script>
<!--
  The lodash URL had been mangled by e-mail obfuscation ("[email protected]"),
  which made the request fail and left "_" undefined.
  NOTE(review): 4.17.21 is assumed to be the intended version - confirm.
-->
<script src="https://cdn.jsdelivr.net/npm/lodash@4.17.21/lodash.min.js"></script>
</head>
<body>
<div class="w-full max-w-4xl mx-auto p-4 space-y-8">
<h1 class="text-3xl font-bold text-gray-800">Hub Semantic Search</h1>
<div
class="bg-gradient-to-br from-blue-50 to-indigo-50 p-6 rounded-xl shadow-sm border border-blue-100 mb-6"
>
<h2
class="text-lg font-semibold mb-2 text-gray-800 flex items-center gap-2"
>
<i data-lucide="search" class="text-blue-500"></i>
Welcome to Hub Semantic Search
</h2>
<p class="text-gray-700 mb-2 text-sm">
Find and explore the 🤗 Hub via semantic search on LLM-generated
summaries!
</p>
<div
class="bg-blue-100 text-blue-800 px-3 py-1.5 rounded-md mb-2 text-sm"
>
<p class="flex items-center gap-2">
<i data-lucide="info"></i> Currently supporting dataset search only.
Model search coming soon!
</p>
</div>
<button
onclick="toggleAccordion()"
id="accordionButton"
class="text-blue-500 hover:text-blue-700 flex items-center gap-2 text-sm"
>
<i
data-lucide="chevron-right"
id="accordionIcon"
class="transition-transform"
></i>
<span>How it works</span>
</button>
<div id="accordionContent" class="hidden">
<ul
class="list-disc list-inside space-y-1 text-gray-600 ml-4 mt-2 text-sm"
>
<li>
<strong>AI-Generated Summaries:</strong> Each dataset is indexed
using a concise summary generated by an LLM
</li>
<li>
<strong>Semantic Search:</strong> Find semantically similar
resources based on these summaries
</li>
<li>
<strong>Find Similar:</strong> Discover related resources using
semantic matching
</li>
</ul>
</div>
</div>
<div class="tabs w-full">
<div class="tab-list flex gap-2 border-b mb-6">
<button
onclick="switchTab('search')"
id="searchTab"
class="tab-trigger active px-4 sm:px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors flex-1 justify-center"
>
<i data-lucide="search"></i> Search
</button>
<button
onclick="switchTab('similar')"
id="similarTab"
class="tab-trigger px-4 sm:px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors flex-1 justify-center"
>
<i data-lucide="arrow-right"></i> Find Similar
</button>
</div>
<div id="searchContent" class="tab-content space-y-4">
<div
class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100"
>
<div
class="flex flex-col sm:flex-row gap-4 items-start sm:items-center justify-between mb-4"
>
<p class="text-gray-600">
Enter keywords to search through dataset descriptions. The
search will automatically update as you type.
</p>
<select
id="searchSortSelect"
class="text-sm border rounded-lg px-3 py-2 bg-white text-gray-700 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
onchange="handleSortChange('search')"
>
<option value="similarity">Sort by relevance</option>
<option value="likes">Sort by likes</option>
<option value="downloads">Sort by downloads</option>
</select>
</div>
<div class="relative">
  <!-- The placeholder is only a hint, so aria-label supplies the accessible name. -->
  <input
    type="text"
    id="searchInput"
    placeholder="Type to search (minimum 3 characters)..."
    class="w-full p-3 border rounded-lg pr-10 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
    aria-label="Search datasets"
  />
  <!-- Spinner toggled by the search script while a request is in flight. -->
  <div id="searchLoader" class="hidden absolute right-3 top-2">
    <i data-lucide="loader-2" class="animate-spin"></i>
  </div>
</div>
</div>
</div>
<div id="similarContent" class="hidden tab-content space-y-4">
<div
class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100"
>
<div
class="flex flex-col sm:flex-row gap-4 items-start sm:items-center justify-between mb-4"
>
<p class="text-gray-600">
Enter a dataset ID to find similar datasets. Popular datasets
will appear as you type.
</p>
<select
id="similarSortSelect"
class="text-sm border rounded-lg px-3 py-2 bg-white text-gray-700 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
onchange="handleSortChange('similar')"
>
<option value="similarity">Sort by relevance</option>
<option value="likes">Sort by likes</option>
<option value="downloads">Sort by downloads</option>
</select>
</div>
<div class="flex gap-3">
  <div class="relative w-full">
    <!--
      aria-label names the field for assistive technology (the placeholder is
      only an example value). autocomplete="off" asks the browser not to show
      its own history list on top of the custom #suggestionsBox.
    -->
    <input
      type="text"
      id="datasetInput"
      class="w-full p-3 border border-gray-200 rounded-lg"
      placeholder="e.g. openai/gsm8k"
      aria-label="Dataset ID"
      autocomplete="off"
    />
    <!-- Filled and shown/hidden by the page script. -->
    <div
      id="suggestionsBox"
      class="hidden absolute w-full mt-1 bg-white border border-gray-200 rounded-lg shadow-lg z-10 max-h-60 overflow-y-auto"
    ></div>
  </div>
  <!-- Explicit type so the button can never act as a form submit. -->
  <button type="button" onclick="findSimilarDatasets()" class="btn-primary">
    Find Similar
  </button>
</div>
</div>
</div>
<div
id="errorMessage"
class="hidden mt-4 p-4 text-red-600 bg-red-50 rounded-md"
></div>
<div id="resultsContainer" class="mt-6 space-y-4"></div>
</div>
</div>
<style>
  /* Underline and tint the currently selected tab button. */
  .tab-trigger.active {
    border-bottom-color: #3b82f6;
    color: #3b82f6;
  }

  /*
   * The "Find Similar" button uses class="btn-primary", but no rule for that
   * class existed in this file, so the button rendered without any styling.
   * The element selector is included so the rule outranks generic button
   * resets regardless of stylesheet order.
   */
  button.btn-primary {
    padding: 0.75rem 1.5rem;
    border-radius: 0.5rem;
    background-color: #3b82f6;
    color: #ffffff;
    font-weight: 500;
    white-space: nowrap;
    transition: background-color 150ms ease;
  }

  button.btn-primary:hover {
    background-color: #2563eb;
  }

  button.btn-primary:focus-visible {
    outline: 2px solid #93c5fd;
    outline-offset: 2px;
  }
</style>
<script>
// Configuration
// Base URL of the search backend; the "/search/datasets" and
// "/similarity/datasets" endpoints are requested underneath it.
const API_URL =
"https://davanstrien-huggingface-datasets-search-v2.hf.space";
// Queries shorter than this clear the results instead of calling the API.
const MIN_SEARCH_LENGTH = 3;
// Wait time passed to _.debounce for the keyword search.
const DEBOUNCE_MS = 300;
// Page size: each request asks the API for RESULTS_PER_PAGE * page results.
const RESULTS_PER_PAGE = 5;
// Ceiling used by displayResults to decide between "Load More" and the
// end-of-results panel.
const MAX_RESULTS = 100;
// Page counter shared by both tabs; reset in switchTab, incremented in loadMore.
let currentPage = 1;
// Query-string parameters captured once at load time so that a shared link
// ("?q=..." or "?similar=...") can restore its search on startup.
const URL_PARAMS = new URLSearchParams(window.location.search);
const INITIAL_SEARCH = URL_PARAMS.get("q");
const INITIAL_SIMILAR = URL_PARAMS.get("similar");
// Active sort key. "similarity" keeps the order returned by the API; any other
// value is used by displayResults as the result field to sort on (descending).
let currentSort = "similarity";
// Replace every <i data-lucide="..."> placeholder present at load with its icon.
lucide.createIcons();
// Tab switching
/**
 * Show the panel and highlight the button of one tab, hiding the other.
 *
 * @param {string} tabId - "search" or "similar"; used as the prefix of the
 *   `${tabId}Content` and `${tabId}Tab` element ids.
 */
function switchTab(tabId) {
  // Changing tabs always restarts pagination.
  currentPage = 1;

  for (const panel of document.querySelectorAll(".tab-content")) {
    panel.classList.add("hidden");
  }
  for (const tabButton of document.querySelectorAll(".tab-trigger")) {
    tabButton.classList.remove("active");
  }

  const selectedPanel = document.getElementById(`${tabId}Content`);
  const selectedButton = document.getElementById(`${tabId}Tab`);
  selectedPanel.classList.remove("hidden");
  selectedButton.classList.add("active");

  // Remove the URL parameter that belongs to the tab being left.
  switch (tabId) {
    case "search":
      updateURL({ similar: null });
      break;
    case "similar":
      updateURL({ q: null });
      break;
    default:
      break;
  }
}
// Create result card
/**
 * Build the HTML for a single result card and start the viewer-availability
 * check for its dataset.
 *
 * Every value taken from the API response is escaped before it is placed in
 * the markup: the summary is free text and may contain "<" or "&", and the
 * dataset id is embedded in attributes and inline handlers.
 *
 * @param {Object} result - One entry of the API `results` array. Fields read:
 *   `dataset_id`, `likes`, `downloads`, `similarity`, `summary`.
 * @returns {string} HTML markup for the card.
 */
function createResultCard(result) {
  // Escape text for HTML content and for double-quoted attribute values.
  const escapeHtml = (value) =>
    String(value).replace(
      /[&<>"']/g,
      (ch) =>
        ({
          "&": "&amp;",
          "<": "&lt;",
          ">": "&gt;",
          '"': "&quot;",
          "'": "&#39;",
        }[ch])
    );

  const datasetId = String(result.dataset_id);
  const idHtml = escapeHtml(datasetId);
  // Inline handlers are JavaScript inside an HTML attribute, so the id is first
  // serialised as a JS string literal and then HTML-escaped.
  const idJsArg = escapeHtml(JSON.stringify(datasetId));
  const hubUrl = `https://huggingface.co/datasets/${idHtml}`;

  const cardHtml = `
    <div class="card bg-white p-4 sm:p-6 rounded-lg shadow hover:shadow-md transition-shadow">
      <div class="space-y-2 w-full">
        <div class="flex flex-col sm:flex-row sm:items-center justify-between gap-2">
          <div class="flex items-center gap-2">
            <i data-lucide="database" class="text-blue-500"></i>
            <h3 class="text-lg font-semibold">${idHtml}</h3>
          </div>
          <div class="flex flex-wrap items-center gap-2">
            <div class="flex items-center gap-4 text-sm text-gray-500">
              <span class="flex items-center gap-1">
                <i data-lucide="heart" class="w-4 h-4"></i>
                ${escapeHtml(result.likes)}
              </span>
              <span class="flex items-center gap-1">
                <i data-lucide="download" class="w-4 h-4"></i>
                ${escapeHtml(result.downloads)}
              </span>
            </div>
            <span class="bg-blue-50 px-2 py-1 rounded text-sm">
              ${(result.similarity * 100).toFixed(1)}% match
            </span>
            <button
              onclick="findSimilarFromResult(${idJsArg})"
              class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
            >
              <i data-lucide="arrow-right"></i>
              Find Similar
            </button>
          </div>
        </div>
        <p class="text-sm text-gray-600">${escapeHtml(result.summary)}</p>
        <!-- Preview section starts hidden; checkDatasetValidity reveals it -->
        <div id="preview-section-${idHtml}" class="mt-4 border-t pt-4 hidden">
          <button
            onclick="togglePreview(${idJsArg})"
            class="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-800"
          >
            <i data-lucide="chevron-right" id="preview-icon-${idHtml}" class="transition-transform"></i>
            Preview Dataset
          </button>
          <div id="preview-content-${idHtml}" class="hidden mt-4">
            <iframe
              src="${hubUrl}/embed/viewer/default/train"
              title="Dataset viewer for ${idHtml}"
              loading="lazy"
              frameborder="0"
              width="100%"
              height="560px"
            ></iframe>
          </div>
        </div>
        <a href="${hubUrl}"
           target="_blank"
           rel="noopener noreferrer"
           class="inline-flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700 mt-2">
          <i data-lucide="external-link" class="w-4 h-4"></i>
          View on Hugging Face Hub
        </a>
      </div>
    </div>
  `;

  // The check is started before the caller has inserted the markup. It looks
  // the preview section up by id only after its network request resolves, and
  // does nothing if the element is not in the document at that point.
  checkDatasetValidity(result.dataset_id);
  return cardHtml;
}
/**
 * Ask the datasets server whether a dataset has a viewer and, if so, reveal
 * the card's preview section (element id `preview-section-${datasetId}`).
 *
 * Failures are logged and otherwise ignored: the preview simply stays hidden.
 *
 * @param {string} datasetId - Dataset id such as "openai/gsm8k".
 * @returns {Promise<void>}
 */
async function checkDatasetValidity(datasetId) {
  try {
    // Encode the id like the other API calls in this file do, so characters
    // with a meaning in query strings cannot change the request.
    const response = await fetch(
      `https://datasets-server.huggingface.co/is-valid?dataset=${encodeURIComponent(
        datasetId
      )}`
    );
    // An error status means no usable answer; leave the preview hidden
    // without trying to parse the error body.
    if (!response.ok) return;

    const data = await response.json();
    // Show preview section only if viewer is available
    if (data && data.viewer) {
      const previewSection = document.getElementById(
        `preview-section-${datasetId}`
      );
      // The card may have been re-rendered or removed in the meantime.
      if (previewSection) {
        previewSection.classList.remove("hidden");
      }
    }
  } catch (error) {
    console.error(
      `Failed to check validity for dataset ${datasetId}:`,
      error
    );
  }
}
/**
 * Apply query-parameter changes to the page URL and record them in history.
 *
 * @param {Object<string, ?string>} params - Map of parameter name to value.
 *   A truthy value sets the parameter; a falsy one (null, "", undefined)
 *   removes it.
 */
function updateURL(params) {
  const nextURL = new URL(window.location);
  for (const [key, value] of Object.entries(params)) {
    if (value) {
      nextURL.searchParams.set(key, value);
    } else {
      nextURL.searchParams.delete(key);
    }
  }
  // Callers invoke this unconditionally (e.g. on every tab click). Skip the
  // push when nothing changed so the back button is not filled with
  // identical entries.
  if (nextURL.href === window.location.href) return;
  window.history.pushState({}, "", nextURL);
}
// Id of the most recent keyword-search invocation. A response is rendered
// only if its id is still the latest, so a slow older request cannot
// overwrite the results of a newer one.
let latestSearchRequestId = 0;

/**
 * Debounced keyword search (waits DEBOUNCE_MS after the last call).
 *
 * @param {string} query - Text typed by the user.
 * @param {number} [page=1] - Page to show; RESULTS_PER_PAGE * page results
 *   are requested and the whole list is re-rendered.
 */
const searchDatasets = _.debounce(async (query, page = 1) => {
  const requestId = ++latestSearchRequestId;
  // Keep the shared counter in step with what is displayed; otherwise
  // "Load More" after a fresh query would continue from the old page.
  currentPage = page;

  const loader = document.getElementById("searchLoader");

  if (query.length < MIN_SEARCH_LENGTH) {
    document.getElementById("resultsContainer").innerHTML = "";
    // An in-flight request was just invalidated above and will no longer
    // hide the spinner itself.
    loader.classList.add("hidden");
    updateURL({ q: null, similar: null }); // Clear URL params
    return;
  }

  loader.classList.remove("hidden");
  document.getElementById("errorMessage").classList.add("hidden");
  // Update URL with search query
  updateURL({ q: query, similar: null });

  try {
    const response = await fetch(
      `${API_URL}/search/datasets?query=${encodeURIComponent(query)}&k=${
        RESULTS_PER_PAGE * page
      }`
    );
    if (!response.ok) throw new Error("Search failed");
    const data = await response.json();
    // Drop the response if a newer search has started meanwhile.
    if (requestId !== latestSearchRequestId) return;
    displayResults(data.results, page);
  } catch (error) {
    if (requestId !== latestSearchRequestId) return;
    console.error("Search error:", error);
    showError("Failed to perform search. Please try again.");
  } finally {
    // Only the latest request controls the spinner.
    if (requestId === latestSearchRequestId) {
      loader.classList.add("hidden");
    }
  }
}, DEBOUNCE_MS);
// Cache for trending datasets
let trendingDatasetsCache = null;
let cacheTimestamp = null;
const CACHE_DURATION = 1000 * 60 * 15; // 15 minutes

/**
 * Return up to 20 dataset ids to offer as suggestions.
 *
 * The list is cached in memory for CACHE_DURATION. Failures are logged and
 * produce an empty list; a failed fetch is not cached.
 *
 * NOTE(review): the ids are taken in whatever order the Hub API returns by
 * default - confirm that this default really is the "trending" order.
 *
 * @returns {Promise<string[]>} Dataset ids, possibly empty.
 */
async function fetchTrendingDatasets() {
  const SUGGESTION_COUNT = 20;

  const cacheIsFresh =
    trendingDatasetsCache &&
    cacheTimestamp &&
    Date.now() - cacheTimestamp < CACHE_DURATION;
  if (cacheIsFresh) {
    return trendingDatasetsCache;
  }

  try {
    // Ask only for the entries that are used instead of downloading the
    // full default listing and discarding most of it.
    const response = await fetch(
      `https://huggingface.co/api/datasets?limit=${SUGGESTION_COUNT}`
    );
    if (!response.ok) {
      throw new Error(`Unexpected status ${response.status}`);
    }
    const data = await response.json();
    if (!Array.isArray(data)) {
      throw new Error("Unexpected response shape");
    }

    // slice() stays as a safeguard in case the server ignores the limit.
    const trendingDatasets = data
      .slice(0, SUGGESTION_COUNT)
      .map((dataset) => dataset.id);
    trendingDatasetsCache = trendingDatasets;
    cacheTimestamp = Date.now();
    return trendingDatasets;
  } catch (error) {
    console.error("Error fetching trending datasets:", error);
    return [];
  }
}
/**
 * Render dataset ids as clickable suggestions, or hide the box when there is
 * nothing to show.
 *
 * @param {string[]} datasets - Dataset ids to list.
 * @param {HTMLElement} suggestionsBox - Container that receives the markup.
 */
function displaySuggestions(datasets, suggestionsBox) {
  if (datasets.length === 0) {
    suggestionsBox.classList.add("hidden");
    return;
  }

  // The ids come from an external API, so escape them for HTML content and
  // for double-quoted attribute values.
  const escapeHtml = (value) =>
    String(value).replace(
      /[&<>"']/g,
      (ch) =>
        ({
          "&": "&amp;",
          "<": "&lt;",
          ">": "&gt;",
          '"': "&quot;",
          "'": "&#39;",
        }[ch])
    );

  suggestionsBox.innerHTML = datasets
    .map((datasetId) => {
      // The inline handler is JavaScript inside an attribute: serialise the
      // id as a JS string literal first, then HTML-escape it.
      const idJsArg = escapeHtml(JSON.stringify(String(datasetId)));
      return `
        <div
          class="p-3 hover:bg-gray-50 cursor-pointer border-b last:border-b-0"
          onclick="selectSuggestion(${idJsArg})"
        >
          <div class="flex items-center gap-2">
            <i data-lucide="database" class="w-4 h-4 text-blue-500"></i>
            <span>${escapeHtml(datasetId)}</span>
          </div>
        </div>
      `;
    })
    .join("");
  suggestionsBox.classList.remove("hidden");
  // Turn the freshly inserted <i data-lucide> placeholders into icons.
  lucide.createIcons();
}
/**
 * Handle a click on a suggestion: copy the id into the input, close the
 * suggestions box and run the similarity search.
 *
 * @param {string} dataset - Dataset id that was clicked.
 */
function selectSuggestion(dataset) {
  const inputField = document.getElementById("datasetInput");
  const dropdown = document.getElementById("suggestionsBox");

  inputField.value = dataset;
  dropdown.classList.add("hidden");

  findSimilarDatasets();
}
/**
 * Look up datasets similar to the id typed into #datasetInput and render them.
 *
 * @param {number} [page=1] - Page to show; RESULTS_PER_PAGE * page results
 *   are requested and the whole list is re-rendered.
 * @returns {Promise<void>}
 */
async function findSimilarDatasets(page = 1) {
  // Trim so a pasted id with stray whitespace still matches.
  const datasetId = document.getElementById("datasetInput").value.trim();
  if (!datasetId) return;

  // Keep the shared counter in step with what is displayed; otherwise
  // "Load More" after a fresh lookup would continue from the old page.
  currentPage = page;

  // Update URL with similar dataset ID
  updateURL({ similar: datasetId, q: null });

  // No element with this id appears in the markup visible in this file, so
  // the guards below make the spinner handling a no-op unless one is added.
  const similarLoader = document.getElementById("similarLoader");
  if (similarLoader) {
    similarLoader.classList.remove("hidden");
  }
  document.getElementById("errorMessage").classList.add("hidden");

  try {
    const response = await fetch(
      `${API_URL}/similarity/datasets?dataset_id=${encodeURIComponent(
        datasetId
      )}&k=${RESULTS_PER_PAGE * page}`
    );
    if (!response.ok) throw new Error("Similarity search failed");
    const data = await response.json();
    displayResults(data.results, page);
  } catch (error) {
    // Log the cause as the keyword search does; the user sees a generic message.
    console.error("Similarity search error:", error);
    showError("Failed to find similar datasets. Please try again.");
  } finally {
    if (similarLoader) {
      similarLoader.classList.add("hidden");
    }
  }
}
// Display results
/**
 * Render a result list into #resultsContainer.
 *
 * Below the cards it shows one of:
 *  - a "Load More" button, when the API returned at least a full page and the
 *    next page would not exceed MAX_RESULTS;
 *  - an end-of-results panel, when MAX_RESULTS results are already shown;
 *  - nothing otherwise.
 *
 * @param {Array<Object>} results - Entries from the API response.
 * @param {number} [page=1] - Page number that produced `results`.
 */
function displayResults(results, page = 1) {
  const container = document.getElementById("resultsContainer");

  if (!results || !(results.length > 0)) {
    container.innerHTML = `
      <div class="text-center text-gray-500">
        No results found
      </div>
    `;
    return;
  }

  // "similarity" keeps the API order. Any other key sorts descending on that
  // field, working on a copy so the caller's array is left untouched and
  // treating missing or non-numeric values as 0.
  const sortKey = currentSort;
  const ordered =
    sortKey === "similarity"
      ? results
      : results
          .slice()
          .sort(
            (a, b) => (Number(b[sortKey]) || 0) - (Number(a[sortKey]) || 0)
          );

  // One definition for the button that is shown in the header and footers.
  // shareResults() finds these buttons by their exact onclick attribute.
  const shareButtonHtml = `
    <button
      onclick="shareResults()"
      class="flex items-center gap-1 text-sm px-3 py-1.5 bg-white border border-gray-200 rounded-lg hover:bg-gray-50"
      title="Copy link to these search results"
    >
      <i data-lucide="link"></i>
      Copy Search Link
    </button>
  `;

  const headerHtml = `
    <div class="flex justify-between items-center mb-4">
      <h2 class="text-lg font-semibold">Results</h2>
      <div class="flex items-center gap-4">
        <span class="text-sm text-gray-500">Found ${ordered.length} results</span>
        ${shareButtonHtml}
      </div>
    </div>
  `;

  const gotFullPage = ordered.length >= RESULTS_PER_PAGE * page;
  const nextPageAllowed = RESULTS_PER_PAGE * (page + 1) <= MAX_RESULTS;

  let footerHtml = "";
  if (gotFullPage && nextPageAllowed) {
    footerHtml = `
      <div class="mt-4 flex items-center justify-between">
        <button
          onclick="loadMore()"
          class="px-6 py-3 bg-gray-100 hover:bg-gray-200 text-gray-700 rounded-lg transition-colors flex items-center gap-2"
        >
          <i data-lucide="more-horizontal"></i>
          Load More Results
        </button>
        ${shareButtonHtml}
      </div>
    `;
  } else if (ordered.length >= MAX_RESULTS) {
    footerHtml = `
      <div class="text-center mt-4 p-6 bg-blue-50 rounded-lg">
        <p class="text-gray-700 mb-3">You've reached the end of our dataset journey! (${MAX_RESULTS} results)</p>
        <p class="text-gray-600 mb-4">Can't find what you're looking for? Why not create and share your own dataset?</p>
        <div class="flex items-center justify-center gap-4">
          <a href="https://huggingface.co/docs/datasets/upload_dataset"
             target="_blank"
             class="inline-flex items-center gap-2 text-blue-500 hover:text-blue-700">
            <i data-lucide="external-link"></i>
            Learn how to share your dataset on Hugging Face
          </a>
          ${shareButtonHtml}
        </div>
      </div>
    `;
  }

  const cardsHtml = ordered.map((result) => createResultCard(result)).join("");
  container.innerHTML = `${headerHtml}${cardsHtml}${footerHtml}`;
  // Turn the freshly inserted <i data-lucide> placeholders into icons.
  lucide.createIcons();
}
// Show error message
/**
 * Put a message into the #errorMessage banner and make the banner visible.
 *
 * @param {string} message - Text to show; assigned via textContent, so it is
 *   never interpreted as markup.
 */
function showError(message) {
  const banner = document.getElementById("errorMessage");
  banner.textContent = message;
  banner.classList.remove("hidden");
}
// Event listeners
// The braces keep the handler names out of the global scope.
{
  // Every keystroke in the search box feeds the debounced keyword search.
  const onSearchInput = (event) => searchDatasets(event.target.value);

  // Pressing Enter in the dataset box runs the similarity lookup.
  const onDatasetKeydown = (event) => {
    if (event.key === "Enter") findSimilarDatasets();
  };

  document
    .getElementById("searchInput")
    .addEventListener("input", onSearchInput);
  document
    .getElementById("datasetInput")
    .addEventListener("keydown", onDatasetKeydown);
}
/**
 * "Find Similar" action on a result card: open the similar tab, fill in the
 * card's dataset id and run the lookup.
 *
 * @param {string} datasetId - Id of the dataset whose card was clicked.
 */
function findSimilarFromResult(datasetId) {
  switchTab("similar");

  // Only the value is assigned; the input is not focused, so its focus
  // handler (which opens the suggestions) does not run.
  document.getElementById("datasetInput").value = datasetId;

  // Make sure a previously opened suggestions list does not cover the results.
  document.getElementById("suggestionsBox").classList.add("hidden");

  findSimilarDatasets();
}
/**
 * Expand or collapse the "How it works" section and rotate its chevron.
 */
function toggleAccordion() {
  const panel = document.getElementById("accordionContent");
  const chevron = document.getElementById("accordionIcon");

  // classList.toggle returns true when the class is present afterwards,
  // i.e. when the panel has just been collapsed.
  const collapsed = panel.classList.toggle("hidden");
  chevron.style.transform = collapsed ? "rotate(0deg)" : "rotate(90deg)";
}
/**
 * "Load More" action: advance the shared page counter and re-run whichever
 * search belongs to the active tab.
 */
function loadMore() {
  currentPage += 1;

  const onSearchTab =
    document.querySelector(".tab-trigger.active").id === "searchTab";

  if (!onSearchTab) {
    findSimilarDatasets(currentPage);
    return;
  }

  const typedQuery = document.getElementById("searchInput").value;
  searchDatasets(typedQuery, currentPage);
}
/**
 * Open or close the embedded viewer of one result card and rotate its chevron.
 *
 * @param {string} datasetId - Dataset id used as the suffix of the
 *   `preview-content-` and `preview-icon-` element ids.
 */
function togglePreview(datasetId) {
  const viewerWrapper = document.getElementById(`preview-content-${datasetId}`);
  const chevron = document.getElementById(`preview-icon-${datasetId}`);

  viewerWrapper.classList.toggle("hidden");

  if (viewerWrapper.classList.contains("hidden")) {
    chevron.style.transform = "rotate(0deg)";
  } else {
    chevron.style.transform = "rotate(90deg)";
  }
}
/**
 * Copy a link that reproduces the current search to the clipboard and show a
 * temporary "copied" state on every share button.
 *
 * The link is built from the active tab's input value: `?q=` for the search
 * tab, `?similar=` for the similar tab.
 *
 * @returns {Promise<void>}
 */
async function shareResults() {
  const FEEDBACK_MS = 2000;
  const FEEDBACK_CLASSES = ["bg-green-50", "border-green-200", "text-green-600"];

  const activeTab = document.querySelector(".tab-trigger.active").id;
  const currentURL = new URL(window.location);
  // Update URL based on active tab
  if (activeTab === "searchTab") {
    const searchQuery = document.getElementById("searchInput").value;
    currentURL.searchParams.set("q", searchQuery);
    currentURL.searchParams.delete("similar");
  } else {
    const datasetId = document.getElementById("datasetInput").value;
    currentURL.searchParams.set("similar", datasetId);
    currentURL.searchParams.delete("q");
  }

  try {
    await navigator.clipboard.writeText(currentURL.toString());
  } catch (error) {
    // Also reached when navigator.clipboard is unavailable.
    console.error("Error copying to clipboard:", error);
    showError("Could not copy the link. Please copy it from the address bar.");
    return;
  }

  // Show success message
  const buttons = document.querySelectorAll(
    'button[onclick="shareResults()"]'
  );
  buttons.forEach((button) => {
    // A second click while the confirmation is showing would otherwise save
    // the "copied" markup as the original and leave the button stuck.
    if (button.dataset.shareFeedback === "on") return;
    button.dataset.shareFeedback = "on";

    const originalHTML = button.innerHTML;
    button.innerHTML = '<i data-lucide="check"></i> Search Link Copied!';
    button.classList.add(...FEEDBACK_CLASSES);

    setTimeout(() => {
      button.innerHTML = originalHTML;
      button.classList.remove(...FEEDBACK_CLASSES);
      delete button.dataset.shareFeedback;
      lucide.createIcons();
    }, FEEDBACK_MS);
  });
  // One pass renders the check icons of all buttons.
  lucide.createIcons();
}
// Startup wiring: suggestion dropdown behaviour for the dataset input, and
// restoring a search from the URL parameters captured at load time.
document.addEventListener("DOMContentLoaded", async () => {
  const datasetInput = document.getElementById("datasetInput");
  const suggestionsBox = document.getElementById("suggestionsBox");

  // Show the trending ids that contain `filterText` (case-insensitive).
  // An empty filter matches every id.
  const showSuggestions = async (filterText) => {
    const trending = await fetchTrendingDatasets();
    const needle = filterText.toLowerCase();
    const matches = trending.filter((dataset) =>
      dataset.toLowerCase().includes(needle)
    );
    displaySuggestions(matches, suggestionsBox);
  };

  const hideSuggestions = () => suggestionsBox.classList.add("hidden");

  // Typing narrows the list to ids containing the typed text.
  datasetInput.addEventListener("input", (e) =>
    showSuggestions(e.target.value)
  );

  // Focusing the field shows the full trending list.
  datasetInput.addEventListener("focus", () => showSuggestions(""));

  // Without these the box stayed open over the results until a suggestion
  // was picked. A blur handler is deliberately not used: blur fires before
  // the click on a suggestion and would hide it before the click lands.
  datasetInput.addEventListener("keydown", (e) => {
    if (e.key === "Escape" || e.key === "Enter") hideSuggestions();
  });
  document.addEventListener("click", (e) => {
    const clickedInside =
      e.target === datasetInput || suggestionsBox.contains(e.target);
    if (!clickedInside) hideSuggestions();
  });

  // Handle initial URL parameters
  if (INITIAL_SEARCH) {
    switchTab("search");
    document.getElementById("searchInput").value = INITIAL_SEARCH;
    // searchDatasets is debounced: the request starts after DEBOUNCE_MS and
    // the wrapper does not return a promise for this call, so there is
    // nothing to await.
    searchDatasets(INITIAL_SEARCH);
  } else if (INITIAL_SIMILAR) {
    switchTab("similar");
    document.getElementById("datasetInput").value = INITIAL_SIMILAR;
    await findSimilarDatasets();
  }
});
/**
 * React to a change of one of the sort dropdowns: store the new sort key and
 * re-run the active tab's search from page 1.
 *
 * @param {string} tab - "search" or "similar"; selects the dropdown
 *   `${tab}SortSelect` whose value is read.
 */
function handleSortChange(tab) {
  const sortSelect = document.getElementById(`${tab}SortSelect`);
  currentSort = sortSelect.value;

  // currentSort is shared by both tabs, so mirror the choice onto both
  // dropdowns; otherwise the other tab would show a sort order that is not
  // the one applied. Assigning .value does not fire another change event.
  for (const selectId of ["searchSortSelect", "similarSortSelect"]) {
    const dropdown = document.getElementById(selectId);
    if (dropdown) dropdown.value = currentSort;
  }

  // The re-run below starts at page 1, so the shared counter must as well.
  currentPage = 1;

  // Re-run the current search with new sort
  const activeTab = document.querySelector(".tab-trigger.active").id;
  if (activeTab === "searchTab") {
    const searchQuery = document.getElementById("searchInput").value;
    if (searchQuery.length >= MIN_SEARCH_LENGTH) {
      searchDatasets(searchQuery, 1);
    }
  } else {
    const datasetId = document.getElementById("datasetInput").value;
    if (datasetId) {
      findSimilarDatasets(1);
    }
  }
}
</script>
</body>
</html>