Spaces:
Running
Running
File size: 2,319 Bytes
d2b9031 49e25d2 ff86828 d2b9031 90f89f0 2bf1e25 90f89f0 1fd0c30 90f89f0 2bf1e25 90f89f0 2bf1e25 90f89f0 2bf1e25 90f89f0 2bf1e25 90f89f0 2bf1e25 90f89f0 7773ef1 49e25d2 90f89f0 2bf1e25 90f89f0 fcd8f70 90f89f0 49e25d2 72dd3ca ff86828 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import gradio as gr
import pandas as pd
import requests
from io import BytesIO
def convert_hf_dataset(file_url: str):
    """Download a Hugging Face CSV/Parquet file and convert it to the other format.

    A ``.csv`` file is converted to Parquet and a ``.parquet`` file is
    converted to CSV. The file type is taken from the extension of the URL
    path, so a trailing query string or fragment (for example
    ``?download=true``) is allowed.

    Args:
        file_url: URL of the file to convert. The scheme may be omitted, in
            which case ``https://`` is assumed. The host must be
            ``huggingface.co`` or one of its subdomains.

    Returns:
        A ``(output_file, info_message)`` tuple: the path of the converted
        file (``"output.parquet"`` or ``"output.csv"``, written relative to
        the current working directory) and a text summary that includes a
        preview of the first ten rows.

    Raises:
        ValueError: If the URL is not hosted on huggingface.co, or its path
            does not end in ``.csv`` or ``.parquet``.
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    from urllib.parse import urlparse

    file_url = (file_url or "").strip()

    # Ensure the URL has a scheme before parsing; without one, urlparse
    # would treat the host as part of the path.
    if not file_url.lower().startswith(("http://", "https://")):
        file_url = "https://" + file_url

    # Validate the parsed hostname rather than searching the raw string:
    # a substring test would also accept URLs such as
    # "https://evil.example/huggingface.co/x.csv".
    parts = urlparse(file_url)
    host = (parts.hostname or "").lower()
    if host != "huggingface.co" and not host.endswith(".huggingface.co"):
        raise ValueError("Please provide a URL from Hugging Face datasets.")

    # Decide the conversion direction from the URL path only (ignoring any
    # query string), and do it before downloading so unsupported files are
    # rejected without a network round trip.
    url_path = parts.path.lower()
    if url_path.endswith(".csv"):
        source_is_csv = True
    elif url_path.endswith(".parquet"):
        source_is_csv = False
    else:
        raise ValueError("The URL must point to a .csv or .parquet file.")

    # Download the content; the timeout keeps a stalled server from
    # blocking this request forever.
    response = requests.get(file_url, timeout=30)
    response.raise_for_status()
    buffer = BytesIO(response.content)

    # NOTE: the output name is fixed and relative to the working directory,
    # so concurrent requests overwrite each other's result file.
    if source_is_csv:
        df = pd.read_csv(buffer)
        output_file = "output.parquet"
        df.to_parquet(output_file, index=False)
        converted_format = "Parquet"
    else:
        df = pd.read_parquet(buffer)
        output_file = "output.csv"
        df.to_csv(output_file, index=False)
        converted_format = "CSV"

    # Create a preview of the top 10 rows
    preview = df.head(10).to_string(index=False)
    input_name = parts.path.rsplit("/", 1)[-1]
    info_message = (
        f"Input file: {input_name}\n"
        f"Converted file format: {converted_format}\n\n"
        f"Preview (Top 10 Rows):\n{preview}"
    )
    return output_file, info_message
# Input widget: a single text box that receives the dataset file URL.
_url_box = gr.Textbox(
    placeholder="e.g., huggingface.co/datasets/username/dataset/filename.csv",
    label="Hugging Face Dataset URL",
)

# Output widgets, in the same order as the values returned by
# convert_hf_dataset: the converted file first, then the text preview.
_result_widgets = [
    gr.File(label="Converted File"),
    gr.Textbox(label="Preview (Top 10 Rows)", lines=15),
]

# Help text shown under the title of the app.
_app_description = (
    "Enter the URL of a Hugging Face dataset file (must end with .csv or .parquet). "
    "The app will automatically detect the file type, convert it to the opposite format, "
    "and display a preview of the top 10 rows."
)

# Wire the converter function to its widgets.
demo = gr.Interface(
    title="Hugging Face CSV <-> Parquet Converter",
    description=_app_description,
    fn=convert_hf_dataset,
    inputs=_url_box,
    outputs=_result_widgets,
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|