openfree committed on
Commit
6a1564b
·
verified ·
1 Parent(s): 0df8fba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -70
app.py CHANGED
@@ -1,98 +1,62 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import requests
4
  from io import BytesIO
5
 
6
def convert_hf_dataset(input_file, file_url):
    """Convert a CSV file to Parquet, or a Parquet file to CSV.

    The input is either an uploaded file or, when no file is uploaded, the URL
    of a file hosted on huggingface.co. The conversion direction is derived
    from the file extension: ``.csv`` becomes Parquet, ``.parquet`` becomes CSV.

    Args:
        input_file: The uploaded file, or ``None``. Either a filesystem path
            (``str`` / ``os.PathLike``) or a file-like object exposing
            ``.name`` and ``.read()``.
        file_url: URL of a ``.csv`` or ``.parquet`` file on huggingface.co.
            Only consulted when ``input_file`` is ``None``. A missing scheme
            is completed with ``https://``.

    Returns:
        A ``(output_file, info_message)`` tuple. ``output_file`` is the path
        of the converted file (``output.parquet`` or ``output.csv``) and
        ``info_message`` names the input, the target format and shows the
        first 10 rows.

    Raises:
        ValueError: If neither input is given, the URL host is not
            huggingface.co, or the extension is neither ``.csv`` nor
            ``.parquet``.
        requests.HTTPError: If the download answers with an error status.
    """
    import os
    from urllib.parse import urlparse

    supported = (".csv", ".parquet")

    if input_file is None and not (file_url and file_url.strip()):
        raise ValueError("Please provide an uploaded file or a Hugging Face dataset URL.")

    if input_file is not None:
        # The upload may arrive as a path string (no ``.read()``) or as an
        # open file object; support both.
        is_path = isinstance(input_file, (str, os.PathLike))
        source = os.fspath(input_file) if is_path else input_file.name
        extension = os.path.splitext(source)[1].lower()
        # Validate before reading so unsupported uploads are rejected cheaply.
        if extension not in supported:
            raise ValueError("Uploaded file must have a .csv or .parquet extension.")
        payload = source if is_path else BytesIO(input_file.read())
    else:
        file_url = file_url.strip()
        if not file_url.lower().startswith(("http://", "https://")):
            file_url = "https://" + file_url

        parsed = urlparse(file_url)
        host = (parsed.hostname or "").lower()
        # Compare the parsed host, not a substring: a substring test would
        # also accept e.g. "https://evil.example/huggingface.co/x.csv" and
        # make this server fetch arbitrary URLs.
        if host != "huggingface.co" and not host.endswith(".huggingface.co"):
            raise ValueError("Please provide a URL from Hugging Face datasets.")

        # Use the URL path so a query string ("?download=true") does not hide
        # the extension, and validate before downloading anything.
        extension = os.path.splitext(parsed.path)[1].lower()
        if extension not in supported:
            raise ValueError("The URL must point to a .csv or .parquet file.")

        source = parsed.path.split("/")[-1]
        # A timeout keeps a stalled download from blocking the worker forever.
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
        payload = BytesIO(response.content)

    # Convert to the opposite format.
    if extension == ".csv":
        df = pd.read_csv(payload)
        converted_format, output_file = "Parquet", "output.parquet"
        df.to_parquet(output_file, index=False)
    else:
        df = pd.read_parquet(payload)
        converted_format, output_file = "CSV", "output.csv"
        df.to_csv(output_file, index=False)

    # Text preview of the first 10 rows.
    preview = df.head(10).to_string(index=False)
    info_message = (
        f"Input file: {source}\n"
        f"Converted file format: {converted_format}\n\n"
        f"Preview (Top 10 Rows):\n{preview}"
    )
    return output_file, info_message
77
 
78
# Gradio UI: an optional file upload plus an optional URL go in; the converted
# file and a text preview come out.
demo = gr.Interface(
    title="Hugging Face CSV <-> Parquet Converter",
    description=(
        "Upload a file or enter the URL of a Hugging Face dataset file. "
        "The app automatically detects the file type (.csv or .parquet), converts it to the opposite format, "
        "and displays a preview of the top 10 rows."
    ),
    fn=convert_hf_dataset,
    # Order matches the (input_file, file_url) parameters of the handler.
    inputs=[
        gr.File(label="Uploaded File (Optional)"),
        gr.Textbox(
            placeholder="e.g., huggingface.co/datasets/username/dataset/filename.csv",
            label="Hugging Face Dataset URL (Optional)",
        ),
    ],
    # Order matches the (output_file, info_message) tuple the handler returns.
    outputs=[
        gr.File(label="Converted File"),
        gr.Textbox(lines=15, label="Preview (Top 10 Rows)"),
    ],
)
98
 
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  from io import BytesIO
4
 
5
def convert_file(input_file, conversion_type):
    """Convert an uploaded CSV file to Parquet, or a Parquet file to CSV.

    Args:
        input_file: The uploaded file, or ``None`` when nothing was uploaded.
            Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing ``.name`` and ``.read()``.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.

    Returns:
        A ``(output_file, info_message)`` tuple. ``output_file`` is the path
        of the converted file (``output.parquet`` or ``output.csv``) and
        ``info_message`` names the input, the target format and shows the
        first 10 rows.

    Raises:
        ValueError: If no file was uploaded, ``conversion_type`` is not one
            of the two supported values, or the file extension does not match
            the selected conversion.
    """
    import os

    # Check if a file was uploaded.
    if input_file is None:
        raise ValueError("Please upload a file.")

    if conversion_type == "CSV to Parquet":
        expected_ext, source_kind = ".csv", "CSV"
    elif conversion_type == "Parquet to CSV":
        expected_ext, source_kind = ".parquet", "Parquet"
    else:
        raise ValueError("Invalid conversion type selected.")

    # The upload may arrive as a path string (which has no ``.read()``) or as
    # an open file object; support both.
    is_path = isinstance(input_file, (str, os.PathLike))
    file_name = os.fspath(input_file) if is_path else input_file.name

    # Validate before touching the file contents so a wrong selection is
    # rejected without reading the whole upload into memory.
    if os.path.splitext(file_name)[1].lower() != expected_ext:
        raise ValueError(
            f"For {conversion_type} conversion, please upload a {source_kind} file."
        )

    source = file_name if is_path else BytesIO(input_file.read())

    if source_kind == "CSV":
        df = pd.read_csv(source)
        output_file, converted_format = "output.parquet", "Parquet"
        df.to_parquet(output_file, index=False)
    else:
        df = pd.read_parquet(source)
        output_file, converted_format = "output.csv", "CSV"
        df.to_csv(output_file, index=False)

    # Generate a preview of the top 10 rows.
    preview = df.head(10).to_string(index=False)
    info_message = (
        f"Input file: {file_name}\n"
        f"Converted file format: {converted_format}\n\n"
        f"Preview (Top 10 Rows):\n{preview}"
    )
    return output_file, info_message
45
 
46
# Gradio UI: a file upload plus a conversion-direction selector go in; the
# converted file and a text preview come out.
demo = gr.Interface(
    title="CSV <-> Parquet Converter",
    description=(
        "Upload a CSV or Parquet file and select the conversion type. "
        "The app converts the file to the opposite format and displays a preview of the top 10 rows."
    ),
    fn=convert_file,
    # Order matches the (input_file, conversion_type) parameters of the handler.
    inputs=[
        gr.File(label="Upload CSV or Parquet File"),
        gr.Radio(
            label="Conversion Type",
            choices=["CSV to Parquet", "Parquet to CSV"],
        ),
    ],
    # Order matches the (output_file, info_message) tuple the handler returns.
    outputs=[
        gr.File(label="Converted File"),
        gr.Textbox(lines=15, label="Preview (Top 10 Rows)"),
    ],
)
62