Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,40 +3,34 @@ import pandas as pd
|
|
3 |
import requests
|
4 |
from io import BytesIO
|
5 |
|
6 |
-
def
|
7 |
-
# Read the
|
8 |
-
if
|
9 |
-
df = pd.
|
10 |
-
elif
|
11 |
-
response = requests.get(
|
12 |
-
response.raise_for_status() #
|
13 |
-
df = pd.
|
14 |
else:
|
15 |
-
raise ValueError("Either
|
16 |
-
|
17 |
-
# Clean string columns to handle any invalid UTF-8 sequences
|
18 |
-
for col in df.select_dtypes(include=["object"]).columns:
|
19 |
-
df[col] = df[col].apply(
|
20 |
-
lambda x: x.encode("utf-8", errors="replace").decode("utf-8", errors="replace")
|
21 |
-
if isinstance(x, str) else x
|
22 |
-
)
|
23 |
-
|
24 |
-
# Convert the DataFrame to CSV format
|
25 |
-
csv_data = df.to_csv(index=False)
|
26 |
-
|
27 |
-
# Save the CSV data to a file
|
28 |
-
output_file_path = "output.csv"
|
29 |
-
with open(output_file_path, "w", encoding="utf-8") as f:
|
30 |
-
f.write(csv_data)
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
return output_file_path
|
33 |
|
34 |
demo = gr.Interface(
|
35 |
-
fn=
|
36 |
-
inputs=[
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
40 |
)
|
41 |
|
42 |
if __name__ == "__main__":
|
|
|
3 |
import requests
|
4 |
from io import BytesIO
|
5 |
|
6 |
+
def convert_csv_to_parquet(csv_file=None, csv_url=None):
    """Convert a CSV, given as an upload or as a URL, into a Parquet file.

    Args:
        csv_file: The uploaded CSV. Either an object exposing its on-disk
            path through a ``name`` attribute, or a plain path string.
            When given, it takes precedence over ``csv_url``.
        csv_url: URL of a downloadable CSV file. ``None``, an empty string
            and a whitespace-only string are all treated as "not provided".

    Returns:
        The path of the Parquet file that was written ("output.parquet").

    Raises:
        ValueError: If neither ``csv_file`` nor a non-blank ``csv_url`` is
            provided.
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    # A blank URL field may arrive as "" rather than None; normalise it so
    # it falls through to the "nothing provided" error instead of being
    # handed to requests.get().
    url = csv_url.strip() if isinstance(csv_url, str) else csv_url

    # Read the CSV file either from an uploaded file or from a URL
    if csv_file is not None:
        # Accept both file-like upload objects (path in ``.name``) and
        # plain path strings.
        csv_path = getattr(csv_file, "name", csv_file)
        df = pd.read_csv(csv_path)
    elif url:
        # Without a timeout an unresponsive server would block forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure the request was successful
        df = pd.read_csv(BytesIO(response.content))
    else:
        raise ValueError("Either csv_file or csv_url must be provided")

    # Optionally, perform any cleaning on the DataFrame here if needed

    # Save the DataFrame as a Parquet file
    output_file_path = "output.parquet"
    df.to_parquet(output_file_path, index=False)

    return output_file_path
|
24 |
|
25 |
# Gradio UI: one CSV source (upload or URL) in, one Parquet file out.
demo = gr.Interface(
    title="CSV to Parquet Converter",
    description="Convert a CSV file to Parquet format from a downloadable link or file upload",
    fn=convert_csv_to_parquet,
    # Input order matches the (csv_file, csv_url) parameters of ``fn``.
    inputs=[
        gr.File(label="CSV File"),
        gr.Textbox(
            label="CSV File URL",
            placeholder="Enter a URL to a CSV file",
        ),
    ],
    outputs=[
        gr.File(label="Parquet Output"),
    ],
)
|
35 |
|
36 |
if __name__ == "__main__":
|