Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -46,8 +46,11 @@ def process_dataset(file):
|
|
46 |
cleaned_data = clean_data(data)
|
47 |
|
48 |
# Step 2: Create chunks
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
# Step 3: Generate embeddings
|
52 |
cleaned_data['embeddings'] = cleaned_data['chunks'].apply(lambda chunks: [generate_embeddings(chunk) for chunk in chunks])
|
53 |
|
@@ -63,8 +66,8 @@ def gradio_interface(file):
|
|
63 |
# Gradio App
|
64 |
ui = gr.Interface(
|
65 |
fn=gradio_interface,
|
66 |
-
inputs=gr.
|
67 |
-
outputs=gr.
|
68 |
title="Data Cleaning and Embedding Tool",
|
69 |
description="Upload your dataset to clean, chunk, and generate embeddings using Llama LLM with Groq API. Perfect for deployment on Hugging Face.",
|
70 |
theme="compact",
|
|
|
46 |
cleaned_data = clean_data(data)
|
47 |
|
48 |
# Step 2: Create chunks
|
49 |
+
if 'text_column' in cleaned_data.columns:
|
50 |
+
cleaned_data['chunks'] = cleaned_data['text_column'].apply(chunk_text)
|
51 |
+
else:
|
52 |
+
return "Error: 'text_column' not found in the dataset."
|
53 |
+
|
54 |
# Step 3: Generate embeddings
|
55 |
cleaned_data['embeddings'] = cleaned_data['chunks'].apply(lambda chunks: [generate_embeddings(chunk) for chunk in chunks])
|
56 |
|
|
|
66 |
# Gradio App
|
67 |
ui = gr.Interface(
|
68 |
fn=gradio_interface,
|
69 |
+
inputs=gr.File(label="Upload CSV Dataset"),
|
70 |
+
outputs=gr.Textbox(label="Processing Result"),
|
71 |
title="Data Cleaning and Embedding Tool",
|
72 |
description="Upload your dataset to clean, chunk, and generate embeddings using Llama LLM with Groq API. Perfect for deployment on Hugging Face.",
|
73 |
theme="compact",
|