Haseeb-001 committed on
Commit
0e1df76
·
verified ·
1 Parent(s): 8ecd8e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -46,8 +46,11 @@ def process_dataset(file):
46
  cleaned_data = clean_data(data)
47
 
48
  # Step 2: Create chunks
49
- cleaned_data['chunks'] = cleaned_data['text_column'].apply(chunk_text)
50
-
 
 
 
51
  # Step 3: Generate embeddings
52
  cleaned_data['embeddings'] = cleaned_data['chunks'].apply(lambda chunks: [generate_embeddings(chunk) for chunk in chunks])
53
 
@@ -63,8 +66,8 @@ def gradio_interface(file):
63
  # Gradio App
64
  ui = gr.Interface(
65
  fn=gradio_interface,
66
- inputs=gr.inputs.File(label="Upload CSV Dataset"),
67
- outputs=gr.outputs.Textbox(label="Processing Result"),
68
  title="Data Cleaning and Embedding Tool",
69
  description="Upload your dataset to clean, chunk, and generate embeddings using Llama LLM with Groq API. Perfect for deployment on Hugging Face.",
70
  theme="compact",
 
46
  cleaned_data = clean_data(data)
47
 
48
  # Step 2: Create chunks
49
+ if 'text_column' in cleaned_data.columns:
50
+ cleaned_data['chunks'] = cleaned_data['text_column'].apply(chunk_text)
51
+ else:
52
+ return "Error: 'text_column' not found in the dataset."
53
+
54
  # Step 3: Generate embeddings
55
  cleaned_data['embeddings'] = cleaned_data['chunks'].apply(lambda chunks: [generate_embeddings(chunk) for chunk in chunks])
56
 
 
66
  # Gradio App
67
  ui = gr.Interface(
68
  fn=gradio_interface,
69
+ inputs=gr.File(label="Upload CSV Dataset"),
70
+ outputs=gr.Textbox(label="Processing Result"),
71
  title="Data Cleaning and Embedding Tool",
72
  description="Upload your dataset to clean, chunk, and generate embeddings using Llama LLM with Groq API. Perfect for deployment on Hugging Face.",
73
  theme="compact",