import gradio as gr
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from datasets import load_dataset

# Load pre-trained TinyBERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
model = BertForSequenceClassification.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
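# Note: this general-distillation TinyBERT checkpoint ships without a
# classification head, so BertForSequenceClassification initializes one with
# random weights and its predictions are essentially arbitrary until the model
# is fine-tuned. A fine-tuned checkpoint (hypothetical repo id) could be used instead:
# model = BertForSequenceClassification.from_pretrained('your-org/tinybert-finetuned')
model.eval()  # switch to inference mode (disables dropout)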

# Load the dataset from its Hugging Face Hub repository
# (use the plain repo id; '/blob/main' is part of the web URL, not the dataset path)
dataset = load_dataset('SharmaAmit1818/data_analysis', data_files='data-qQeu1Z0CfsuqRUaDagRA1 (1).csv')
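# The dataset loaded above is not used in the inference path below; it is kept
# for reference. With data_files and no split argument, load_dataset returns a
# DatasetDict whose single split is named 'train', e.g.:
# print(dataset['train'].column_names)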

# Function to process the CSV file and generate predictions
def process_csv(file):
    try:
        # Read the uploaded CSV with pandas (it accepts the path or file object that Gradio passes in)
        df = pd.read_csv(file)
        
        # Debugging: Print the DataFrame shape and columns
        print(f"DataFrame shape: {df.shape}")
        print(f"DataFrame columns: {df.columns.tolist()}")

        # The model needs a 'text' column to run on
        if 'text' not in df.columns:
            raise gr.Error("The CSV file must contain a 'text' column.")
        
        # Tokenize the input text, capping sequences at BERT's 512-token limit
        inputs = tokenizer(df['text'].tolist(), return_tensors='pt', padding=True, truncation=True, max_length=512)
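        # Note: the whole 'text' column is tokenized and scored as a single batch;
        # for large CSVs, processing the rows in smaller chunks (e.g. 32 at a time)
        # would keep memory use bounded.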
        
        # Perform inference
        with torch.no_grad():
            outputs = model(**inputs)
        
        # Get predicted classes
        _, predicted_classes = torch.max(outputs.logits, dim=1)
        
        # Add predictions to DataFrame
        df['predicted_class'] = predicted_classes.numpy()
        
        # Write the processed DataFrame to a CSV file and return its path
        # (a gr.File output expects a file path, not a CSV string)
        output_path = "predictions.csv"
        df.to_csv(output_path, index=False)
        return output_path

    except FileNotFoundError:
        raise gr.Error("The specified file was not found. Please check your upload.")
    except pd.errors.EmptyDataError:
        raise gr.Error("The uploaded file is empty.")
    except pd.errors.ParserError:
        raise gr.Error("There was an issue parsing the CSV file.")
    except gr.Error:
        # Let errors raised above (e.g. the missing 'text' column) pass through unchanged
        raise
    except Exception as e:
        raise gr.Error(f"An unexpected error occurred: {e}")

# Create Gradio interface
input_csv = gr.File(label="Upload CSV File")
output_csv = gr.File(label="Download Processed CSV")
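# gr.File passes the upload to process_csv as a temporary file path (the default
# in recent Gradio versions) and serves the path the function returns as a download.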

demo = gr.Interface(
    fn=process_csv,
    inputs=input_csv,
    outputs=output_csv,
    title="CSV Data Processing with TinyBERT",
    description="Upload a CSV file with a 'text' column, and the model will process the data and provide predictions."
)

# Launch Gradio interface
demo.launch()