Spaces:

Chemically-motivated
/

OSINT_Tool

Running

File size: 1,920 Bytes

4fa8602

import pandas as pd
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import streamlit as st

def fine_tune_model(uploaded_file):
    """Render the Streamlit fine-tuning panel for an uploaded CSV file.

    Shows a preview of the data, lets the user pick the text column when the
    CSV has no ``text`` column and, once the "Fine-tune Model" button is
    pressed, loads the selected model and tokenizer and tokenizes the text.
    Actual training is not implemented yet: the function only reports that
    the (demo) run started.

    Args:
        uploaded_file: Anything ``pandas.read_csv`` accepts (a path or a
            file-like object). Presumably the object returned by a Streamlit
            file uploader -- confirm against the caller.

    Returns:
        None. Results, warnings and errors are reported through Streamlit
        widgets rather than return values.
    """
    # Read CSV file; a broken upload is a user error, not an app crash.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the uploaded CSV file: {e}")
        return

    st.subheader("Dataset Preview")
    st.write(df.head())

    # Nothing to select or tokenize when the CSV has no rows or no columns.
    if df.empty:
        st.warning("The uploaded CSV file contains no data rows.")
        return

    # Check for a 'text' column or allow user to choose a column
    if 'text' not in df.columns:
        st.warning("No 'text' column found. Please select the column to use for fine-tuning.")
        column_choice = st.selectbox("Select the column containing text data", df.columns)
        df['text'] = df[column_choice]  # Create a 'text' column based on user selection

    model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])

    # Guard clauses: do the expensive work only after an explicit click.
    if not st.button("Fine-tune Model"):
        return
    if not model_name:
        st.warning("Please select a model for fine-tuning.")
        return

    try:
        # The tokenizer only accepts strings, so drop missing values and cast
        # everything else (e.g. a numeric column chosen by the user) to str.
        # reset_index keeps a default index so no extra index column is added
        # to the Hugging Face dataset.
        text_rows = df.dropna(subset=['text'])
        text_rows = text_rows.assign(text=text_rows['text'].astype(str))
        text_rows = text_rows.reset_index(drop=True)
        if text_rows.empty:
            st.warning("The selected text column contains no usable values.")
            return

        # Convert CSV to Hugging Face dataset format
        dataset = Dataset.from_pandas(text_rows)

        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding=True)

        tokenized_datasets = dataset.map(preprocess_function, batched=True)

        # TODO: run the actual training loop with `model` and
        # `tokenized_datasets`. Planned settings: output_dir "./results",
        # 3 epochs, per-device train batch size 16, logging_dir "./logs".
        st.success("Fine-tuning started (demo)!")
    except Exception as e:
        # UI boundary: surface any model/tokenizer/dataset failure to the user.
        st.error(f"Error during fine-tuning: {e}")