import torch
from torch.optim import AdamW  # transformers' AdamW is deprecated; use the PyTorch implementation
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler
from datasets import load_dataset
from torch.utils.data import DataLoader
import streamlit as st
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
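# Dependencies: torch, transformers, datasets, streamlit, matplotlib, tqdm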
# Load pre-trained model and tokenizer from Hugging Face; cache them so the
# fine-tuned weights survive the reruns Streamlit triggers on every widget interaction
@st.cache_resource
def load_model_and_tokenizer():
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
    return tokenizer, model
tokenizer, model = load_model_and_tokenizer()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
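# Note: the classification head on top of bert-base-uncased is freshly
# initialized (Transformers warns about this at load time), so predictions are
# meaningless until the model has been fine-tuned below.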
# Streamlit interface
st.title("Sentiment Analysis with BERT")
# Training setup
st.sidebar.title("Training Setup")
num_epochs = st.sidebar.slider("Number of Epochs", 1, 5, 3)
batch_size = st.sidebar.slider("Batch Size", 4, 32, 8)
learning_rate = st.sidebar.slider("Learning Rate", 1e-6, 1e-3, 5e-5, format="%.6f")
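# The 5e-5 default matches the fine-tuning learning rates recommended in the
# original BERT paper (2e-5 to 5e-5).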
# Load and preprocess a small subset of the IMDB dataset
@st.cache_resource  # st.cache is deprecated; cache_resource suits unhashable returns like DataLoaders
def load_and_preprocess_data(batch_size):
    # the IMDB train split is sorted by label, so a plain train[:1%] slice would
    # be single-class; shuffle before taking a 250-example subset
    dataset = load_dataset("imdb", split="train").shuffle(seed=42).select(range(250))
    def preprocess_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
    encoded_dataset = dataset.map(preprocess_function, batched=True)
    # the model's forward() expects the keyword `labels`, not `label`
    encoded_dataset = encoded_dataset.rename_column("label", "labels")
    encoded_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return DataLoader(encoded_dataset, shuffle=True, batch_size=batch_size)
# pass batch_size explicitly so the cached loader is rebuilt when the slider changes
train_dataloader = load_and_preprocess_data(batch_size)
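# Sanity check for one batch (uncomment when debugging locally):
#   sample = next(iter(train_dataloader))
#   print({k: tuple(v.shape) for k, v in sample.items()})
# expected: input_ids/attention_mask of shape (batch_size, 128), labels of shape (batch_size,)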
# Training loop
if st.sidebar.button("Train"):
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
    )
    progress_bar = tqdm(range(num_training_steps))
    loss_values = []
    model.train()
    for epoch in range(num_epochs):
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
            loss_values.append(loss.item())
    st.sidebar.success("Training completed")
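    # tqdm's bar renders in the server console, not on the page. For an in-app
    # bar, the loop could drive a st.progress widget instead, e.g.:
    #   bar = st.progress(0.0)                              # before the loop
    #   bar.progress(progress_bar.n / num_training_steps)   # after each step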
    # Plot per-step loss values
    st.write("### Training Loss")
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(loss_values, label="Training Loss")
    ax.set_xlabel("Training Steps")
    ax.set_ylabel("Loss")
    ax.legend()
    st.pyplot(fig)  # pass an explicit figure; st.pyplot(plt) relies on deprecated global state
# Text input for prediction
st.write("### Predict Sentiment")
user_input = st.text_area("Enter text:", "I loved this movie!")
if user_input:
    inputs = tokenizer(user_input, padding="max_length", truncation=True, max_length=128, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = outputs.logits.argmax(dim=-1).item()
    sentiment = "Positive" if prediction == 1 else "Negative"
    st.write(f"Sentiment: **{sentiment}**")