Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, Training
|
|
5 |
from transformers import pipeline
|
6 |
from sklearn.model_selection import train_test_split
|
7 |
|
8 |
-
Load and preprocess the dataset
|
9 |
def load_and_preprocess_data():
|
10 |
dataset = load_dataset('tahiryaqoob/BISELahore')
|
11 |
train_dataset, val_dataset = train_test_split(dataset['train'], test_size=0.2, random_state=42)
|
@@ -14,14 +13,13 @@ def load_and_preprocess_data():
|
|
14 |
print(f"Validation samples: {len(val_dataset)}")
|
15 |
return train_dataset, val_dataset
|
16 |
|
17 |
-
Preprocess the data to format for fine-tuning
|
18 |
def preprocess_function(examples, tokenizer):
    """Tokenize question/answer pairs into model-ready features.

    The questions become the encoder inputs; the tokenized answers are
    attached under the ``labels`` key, as expected by the HF ``Trainer``.

    Args:
        examples: mapping with ``'question'`` and ``'answer'`` entries
            (single strings or batched lists, as produced by ``Dataset.map``).
        tokenizer: a callable HF tokenizer.

    Returns:
        The tokenizer output for the questions, augmented with ``labels``.
    """
    encoded = tokenizer(
        examples['question'],
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    answer_encoding = tokenizer(
        examples['answer'],
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    # Target token ids serve directly as the training labels.
    encoded['labels'] = answer_encoding['input_ids']
    return encoded
|
23 |
-
|
24 |
-
Fine-tune the model using the preprocessed data
|
25 |
def fine_tune_model(train_dataset, val_dataset):
|
26 |
model_name = "distilbert-base-uncased"
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
@@ -57,7 +55,7 @@ def fine_tune_model(train_dataset, val_dataset):
|
|
57 |
tokenizer.save_pretrained("./distilbert_finetuned")
|
58 |
print("Model fine-tuned and saved successfully.")
|
59 |
|
60 |
-
Create a chatbot inference pipeline using the fine-tuned model
|
61 |
def chatbot_inference():
|
62 |
model_name = "./distilbert_finetuned"
|
63 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
@@ -66,14 +64,14 @@ def chatbot_inference():
|
|
66 |
chatbot = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
|
67 |
return chatbot
|
68 |
|
69 |
-
Run inference to test chatbot functionality
|
70 |
def run_inference():
    """Prompt the user for one question and print the chatbot's reply.

    Interactive, side-effect-only helper: reads a single question from
    stdin, runs it through the fine-tuned pipeline, and prints the
    generated answer.
    """
    bot = chatbot_inference()
    question = input("Ask a question: ")
    answer = bot(question)
    print("Bot Response:", answer[0]['generated_text'])
|
75 |
|
76 |
-
Main function to train or serve the chatbot
|
77 |
def main():
|
78 |
train_dataset, val_dataset = load_and_preprocess_data()
|
79 |
|
|
|
5 |
from transformers import pipeline
|
6 |
from sklearn.model_selection import train_test_split
|
7 |
|
|
|
8 |
def load_and_preprocess_data():
|
9 |
dataset = load_dataset('tahiryaqoob/BISELahore')
|
10 |
train_dataset, val_dataset = train_test_split(dataset['train'], test_size=0.2, random_state=42)
|
|
|
13 |
print(f"Validation samples: {len(val_dataset)}")
|
14 |
return train_dataset, val_dataset
|
15 |
|
16 |
+
# Preprocess the data to format for fine-tuning
|
17 |
def preprocess_function(examples, tokenizer):
    """Tokenize question/answer pairs for seq2seq fine-tuning.

    Questions are tokenized as model inputs and answers as targets; the
    target token ids are attached under ``labels``.

    Fix: padding positions in the labels are replaced with -100 so the
    cross-entropy loss ignores them (the HF convention). Leaving raw pad
    ids in the labels makes the model train on padding tokens.

    Args:
        examples: mapping with ``'question'`` and ``'answer'`` entries
            (single strings or batched lists, as produced by ``Dataset.map``).
        tokenizer: a callable HF tokenizer; ``pad_token_id`` is read if present.

    Returns:
        The tokenizer output for the questions, augmented with ``labels``.
    """
    inputs = tokenizer(examples['question'], padding="max_length",
                       truncation=True, max_length=128)
    targets = tokenizer(examples['answer'], padding="max_length",
                        truncation=True, max_length=128)

    labels = targets['input_ids']
    pad_id = getattr(tokenizer, "pad_token_id", None)
    if pad_id is not None:
        # Handle both batched (list of lists) and single-example inputs.
        if labels and isinstance(labels[0], list):
            labels = [
                [tok if tok != pad_id else -100 for tok in seq]
                for seq in labels
            ]
        else:
            labels = [tok if tok != pad_id else -100 for tok in labels]

    inputs['labels'] = labels
    return inputs
|
22 |
+
|
|
|
23 |
def fine_tune_model(train_dataset, val_dataset):
|
24 |
model_name = "distilbert-base-uncased"
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
55 |
tokenizer.save_pretrained("./distilbert_finetuned")
|
56 |
print("Model fine-tuned and saved successfully.")
|
57 |
|
58 |
+
#Create a chatbot inference pipeline using the fine-tuned model
|
59 |
def chatbot_inference():
|
60 |
model_name = "./distilbert_finetuned"
|
61 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
64 |
chatbot = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
|
65 |
return chatbot
|
66 |
|
67 |
+
#Run inference to test chatbot functionality
|
68 |
def run_inference():
    """Run a single interactive question/answer round against the chatbot.

    Builds the inference pipeline, forwards one stdin question through it,
    and prints the generated text. Returns nothing.
    """
    pipeline_fn = chatbot_inference()
    reply = pipeline_fn(input("Ask a question: "))
    generated = reply[0]['generated_text']
    print("Bot Response:", generated)
|
73 |
|
74 |
+
#Main function to train or serve the chatbot
|
75 |
def main():
|
76 |
train_dataset, val_dataset = load_and_preprocess_data()
|
77 |
|