xeroISB committed
Commit 5163bee (verified) · Parent(s): 2753f4c

Update app.py

Files changed (1): app.py (+10 -13)
app.py CHANGED
@@ -3,9 +3,7 @@ import numpy as np
 import pandas as pd
 from keras.models import load_model
 from huggingface_hub import hf_hub_download
-from tensorflow.keras.preprocessing.text import Tokenizer
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.models import Sequential
+from transformers import BertTokenizer
 from sklearn.preprocessing import LabelEncoder, StandardScaler
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
 import nltk
@@ -17,8 +15,10 @@ nltk.download('vader_lexicon')
 model_path = hf_hub_download(repo_id="xeroISB/ServiceNowMTTR", filename="my_model.h5")
 model = load_model(model_path)
 
-# Initialize Tokenizer and LabelEncoders
-tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
+# Initialize BERT tokenizer
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+
+# Initialize LabelEncoders
 label_encoders = {
     'impact': LabelEncoder(),
     'priority': LabelEncoder(),
@@ -39,14 +39,12 @@ def preprocess_input(short_description, impact, priority, category, urgency):
 
     for column in ['impact', 'priority', 'category', 'urgency']:
         input_data[column] = label_encoders[column].fit_transform(input_data[column])
+
     short_description = input_data['short_description'].iloc[0].lower()
-    # Tokenize text data
-    sequences = tokenizer.texts_to_sequences([short_description])
-    print("Short description",input_data['short_description'].iloc[0])
-    print("Sequence",sequences)
-    if not sequences:
-        return None, None  # Handle empty sequences
-    padded_sequences = pad_sequences(sequences, maxlen=50, padding='post', truncating='post')
+
+    # Tokenize text data using BERT tokenizer
+    inputs = tokenizer(short_description, return_tensors='tf', padding='max_length', truncation=True, max_length=50)
+    padded_sequences = np.array(inputs['input_ids'])
 
     # Feature engineering: Add sentiment score
     sid = SentimentIntensityAnalyzer()
@@ -55,7 +53,6 @@ def preprocess_input(short_description, impact, priority, category, urgency):
     # Normalize numerical features
     numerical_features = input_data[['impact', 'priority', 'category', 'urgency', 'sentiment_score']]
     scaler = StandardScaler()
-
     scaled_numerical_features = scaler.fit_transform(numerical_features)
 
     # Prepare the final input features
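
For reference, the tokenization path this commit introduces can be exercised on its own. The following is a minimal sketch, assuming transformers, numpy, and TensorFlow are installed; the sample incident description is made up for illustration:

# Minimal sketch: standalone use of the BERT tokenizer introduced by this commit.
# The sample text below is hypothetical, not taken from the repository.
import numpy as np
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

short_description = "email server unreachable after patch"  # hypothetical input
inputs = tokenizer(
    short_description,
    return_tensors='tf',   # TensorFlow tensors, matching the Keras model
    padding='max_length',  # always pad to exactly max_length tokens
    truncation=True,
    max_length=50,
)
padded_sequences = np.array(inputs['input_ids'])
print(padded_sequences.shape)  # (1, 50): one example, 50 token ids

Unlike the removed Keras Tokenizer, which had to be fitted on a corpus (num_words=10000, oov_token='<OOV>') and could produce empty sequences, the pretrained BERT tokenizer ships with a fixed vocabulary and always returns a fixed-length id matrix, which is why the empty-sequence guard and the debug prints were dropped.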