Spaces:
Running
Running
File size: 4,059 Bytes
e42ecfa a0f03b5 fd58ad7 cb01a3d cf64ea1 add7bd7 b871eea 32e8749 a7f33dd 32e8749 49e21b1 e42ecfa 7796c6e fba8174 32e8749 412b9d9 32e8749 a0332ee 32e8749 2401d61 32e8749 fba8174 49e21b1 fba8174 49e21b1 fba8174 c056a91 fba8174 4397a91 6ca040b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# Standard-library imports.
# NOTE(review): zipfile and sys are not used in the visible part of this file.
import zipfile
import sys
import os
# NOTE(review): disabled attempt to put a remote models.zip URL on sys.path;
# sys.path entries are normally local filesystem paths, so confirm the intent
# before re-enabling.
#sys.path.append('https://huggingface.co/spaces/PradeepJha/ISCO-code-predictor-api/resolve/main/models.zip')
# Check current directory and list files (startup diagnostics printed to stdout)
print("Current Directory:", os.getcwd())
print("Files in Directory:", os.listdir())
# Numerical / deep-learning stack.
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
# tf_keras is bound to the name `keras`; in the visible code it is only used
# for the version print below.
import tf_keras as keras
import pandas as pd
# load_model is used below to restore the saved classifier.
from tensorflow.keras.models import load_model
# classifier_data_lib / tokenization provide InputExample,
# convert_single_example and FullTokenizer used further down.
# NOTE(review): presumably local modules shipped next to this script - confirm.
import classifier_data_lib
import tokenization
import joblib
from deep_translator import GoogleTranslator
# Print library versions at startup to help diagnose environment mismatches.
print("NumPy Version:", np.__version__)
print("TensorFlow Version:", tf.__version__)
print("TensorFlow Hub Version:", hub.__version__)
# Despite the label, this reports tf_keras.__version__ (the module imported as
# `keras` above), not tensorflow.keras.
print("Keras (via TensorFlow) Version:", keras.__version__)
print("Pandas Version:", pd.__version__)
print("Joblib Version:", joblib.__version__)
import gradio as gr
# Load the trained classifier from its HDF5 file. hub.KerasLayer is passed as a
# custom object so that load_model can resolve layers of that class stored in
# the saved model.
model = load_model('ISCO-Coder-BERT.h5', custom_objects={'KerasLayer': hub.KerasLayer})
# A separate BERT layer is fetched from the hub; in the visible code it is only
# used to read the vocabulary file path and the lower-casing flag below.
bert_layer = hub.KerasLayer("https://kaggle.com/models/tensorflow/bert/TensorFlow2/en-uncased-l-12-h-768-a-12/1",trainable=True)
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
# Tokenizer built from the BERT layer's own vocabulary and casing setting.
tokenizer = tokenization.FullTokenizer(vocab_file,do_lower_case)
# Parameters
# Sequence length handed to convert_single_example and used as the static
# shape of each feature tensor.
max_seq_length = 128
# Placeholder label attached to inference examples, which have no true label.
# NOTE(review): presumably this value must be present in label_list - confirm
# against classifier_data_lib.convert_single_example.
dummy_label = 100
# Label values read from the 'label_list' column of label_list.xlsx.
label_list = list(pd.read_excel('label_list.xlsx')['label_list'])
def get_feature_new(text, max_seq_length, tokenizer, dummy_label):
    """Convert one text tensor into the three BERT input feature sequences.

    Args:
        text: Tensor whose ``.numpy()`` value is a bytes object; it is decoded
            as UTF-8 before being wrapped in an ``InputExample``.
        max_seq_length: Sequence length forwarded to
            ``classifier_data_lib.convert_single_example``.
        tokenizer: Tokenizer forwarded to ``convert_single_example``.
        dummy_label: Placeholder label stored on the example, since inference
            inputs carry no real label.
            NOTE(review): presumably it has to be a member of the module-level
            ``label_list`` - confirm against ``convert_single_example``.

    Returns:
        A tuple ``(input_ids, input_mask, segment_ids)`` taken from the
        feature object produced by ``convert_single_example``.
    """
    decoded_text = text.numpy().decode('utf-8')
    inference_example = classifier_data_lib.InputExample(
        guid=None,
        text_a=decoded_text,
        text_b=None,
        label=dummy_label,
    )
    # The example index is fixed at 0; labels come from the module-level
    # label_list.
    converted = classifier_data_lib.convert_single_example(
        0, inference_example, label_list, max_seq_length, tokenizer
    )
    return converted.input_ids, converted.input_mask, converted.segment_ids
def get_feature_map_new(text):
    """Map a text tensor to the dict of model input tensors.

    The Python-side featurisation (``get_feature_new``) is wrapped in
    ``tf.py_function`` so it can be used from a ``tf.data`` pipeline. Each of
    the three int32 outputs then gets its static shape set to
    ``[max_seq_length]``.

    Args:
        text: String tensor holding a single input text.

    Returns:
        Dict with keys ``'input_word_ids'``, ``'input_mask'`` and
        ``'input_type_ids'`` mapping to the corresponding int32 tensors.
    """
    def _featurize(raw_text):
        # Uses the module-level sequence length, tokenizer and dummy label.
        return get_feature_new(raw_text, max_seq_length, tokenizer, dummy_label)

    word_ids, mask, type_ids = tf.py_function(
        _featurize,
        inp=[text],
        Tout=[tf.int32, tf.int32, tf.int32],
    )

    # py_function outputs carry no static shape; set it explicitly.
    for feature_tensor in (word_ids, mask, type_ids):
        feature_tensor.set_shape([max_seq_length])

    return {
        'input_word_ids': word_ids,
        'input_mask': mask,
        'input_type_ids': type_ids,
    }
def preprocess_new_data(texts):
    """Build the batched ``tf.data`` pipeline that feeds texts to the model.

    Args:
        texts: Sequence of input strings; it is sliced element-wise into the
            dataset (wrapped in a 1-tuple, so the map function receives a
            single ``text`` argument).

    Returns:
        A ``tf.data.Dataset`` of feature dicts produced by
        ``get_feature_map_new``, batched in groups of 32 (the final partial
        batch is kept) and prefetched.
    """
    autotune = tf.data.experimental.AUTOTUNE
    return (
        tf.data.Dataset.from_tensor_slices((texts,))
        .map(get_feature_map_new, num_parallel_calls=autotune)
        .batch(32, drop_remainder=False)
        .prefetch(autotune)
    )
# Lazily populated cache so the encoder file is read once, not on every request.
_LABEL_ENCODER = None


def _get_label_encoder():
    """Return the label encoder, loading 'label_encoder.joblib' on first use."""
    global _LABEL_ENCODER
    if _LABEL_ENCODER is None:
        _LABEL_ENCODER = joblib.load('label_encoder.joblib')
    return _LABEL_ENCODER


def _translate_to_english(text):
    """Best-effort translation to English; fall back to *text* on any failure."""
    try:
        translated = GoogleTranslator(source='auto', target='en').translate(text)
    except Exception as exc:
        # Translation is optional (network errors, empty input, ...): keep the
        # original text, but do not swallow KeyboardInterrupt/SystemExit the
        # way a bare ``except:`` would.
        print("Translation failed, using original text:", exc)
        return text
    # Guard against a non-string result (e.g. None), which would otherwise
    # break dataset construction downstream.
    return translated if isinstance(translated, str) else text


def launch(text_input):
    """Predict the most likely ISCO code for a job title/description.

    The text is translated to English on a best-effort basis, converted to
    BERT features, scored with the module-level ``model``, and the arg-max
    class of each prediction row is mapped back through ``label_list`` and the
    label encoder.

    Args:
        text_input: Free-text job title and description, in any language.

    Returns:
        A two-element list ``[decoded_labels, highest_probabilities]``:
        the result of ``label_encoder.inverse_transform`` for the predicted
        classes, and a list with the maximum score of each prediction row.
    """
    # Load (or reuse) the encoder first so a missing file fails fast.
    label_encoder = _get_label_encoder()

    english_text = _translate_to_english(text_input)
    new_data_dataset = preprocess_new_data([english_text])

    # One prediction row per input text.
    predictions = model.predict(new_data_dataset)

    # Index of the best score -> entry of label_list -> original label.
    predicted_classes = [label_list[np.argmax(pred)] for pred in predictions]
    highest_probabilities = [max(pred) for pred in predictions]
    decoded_labels = label_encoder.inverse_transform(predicted_classes)

    print(f"Most likely ISCO code is {decoded_labels} and probability is {highest_probabilities}")
    return [decoded_labels, highest_probabilities]
# Gradio UI: one two-line textbox in, the prediction rendered as text out.
_job_description_box = gr.Textbox(
    lines=2,
    placeholder="Enter job title and description here...",
)
iface = gr.Interface(fn=launch, inputs=_job_description_box, outputs="text")
# Start the web app when the script runs.
iface.launch()
|