import tensorflow as tf
import numpy as np 
import pickle
import string
import gradio as gr

@tf.keras.utils.register_keras_serializable()
def custom_standardization(input_string):
    """ Remove html line-break tags and handle punctuation """
    no_uppercased = tf.strings.lower(input_string, encoding='utf-8')
    no_stars = tf.strings.regex_replace(no_uppercased, r"\*", " ")
    # remove the Turkish "read more" marker ("devamını oku")
    no_repeats = tf.strings.regex_replace(no_stars, "devamını oku", "")
    # strip HTML line-break tags
    no_html = tf.strings.regex_replace(no_repeats, "<br />", " ")
    no_digits = tf.strings.regex_replace(no_html, r"\w*\d\w*", "")
    no_punctuations = tf.strings.regex_replace(no_digits, f"([{string.punctuation}])", r" ")
    # optional: remove Turkish stop words (requires a tr_stop_words table, not loaded here)
    #no_stop_words = ' '+no_punctuations+ ' '
    #for each in tr_stop_words.values:
    #  no_stop_words = tf.strings.regex_replace(no_stop_words, ' '+each[0]+' ' , r" ")
    no_extra_space = tf.strings.regex_replace(no_punctuations, " +"," ")
    # map Turkish-specific characters to their ASCII equivalents
    no_I = tf.strings.regex_replace(no_extra_space, "ı","i")
    no_O = tf.strings.regex_replace(no_I, "ö","o")
    no_C = tf.strings.regex_replace(no_O, "ç","c")
    no_S = tf.strings.regex_replace(no_C, "ş","s")
    no_G = tf.strings.regex_replace(no_S, "ğ","g")
    no_U = tf.strings.regex_replace(no_G, "ü","u")
    return no_U
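
# Usage note (illustrative, not part of the original script): a call such as
#   custom_standardization(tf.constant("Harika bir ürün! 5 yıldız <br /> devamını oku"))
# ("Great product! 5 stars <br /> read more") lowercases the text, drops the
# "devamını oku" marker, <br /> tags and digit tokens, replaces punctuation with
# spaces, and folds ı/ö/ç/ş/ğ/ü to their ASCII equivalents.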

# Load the saved end-to-end model; the @register_keras_serializable decorator above
# lets Keras restore the custom_standardization function it references.
end_to_end_model = tf.keras.models.load_model('MCTC_Conv1D_E2E')

# Mapping from predicted class index to category name
with open('id_to_category.pkl', 'rb') as fp:
    id_to_category = pickle.load(fp)

def text_classifier(text):
    """Return the predicted category name for a single piece of text."""
    predictions = end_to_end_model.predict(tf.constant([text]))
    return id_to_category[np.argmax(predictions[0])]
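
# Quick check (sketch; the sample review is made up, not from the project data):
#   print(text_classifier("kargo çok hızlı geldi, ürün çok kaliteli"))
# ("shipping arrived very fast, the product is high quality") would print the name
# of the highest-scoring category from id_to_category.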

# Gradio demo: a single textbox in, the predicted category out.
iface = gr.Interface(fn=text_classifier, inputs="text", outputs="text")
iface.launch()