import itertools
import os

#%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.preprocessing import text, sequence
from tensorflow.keras.utils import to_categorical

# Authenticate the Colab user and authorize gspread
from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

# Read the sheet and load it into a DataFrame (first row is the header)
gsheets = gc.open_by_url('https://docs.google.com/spreadsheets/d/15XNk8vY1pL6bzUo16AHWrx7Gws1Mz5JxOCGvkTnAczA/edit#gid=0')
sheets = gsheets.worksheet('Sheet1').get_all_values()
df = pd.DataFrame(sheets[1:], columns=sheets[0])

# 70/30 train/test split
train_size = int(len(df) * 0.7)
train_posts = df['post'][:train_size]
train_tags = df['tags'][:train_size]
test_posts = df['post'][train_size:]
test_tags = df['tags'][train_size:]

# Bag-of-words features over the 1,000 most frequent tokens
max_words = 1000
tokenize = text.Tokenizer(num_words=max_words, char_level=False)
tokenize.fit_on_texts(train_posts)  # fit only on the training data
x_train = tokenize.texts_to_matrix(train_posts)
x_test = tokenize.texts_to_matrix(test_posts)

# Encode the string labels as integers, then as one-hot vectors
encoder = LabelEncoder()
encoder.fit(train_tags)
y_train = encoder.transform(train_tags)
y_test = encoder.transform(test_tags)
num_classes = np.max(y_train) + 1
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

# A batch size of 32 or 64 is more typical, but the dataset is very small,
# so a tiny batch size still trains quickly and predicted better here.
batch_size = 2
epochs = 10  # was 2
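# Illustrative sketch, not in the original notebook: texts_to_matrix returns
# one row per document with max_words columns, and its default mode='binary'
# marks word presence (0/1) rather than counts -- this is the representation
# the Dense layers below consume.
demo = tokenize.texts_to_matrix(['an example post'])
print(demo.shape)              # (1, 1000): one row, max_words columns
print(demo.min(), demo.max())  # binary entries: 1.0 for known words, else 0.0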
# Build the model: three 512-unit hidden layers with dropout, softmax output
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)

def classify(string):
    """Predict the label for a single post and record question, label, and score."""
    questn.append(string)
    x_new = tokenize.texts_to_matrix([string])
    text_labels = encoder.classes_
    prediction = model.predict(np.array([x_new[0]]))
    predicted_label = text_labels[np.argmax(prediction)]
    score.append(prediction[0][np.argmax(prediction)])
    predicted.append(predicted_label)
    print(prediction[0][np.argmax(prediction)])
    print("Predicted label: " + predicted_label + "\n")
    return predicted_label

predicted = []
score = []
questn = []
print(f'{train_size} training rows loaded')

# Classify every test post and compare predictions with the real labels
for i in range(len(test_posts)):
    classify(test_posts.iloc[i])

results = pd.DataFrame({'Question': questn,
                        'Predicted Answer': predicted,
                        'Score': score,
                        'Real Answer': test_tags.values})
results['Answer is'] = results['Predicted Answer'] == results['Real Answer']

# Expose the classifier through a simple Gradio text interface
import gradio as gr

def greet(name):
    return classify(name)

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
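# Optional evaluation sketch, added as an assumption (the original notebook
# imports confusion_matrix and captures `history` but uses neither): held-out
# accuracy, a confusion matrix, and the training curves.
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {accuracy:.3f}')
y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(model.predict(x_test), axis=1)
print(confusion_matrix(y_true, y_pred))  # rows = true class, columns = predicted

plt.plot(history.history['accuracy'], label='train')  # key is 'acc' on older Keras
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()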