vivek9's picture
Update app.py
b3ad09c verified
raw
history blame
5.5 kB
import gradio as gr
import numpy as np
import numpy as np
import pickle
import pandas as pd
from PRNN import PRNN
from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
import nltk
# Fetch NLTK data at start-up so that word_tokenize and nltk.pos_tag (used in
# tokens_and_tags) can find their models.
# NOTE(review): 'all' downloads every NLTK data package on each cold start;
# the tokenizer and POS-tagger packages alone are presumably sufficient --
# confirm the exact package names for the installed NLTK version.
nltk.download('all')
from nltk.tokenize import word_tokenize
def tokens_and_tags(sentence):
    """Tokenize *sentence* and reduce each token's POS tag to a 4-tag set.

    The sentence is split with ``word_tokenize`` and tagged with
    ``nltk.pos_tag``. Tags that are exactly ``'JJ'``, ``'NN'`` or ``'DT'``
    are kept; every other tag is replaced by ``'OT'``.

    Args:
        sentence: The raw input sentence as a string.

    Returns:
        A ``(words, tags)`` tuple of two equally long lists: the tokens in
        order, and the reduced tag for each token.
    """
    kept_tags = {'JJ', 'NN', 'DT'}
    tagged = nltk.pos_tag(word_tokenize(sentence))

    # Every token is kept; only its tag may be collapsed to 'OT'.
    words = [word for word, _ in tagged]
    tags = [tag if tag in kept_tags else 'OT' for _, tag in tagged]
    return words, tags
def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map reduced POS tag strings to their integer codes.

    The encoding is ``NN -> 1``, ``DT -> 2``, ``JJ -> 3``, ``OT -> 4``.

    Args:
        tags: Iterable of tag strings drawn from ``'NN'``, ``'DT'``,
            ``'JJ'`` and ``'OT'``. The default is an immutable tuple so it
            cannot be mutated between calls.

    Returns:
        A new list with one integer code per input tag, in input order.

    Raises:
        KeyError: If a tag is not one of the four known tags.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
def predict_for_example(sentence, tags, model):
    """Run *model* on one tagged sentence and return its predictions.

    The tag strings are encoded with ``create_pos_tags``, converted to the
    model input with ``tags2sentence`` (imported from ``PRNN_utils``), and
    passed to ``model.predict_tags``.

    Args:
        sentence: Accepted for call-site symmetry; not used by this function.
        tags: Reduced POS tags (``'NN'``, ``'DT'``, ``'JJ'``, ``'OT'``).
        model: Object exposing ``predict_tags``.

    Returns:
        Whatever ``model.predict_tags`` returns for the encoded input.
    """
    encoded_tags = create_pos_tags(tags)
    return model.predict_tags(tags2sentence(encoded_tags))
def get_noun_chunks(tokens, pos_tags, chunk_tags):
    """Group tokens into noun chunks from per-token chunk tags.

    A chunk tag of ``1`` opens a chunk at that token and each immediately
    following ``0`` extends it. A chunk spanning more than one token is
    always reported; a single-token chunk is reported only when its POS tag
    is ``'NN'``. Tokens tagged ``0`` before any ``1`` belong to no chunk and
    are skipped.

    Each chunk is reported exactly once. Previously a chunk that started on
    an ``'NN'`` token and continued over further tokens was emitted twice:
    once as the lone first token and once as the full span.

    Args:
        tokens: Sequence of word strings.
        pos_tags: Sequence of POS tag strings, aligned with *tokens*.
        chunk_tags: Sequence of 0/1 chunk tags, aligned with *tokens*.

    Returns:
        A three-element list ``[chunks, chunk_pos_tags, chunk_tag_slices]``:
        the chunks as space-joined strings, and for each chunk the matching
        slices of *pos_tags* and *chunk_tags*.
    """
    noun_chunks = []
    noun_chunks_pos_tags = []
    noun_chunks_tags = []

    total = len(chunk_tags)
    i = 0
    while i < total:
        if chunk_tags[i] != 1:
            # Not a chunk start (e.g. a leading 0): nothing to open here.
            i += 1
            continue

        start = i
        # Absorb the run of continuation tags that follows the start.
        while i + 1 < total and chunk_tags[i + 1] == 0:
            i += 1
        end = i + 1

        # Multi-token chunks always count; a lone token only if it is a noun.
        if end - start > 1 or pos_tags[start] == 'NN':
            noun_chunks.append(" ".join(tokens[start:end]))
            noun_chunks_pos_tags.append(pos_tags[start:end])
            noun_chunks_tags.append(chunk_tags[start:end])
        i = end

    return [noun_chunks, noun_chunks_pos_tags, noun_chunks_tags]
def _load_best_model(pickle_path):
    """Build a PRNN from the cross-validation results stored at *pickle_path*.

    The pickled dictionary is passed to ``process_CVresults`` and the two
    values it returns are assigned to the model's ``params`` and ``w``
    attributes.

    NOTE: ``pickle.load`` can execute arbitrary code while loading; only
    point this at result files shipped with the app, never at untrusted
    uploads.
    """
    model = PRNN()  # Instantiate a model
    # Loading the dictionary from the file using pickle
    with open(pickle_path, 'rb') as f:
        cv_results = pickle.load(f)
    best_params, best_weights = process_CVresults(CVresults_dict=cv_results, summarize=False)
    model.params = best_params
    model.w = best_weights
    return model


# The numbering is historical: 1/3 load the "data" results, 2/4 load the
# "con_data" results, and 3/4 load the "_sigmoid" variants.
model2 = _load_best_model('CVresults_con_data.pkl')
model4 = _load_best_model('CVresults_con_data_sigmoid.pkl')
model1 = _load_best_model('CVresults_data.pkl')
model3 = _load_best_model('CVresults_data_sigmoid.pkl')
def demo_(sentence):
    """Gradio callback: tag *sentence* and report noun chunks from all models.

    The sentence is tokenized and POS-tagged once, then each of the four
    module-level models predicts chunk tags, from which noun chunks are
    extracted.

    ``get_noun_chunks`` is called with its real parameter names
    (``tokens``, ``pos_tags``, ``chunk_tags``). The previous keywords
    (``sentence``, ``tags``, ``preds``) did not exist on that function and
    raised ``TypeError`` on every request.

    Args:
        sentence: Raw sentence typed by the user.

    Returns:
        A 5-tuple: the report strings for ``model2``, ``model4``, ``model1``
        and ``model3`` (in that order), followed by the list of reduced POS
        tags.
    """
    tokens, tags = tokens_and_tags(sentence)

    def _report(model):
        # One text block per model: raw predicted labels, then the chunks.
        preds = predict_for_example(sentence=tokens, tags=tags, model=model)
        chunks = get_noun_chunks(tokens=tokens, pos_tags=tags, chunk_tags=preds)
        return "predicted labels:\t" + str(preds) + "\n" + "predicted Noun chunks \t" + str(chunks)

    return _report(model2), _report(model4), _report(model1), _report(model3), tags
# Page title shown above the Gradio interface.
title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# One input box for the sentence and five output boxes, in the order demo_
# returns its values: model2, model4, model1, model3, then the POS tags.
# The second box shows model4, which is loaded from
# 'CVresults_con_data_sigmoid.pkl', so its label says "sigmoid" (it used to
# repeat the first box's "step" label).
demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(
        label="sentence for which you want noun chunks",
        lines=1,
        interactive=True,
        show_copy_button=True,
    ),
    outputs=[
        gr.Textbox(
            label="prediction on conditioned data with step activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="prediction on conditioned data with sigmoid activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="prediction on all data with step activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="prediction on whole data with sigmoid activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="pos tag label given by nltk library",
            lines=1,
            interactive=True,
            show_copy_button=True,
        ),
    ],
    title=title,
)
demo.launch(share=True)