# Source: Hugging Face Space file "app.py" by vivek9 (commit 44f2935).
import gradio as gr
import numpy as np
import numpy as np
import pickle
import pandas as pd
from PRNN import PRNN
from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
import nltk
from nltk.tokenize import word_tokenize
def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag it, collapsing any tag outside
    {'JJ', 'NN', 'DT'} into the catch-all tag 'OT'.

    Parameters
    ----------
    sentence : str
        Raw input sentence.

    Returns
    -------
    tuple[list[str], list[str]]
        Parallel lists: the tokens and their (possibly collapsed) POS tags.
    """
    tokens = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(tokens)

    # Only these tags are meaningful downstream; everything else becomes 'OT'.
    # (Original code appended `word` in both branches — only the tag differed.)
    desired_tags = {'JJ', 'NN', 'DT'}
    words = [word for word, _ in tagged_words]
    tags = [tag if tag in desired_tags else 'OT' for _, tag in tagged_words]
    return words, tags
def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Convert POS-tag strings to the integer codes used by the model.

    Parameters
    ----------
    tags : iterable of str, optional
        POS tags drawn from {'NN', 'DT', 'JJ', 'OT'}.
        The default is an immutable tuple — the original used a mutable
        list default, which is a classic Python pitfall.

    Returns
    -------
    list[int]
        Integer code for each tag, in the input order.

    Raises
    ------
    KeyError
        If a tag is not one of the four known tags.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
def predict_for_example(sentence, tags, model):
    """Encode one example's POS tags and return *model*'s chunk-label predictions.

    NOTE(review): `sentence` itself is unused here — only `tags` drives the model.
    """
    encoded = tags2sentence(create_pos_tags(tags))
    return model.predict_tags(encoded)
def find_chunks(sentence, preds):
    """Group the words of *sentence* into chunks driven by the labels in *preds*.

    A label of 1 starts a new chunk; a label of 0 extends the open chunk
    (labels other than 0/1 are ignored). Only chunks of two or more words
    are kept, joined with single spaces.
    """
    collected = []
    current = []
    for i, label in enumerate(preds):
        if label == 1:
            # Flush the open chunk (if long enough) and start a fresh one.
            if len(current) > 1:
                collected.append(" ".join(current))
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    # Flush whatever chunk is still open at the end.
    if len(current) > 1:
        collected.append(" ".join(current))
    return collected
# Model 2: paired with the "step activation" outputs in the UI below.
model2 = PRNN()
# Restore the cross-validation results that were pickled to disk.
with open('CVresults_con_data.pkl', 'rb') as f:
    _cv_dict2 = pickle.load(f)
# Select the best params / weights from the CV results and install them.
model2.params, model2.w = process_CVresults(CVresults_dict=_cv_dict2, summarize=False)
# Model 4: paired with the "sigmoid activation" outputs in the UI below.
model4 = PRNN()
# Restore the cross-validation results that were pickled to disk.
with open('CVresults_con_data_sigmoid.pkl', 'rb') as f:
    _cv_dict4 = pickle.load(f)
# Select the best params / weights from the CV results and install them.
model4.params, model4.w = process_CVresults(CVresults_dict=_cv_dict4, summarize=False)
def demo_(sentence):
    """Gradio handler: predict chunk labels and extract noun chunks with
    both models (step- and sigmoid-activation variants).

    Returns four strings: step predictions, sigmoid predictions,
    step chunks, sigmoid chunks — matching the four output textboxes.
    """
    words, pos = tokens_and_tags(sentence)
    step_preds = predict_for_example(sentence=words, tags=pos, model=model2)
    sig_preds = predict_for_example(sentence=words, tags=pos, model=model4)
    return (
        str(step_preds),
        str(sig_preds),
        str(find_chunks(sentence=words, preds=step_preds)),
        str(find_chunks(sentence=words, preds=sig_preds)),
    )
title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# One textbox in, four textboxes out — order must match demo_'s return tuple.
_outputs = [
    gr.Textbox(label="predicted chunk label with step activation function"),
    gr.Textbox(label="predicted chunk label with sigmoid activation function"),
    gr.Textbox(label="predicted Noun chunk with step activation function"),
    gr.Textbox(label="predicted Noun chunk with sigmoid activation function"),
]
demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(label="sentence for which you want noun chunks"),
    outputs=_outputs,
    title=title,
)
demo.launch(share=True)