# HuggingFace Spaces page header ("Spaces: Sleeping") — scrape artifact, not code.
import gradio as gr | |
import numpy as np | |
import numpy as np | |
import pickle | |
import pandas as pd | |
from PRNN import PRNN | |
from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence | |
import nltk | |
from nltk.tokenize import word_tokenize | |
def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag every token, collapsing any tag
    outside {JJ, NN, DT} to the catch-all label 'OT'.

    Returns:
        (words, tags): two equal-length lists, one entry per token.
    """
    kept_tags = {'JJ', 'NN', 'DT'}
    words = []
    tags = []
    # Every token is kept; only its tag may be rewritten to 'OT'.
    for word, tag in nltk.pos_tag(word_tokenize(sentence)):
        words.append(word)
        tags.append(tag if tag in kept_tags else 'OT')
    return words, tags
def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map POS tag strings to the integer codes the PRNN model expects.

    Encoding: NN -> 1, DT -> 2, JJ -> 3, OT (other) -> 4.

    Args:
        tags: iterable of tag strings; the default covers all four codes.

    Returns:
        list[int]: one integer code per input tag, in order.

    Raises:
        KeyError: if a tag is not one of NN/DT/JJ/OT.
    """
    # Tuple default replaces the original mutable-list default (the list was
    # only ever read, so behavior is unchanged; the idiom is safer).
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
def predict_for_example(sentence, tags, model):
    """Predict chunk labels for one example with *model*.

    Note: *sentence* is accepted for interface symmetry but unused here —
    only *tags* feeds the model. Tags are encoded to integer codes,
    wrapped by tags2sentence, and passed to model.predict_tags.
    """
    encoded = create_pos_tags(tags)
    return model.predict_tags(tags2sentence(encoded))
def find_chunks(sentence, preds):
    """Assemble multi-word chunks from per-token chunk predictions.

    A prediction of 1 opens a new chunk at that word; a prediction of 0
    extends the currently open chunk (if any). Only chunks containing two
    or more words are returned, each joined with single spaces.
    """
    chunks = []
    current = []
    for i, label in enumerate(preds):
        if label == 1:
            # Flush the previous chunk before starting a fresh one,
            # but only if it grew beyond a single word.
            if len(current) > 1:
                chunks.append(" ".join(current))
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    # Flush whatever chunk is still open at the end of the sentence.
    if len(current) > 1:
        chunks.append(" ".join(current))
    return chunks
model2 = PRNN()  # Instantiate a model — presumably the step-activation variant (matches the output labels below)
# Loading the cross-validation results dictionary from the file using pickle.
# NOTE(review): pickle.load on a local artifact — only safe for trusted files.
with open('CVresults_con_data.pkl', 'rb') as f:
    model_dict2 = pickle.load(f)
# Select the best params/weights found during CV; summarize=False suppresses reporting.
P_best2, W_best2 = process_CVresults(CVresults_dict=model_dict2, summarize=False)
model2.params = P_best2
model2.w = W_best2
model4 = PRNN()  # Instantiate a model — presumably the sigmoid-activation variant (matches the output labels below)
# Loading the cross-validation results dictionary from the file using pickle.
# NOTE(review): pickle.load on a local artifact — only safe for trusted files.
with open('CVresults_con_data_sigmoid.pkl', 'rb') as f:
    model_dict4 = pickle.load(f)
# Select the best params/weights found during CV; summarize=False suppresses reporting.
P_best4, W_best4 = process_CVresults(CVresults_dict=model_dict4, summarize=False)
model4.params = P_best4
model4.w = W_best4
def demo_(sentence):
    """Gradio callback: noun-chunk one raw sentence with both trained models.

    Returns four display strings: predicted chunk labels from the
    step-activation model (model2) and the sigmoid model (model4),
    followed by the extracted chunks from each.
    """
    words, tags = tokens_and_tags(sentence)
    step_preds = predict_for_example(sentence=words, tags=tags, model=model2)
    sig_preds = predict_for_example(sentence=words, tags=tags, model=model4)
    step_chunks = find_chunks(sentence=words, preds=step_preds)
    sig_chunks = find_chunks(sentence=words, preds=sig_preds)
    return str(step_preds), str(sig_preds), str(step_chunks), str(sig_chunks)
title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# One free-text input; four outputs pairing (labels, chunks) for the
# step-activation and sigmoid-activation models.
sentence_box = gr.Textbox(label="sentence for which you want noun chunks")
result_boxes = [
    gr.Textbox(label="predicted chunk label with step activation function"),
    gr.Textbox(label="predicted chunk label with sigmoid activation function"),
    gr.Textbox(label="predicted Noun chunk with step activation function"),
    gr.Textbox(label="predicted Noun chunk with sigmoid activation function"),
]
demo = gr.Interface(fn=demo_, inputs=sentence_box, outputs=result_boxes, title=title)
demo.launch(share=True)