# Gradio demo: noun-chunk identification with a trained recurrent perceptron.
# NOTE: removed Hugging Face Spaces page-scrape residue (status lines, file
# size, commit hash, and a line-number gutter) that was not valid Python.
import gradio as gr
import numpy as np
import numpy as np
import pickle
import pandas as pd
from PRNN import PRNN
from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
import nltk
from nltk.tokenize import word_tokenize
def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag it, collapsing tags outside
    {'JJ', 'NN', 'DT'} to the catch-all tag 'OT'.

    Parameters
    ----------
    sentence : str
        Raw input sentence.

    Returns
    -------
    tuple[list[str], list[str]]
        Parallel lists: the tokens, and their (possibly collapsed) POS tags.
    """
    tokens = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(tokens)

    # Only these tags are kept verbatim; everything else becomes 'OT' (other).
    desired_tags = {'JJ', 'NN', 'DT'}

    words = []
    tags = []
    for word, tag in tagged_words:
        # The original appended `word` in both branches; only the tag differs.
        words.append(word)
        tags.append(tag if tag in desired_tags else 'OT')
    return words, tags
def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map POS tag strings to the integer codes the PRNN model expects.

    Parameters
    ----------
    tags : sequence of str, optional
        POS tags drawn from {'NN', 'DT', 'JJ', 'OT'}. The default is an
        immutable tuple — the original used a mutable list default, which
        is a classic shared-state pitfall.

    Returns
    -------
    list[int]
        Integer code per tag: NN=1, DT=2, JJ=3, OT=4.

    Raises
    ------
    KeyError
        If a tag outside the known set is supplied.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
def predict_for_example(sentence, tags, model):
    """Predict chunk labels for one example using *model*.

    NOTE(review): *sentence* is accepted but never used — the model predicts
    from POS tags alone; verify against callers before removing it.
    """
    encoded = tags2sentence(create_pos_tags(tags))
    return model.predict_tags(encoded)
def find_chunks(sentence, preds):
    """Group the words of *sentence* into chunks driven by *preds*.

    A prediction of 1 opens a new chunk; a prediction of 0 extends the
    currently open chunk (if any). Only chunks of two or more words are
    emitted — single-word chunks are dropped (presumably intentional for
    this chunking scheme; TODO confirm against the training data).
    """
    completed = []
    current = []
    for i, label in enumerate(preds):
        if label == 1:
            # Flush the open chunk (if long enough) and start a fresh one.
            if len(current) > 1:
                completed.append(" ".join(current))
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    # Flush whatever is still open at the end of the sentence.
    if len(current) > 1:
        completed.append(" ".join(current))
    return completed
# --- Model trained with the step activation function -----------------------
model2 = PRNN()
# Restore the cross-validation results persisted with pickle.
# NOTE(review): pickle.load is only safe on trusted data — this file ships
# with the app, so it is acceptable here.
with open('CVresults_con_data.pkl', 'rb') as f:
    model_dict2 = pickle.load(f)
# Install the best parameters / weights found during cross-validation.
model2.params, model2.w = process_CVresults(CVresults_dict=model_dict2, summarize=False)
# --- Model trained with the sigmoid activation function --------------------
model4 = PRNN()
# Restore the cross-validation results persisted with pickle (trusted file).
with open('CVresults_con_data_sigmoid.pkl', 'rb') as f:
    model_dict4 = pickle.load(f)
# Install the best parameters / weights found during cross-validation.
model4.params, model4.w = process_CVresults(CVresults_dict=model_dict4, summarize=False)
def demo_(sentence):
    """Gradio callback: predict chunk labels and extract noun chunks with
    both trained models (step and sigmoid activation).

    Returns four strings: step-model labels, sigmoid-model labels,
    step-model chunks, sigmoid-model chunks.
    """
    words, tags = tokens_and_tags(sentence)
    step_preds = predict_for_example(sentence=words, tags=tags, model=model2)
    sig_preds = predict_for_example(sentence=words, tags=tags, model=model4)
    step_chunks = find_chunks(sentence=words, preds=step_preds)
    sig_chunks = find_chunks(sentence=words, preds=sig_preds)
    return str(step_preds), str(sig_preds), str(step_chunks), str(sig_chunks)
title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# Four outputs: predicted chunk labels and extracted noun chunks, one pair
# per activation function (step / sigmoid).
demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(label="sentence for which you want noun chunks"),
    outputs=[
        gr.Textbox(label="predicted chunk label with step activation function"),
        gr.Textbox(label="predicted chunk label with sigmoid activation function"),
        gr.Textbox(label="predicted Noun chunk with step activation function"),
        gr.Textbox(label="predicted Noun chunk with sigmoid activation function"),
    ],
    title=title,
)
# BUG FIX: the original line ended with a stray " |" (page-scrape artifact),
# which is a syntax error.
demo.launch(share=True)