"""Gradio demo: noun-chunk identification with a trained recurrent perceptron (PRNN).

Loads two cross-validated PRNN models (step- and sigmoid-activation variants)
from pickle files, POS-tags an input sentence with NLTK, predicts per-token
chunk labels with each model, and displays both the raw label sequences and
the extracted multi-word noun chunks in a Gradio interface.
"""

import pickle

import gradio as gr
import nltk
from nltk.tokenize import word_tokenize

from PRNN import PRNN
from PRNN_utils import (
    batch_calculate_grads,
    check_conditions,
    check_all_conditions,
    train_and_val,
    prepare_folds,
    process_CVresults,
    tags2sentence,
)

# Tokenizer models needed by word_tokenize / pos_tag.
nltk.download('punkt')


def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag it, collapsing rare tags to 'OT'.

    Only the tags NN, JJ and DT are kept as-is; every other tag is
    replaced by the catch-all label 'OT'. Returns (words, tags) as two
    parallel lists of equal length.
    """
    tokens = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(tokens)

    desired_tags = {'JJ', 'NN', 'DT'}
    words = []
    tags = []
    for word, tag in tagged_words:
        words.append(word)
        # Map any tag outside the desired set to the 'other' bucket.
        tags.append(tag if tag in desired_tags else 'OT')
    return words, tags


def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map a sequence of POS-tag strings to the model's integer encoding.

    Encoding: NN -> 1, DT -> 2, JJ -> 3, OT -> 4. Raises KeyError for any
    tag outside this set (callers only pass tags produced by
    tokens_and_tags, which guarantees membership).
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]


def predict_for_example(sentence, tags, model):
    """Predict per-token chunk labels for one (sentence, tags) pair.

    *sentence* is unused here beyond the caller's bookkeeping; the model
    consumes only the integer-encoded tag sequence.
    """
    sent_pos_tags = create_pos_tags(tags)
    x = tags2sentence(sent_pos_tags)
    return model.predict_tags(x)


def find_chunks(sentence, preds):
    """Group words of *sentence* into chunks from the 0/1 label sequence *preds*.

    A 1 starts a new chunk; a 0 extends the current chunk (leading 0s with
    no open chunk are dropped). Only chunks of two or more words are
    returned, joined by spaces.

    NOTE(review): the `len(chunk) > 1` filter deliberately discards
    single-word chunks — confirm this matches the intended chunk
    definition before changing it.
    """
    chunks = []
    chunk = []
    for i in range(len(preds)):
        if preds[i] == 1:
            if len(chunk) > 1:
                chunks.append(" ".join(chunk))
            chunk = [sentence[i]]
        elif preds[i] == 0 and chunk:
            chunk.append(sentence[i])
    # Flush the trailing chunk, subject to the same length filter.
    if len(chunk) > 1:
        chunks.append(" ".join(chunk))
    return chunks


def _load_model(pickle_path):
    """Instantiate a PRNN and restore its best CV parameters from *pickle_path*."""
    model = PRNN()
    with open(pickle_path, 'rb') as f:
        cv_results = pickle.load(f)
    p_best, w_best = process_CVresults(CVresults_dict=cv_results, summarize=False)
    model.params = p_best
    model.w = w_best
    return model


# Step-activation and sigmoid-activation model variants.
model2 = _load_model('CVresults_con_data.pkl')
model4 = _load_model('CVresults_con_data_sigmoid.pkl')


def demo_(sentence):
    """Gradio callback: run both models on *sentence*.

    Returns four strings: the two predicted label sequences, then the two
    extracted noun-chunk lists (step model first, sigmoid model second).
    """
    sentence, tags = tokens_and_tags(sentence)
    preds2 = predict_for_example(sentence=sentence, tags=tags, model=model2)
    preds4 = predict_for_example(sentence=sentence, tags=tags, model=model4)
    return (
        str(preds2),
        str(preds4),
        str(find_chunks(sentence=sentence, preds=preds2)),
        str(find_chunks(sentence=sentence, preds=preds4)),
    )


title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(label="sentence for which you want noun chunks"),
    outputs=[
        gr.Textbox(label="predicted chunk label with step activation function"),
        gr.Textbox(label="predicted chunk label with sigmoid activation function"),
        gr.Textbox(label="predicted Noun chunk with step activation function"),
        gr.Textbox(label="predicted Noun chunk with sigmoid activation function"),
    ],
    title=title,
)

demo.launch(share=True)