Spaces:

vivek9
/

CS772_Assignment2

Sleeping

App Files Files Community

CS772_Assignment2 / app.py

vivek9

Update app.py

38119cc verified over 1 year ago

raw

history blame contribute delete

5.55 kB

	import gradio as gr
	import numpy as np
	import numpy as np
	import pickle
	import pandas as pd
	from PRNN import PRNN

	from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
	import nltk
	nltk.download('all')
	from nltk.tokenize import word_tokenize

	def tokens_and_tags(sentence):
	    """Tokenize *sentence* and collapse its POS tags to the tag set used here.

	    The sentence is split with ``word_tokenize`` and tagged with
	    ``nltk.pos_tag``.  The tags ``'JJ'``, ``'NN'`` and ``'DT'`` are kept
	    unchanged; every other tag is replaced by ``'OT'``.  The match is on the
	    exact tag string.

	    Returns:
	        A ``(words, tags)`` pair of equal-length lists, one entry per token,
	        in token order.
	    """
	    kept_tags = {'JJ', 'NN', 'DT'}
	    tagged = nltk.pos_tag(word_tokenize(sentence))

	    # Every token is kept; only its tag may be rewritten.
	    words = [token for token, _ in tagged]
	    tags = [pos if pos in kept_tags else 'OT' for _, pos in tagged]
	    return words, tags

	def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
	    """Map POS tag strings to their integer codes.

	    Codes: ``'NN'`` -> 1, ``'DT'`` -> 2, ``'JJ'`` -> 3, ``'OT'`` -> 4.

	    Args:
	        tags: Iterable of tag strings.  The default is an immutable tuple so
	            that the default value can never be mutated between calls.

	    Returns:
	        A new list holding one integer code per input tag, in input order.

	    Raises:
	        KeyError: If a tag is not one of the four known tags.
	    """
	    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
	    return [pos_dict[tag] for tag in tags]

	def predict_for_example(sentence, tags, model):
	    """Run *model* on the POS tags of one sentence and return its prediction.

	    The tag strings are converted to integer codes with ``create_pos_tags``,
	    turned into the model input with ``tags2sentence`` and handed to
	    ``model.predict_tags``.  ``sentence`` is accepted for call-site symmetry
	    but is not used here.
	    """
	    model_input = tags2sentence(create_pos_tags(tags))
	    predictions = model.predict_tags(model_input)
	    return predictions

	def get_noun_chunks(sentence, tags, preds):
	    """Extract noun-chunk strings from tokens using predicted chunk labels.

	    A token labelled ``1`` opens a chunk and the run of tokens labelled ``0``
	    that immediately follows it is absorbed into that chunk.  Tokens that
	    appear before the first ``1`` are ignored.

	    * A ``1`` token with at least one following ``0`` yields the whole span.
	    * A ``1`` token whose POS tag is ``'NN'`` yields that single token.

	    Args:
	        sentence: Sequence of token strings.
	        tags: Sequence of POS tag strings, parallel to ``sentence``.
	        preds: Sequence of chunk labels, parallel to ``sentence``.

	    Returns:
	        A list of chunk strings (tokens joined by single spaces), in the
	        order they are found.
	    """
	    tokens = sentence
	    pos_tags = tags
	    chunk_tags = preds

	    # NOTE(review): written for labels that are exactly 0 or 1 -- confirm
	    # that ``model.predict_tags`` never returns anything else.
	    chunks = []
	    total = len(chunk_tags)
	    i = 0
	    while i < total:
	        if chunk_tags[i] == 1:
	            start = i

	            # NOTE(review): an 'NN' start token that also has continuation
	            # tokens is reported twice (alone, then as the full span).  This
	            # is the existing behaviour and is kept; confirm it is intended.
	            if pos_tags[i] == 'NN':
	                chunks.append([tokens[i]])

	            # Swallow the continuation tokens that belong to this chunk.
	            while i + 1 < total and chunk_tags[i + 1] == 0:
	                i += 1

	            if i > start:
	                chunks.append(tokens[start:i + 1])
	        i += 1

	    return [" ".join(chunk) for chunk in chunks]



	def _load_best_model(results_path):
	    """Return a PRNN configured from the pickled results at *results_path*.

	    The pickle is loaded, passed to ``process_CVresults`` (with
	    ``summarize=False``) and the two values it returns are stored on a fresh
	    ``PRNN`` instance as ``params`` and ``w``.

	    Args:
	        results_path: Path of the pickle file to read.

	    Returns:
	        The configured ``PRNN`` instance.
	    """
	    model = PRNN()  # Instantiate a model

	    # NOTE: pickle.load can execute arbitrary code; only load result files
	    # that are shipped with this app, never user-supplied ones.
	    with open(results_path, 'rb') as f:
	        cv_results = pickle.load(f)

	    best_params, best_weights = process_CVresults(CVresults_dict=cv_results, summarize=False)
	    model.params = best_params
	    model.w = best_weights
	    return model


	# The four models are loaded in the same order as before.
	model2 = _load_best_model('CVresults_con_data.pkl')
	model4 = _load_best_model('CVresults_con_data_sigmoid.pkl')
	model1 = _load_best_model('CVresults_data.pkl')
	model3 = _load_best_model('CVresults_data_sigmoid.pkl')



	def demo_(sentence):
	    """Gradio callback: tag *sentence* and report each model's noun chunks.

	    The sentence is tokenized and POS-tagged once, every model predicts
	    chunk labels from the same tags, and one report string is built per
	    model.

	    Returns:
	        A 5-tuple: the report strings for ``model2``, ``model4``, ``model1``
	        and ``model3`` (in that order), followed by the list of POS tags.
	    """
	    sentence, tags = tokens_and_tags(sentence)

	    def report(preds):
	        # Two-line report: the raw labels, then the extracted chunks.
	        labels_line = "predicted labels:\t" + str(preds)
	        chunks_line = "predicted Noun chunks \t" + str(
	            get_noun_chunks(sentence=sentence, tags=tags, preds=preds)
	        )
	        return labels_line + "\n" + chunks_line

	    preds1 = predict_for_example(sentence=sentence, tags=tags, model=model1)
	    preds3 = predict_for_example(sentence=sentence, tags=tags, model=model3)
	    preds2 = predict_for_example(sentence=sentence, tags=tags, model=model2)
	    preds4 = predict_for_example(sentence=sentence, tags=tags, model=model4)

	    return report(preds2), report(preds4), report(preds1), report(preds3), tags

	title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

	# One input box for the sentence; five output boxes matching, in order, the
	# five values returned by demo_.
	demo = gr.Interface(
	    fn=demo_,
	    inputs=gr.Textbox(
	        label="sentence for which you want noun chunks",
	        lines=1,
	        interactive=True,
	        show_copy_button=True,
	    ),
	    outputs=[
	        gr.Textbox(
	            label="prediction on conditioned data with step activation function",
	            lines=2,
	            interactive=True,
	            show_copy_button=True,
	        ),
	        gr.Textbox(
	            label="prediction on conditioned data with sigmoid activation function",
	            lines=2,
	            interactive=True,
	            show_copy_button=True,
	        ),
	        gr.Textbox(
	            label="prediction on all data with step activation function",
	            lines=2,
	            interactive=True,
	            show_copy_button=True,
	        ),
	        gr.Textbox(
	            label="prediction on all data with sigmoid activation function",
	            lines=2,
	            interactive=True,
	            show_copy_button=True,
	        ),
	        gr.Textbox(
	            label="pos tag label given by nltk library",
	            lines=1,
	            interactive=True,
	            show_copy_button=True,
	        ),
	    ],
	    title=title,
	)

	demo.launch(share=True)