Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import pandas as pd
|
6 |
+
from PRNN import PRNN
|
7 |
+
|
8 |
+
from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
|
9 |
+
import nltk
|
10 |
+
from nltk.tokenize import word_tokenize
|
11 |
+
|
12 |
+
def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag each token, collapsing rare tags.

    Tags outside the set {'JJ', 'NN', 'DT'} are mapped to the catch-all
    label 'OT' ("other"), so downstream code only ever sees four tags.

    Args:
        sentence: Raw input sentence (str).

    Returns:
        (words, tags): two parallel lists — the tokens and their
        (possibly collapsed) POS tags.
    """
    # Tokenize the sentence, then POS-tag the tokens.
    tokens = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(tokens)

    # Only these tags are kept verbatim; everything else becomes 'OT'.
    desired_tags = {'JJ', 'NN', 'DT'}

    words = []
    tags = []
    for word, tag in tagged_words:
        # The word is always kept; only the tag is collapsed. (The
        # original duplicated the word-append in both branches.)
        words.append(word)
        tags.append(tag if tag in desired_tags else 'OT')

    return words, tags
|
43 |
+
|
44 |
+
def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map POS tag strings to the integer ids the PRNN expects.

    Args:
        tags: Iterable of tag strings drawn from {'NN', 'DT', 'JJ', 'OT'}.
            The default is a tuple rather than the original list literal,
            avoiding the shared-mutable-default pitfall; the default
            value itself is unchanged.

    Returns:
        List of integer ids, one per input tag
        (NN -> 1, DT -> 2, JJ -> 3, OT -> 4).

    Raises:
        KeyError: If a tag outside the known set is supplied.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
|
53 |
+
|
54 |
+
def predict_for_example(sentence, tags, model):
    """Run *model* on the POS-tag sequence of one example.

    The *sentence* argument is accepted for interface symmetry with the
    other helpers but is not consulted here — prediction is driven
    purely by the tags.

    Returns whatever ``model.predict_tags`` yields for the encoded tags.
    """
    encoded = create_pos_tags(tags)
    return model.predict_tags(tags2sentence(encoded))
|
60 |
+
|
61 |
+
def find_chunks(sentence, preds):
    """Collect multi-word chunks from per-token chunk labels.

    A label of 1 starts a new chunk at that token; a label of 0 extends
    the currently open chunk (if any). Completed runs are kept only when
    they span more than one word.

    Args:
        sentence: List of word tokens.
        preds: Parallel list of 0/1 chunk labels.

    Returns:
        List of chunk strings, each a space-joined run of words.
    """
    collected = []
    current = []

    def flush():
        # Single-word runs are deliberately discarded — only chunks of
        # two or more words are reported.
        if len(current) > 1:
            collected.append(" ".join(current))

    for i, label in enumerate(preds):
        if label == 1:
            flush()
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    flush()

    return collected
|
74 |
+
|
75 |
+
|
76 |
+
def _load_pretrained(pickle_path):
    """Instantiate a PRNN and restore its best cross-validation weights.

    Reads a CV-results dict from *pickle_path*, extracts the best
    parameter set and weight vector via ``process_CVresults``, and
    installs them on a fresh model.

    NOTE(review): ``pickle.load`` on an untrusted file can execute
    arbitrary code — these files are assumed to ship with the app.
    """
    model = PRNN()
    with open(pickle_path, 'rb') as f:
        cv_results = pickle.load(f)
    best_params, best_weights = process_CVresults(CVresults_dict=cv_results, summarize=False)
    model.params = best_params
    model.w = best_weights
    return model

# Two pretrained chunkers: step-activation (model2) and
# sigmoid-activation (model4), restored from their CV result dumps.
model2 = _load_pretrained('CVresults_con_data.pkl')
model4 = _load_pretrained('CVresults_con_data_sigmoid.pkl')
|
95 |
+
|
96 |
+
def demo_(sentence):
    """Gradio callback: chunk labels and noun chunks from both models.

    Returns four strings: predicted labels from the step-activation
    model (model2) and the sigmoid model (model4), followed by the
    noun chunks each prediction yields.
    """
    words, tags = tokens_and_tags(sentence)

    step_preds = predict_for_example(sentence=words, tags=tags, model=model2)
    sig_preds = predict_for_example(sentence=words, tags=tags, model=model4)

    step_chunks = find_chunks(sentence=words, preds=step_preds)
    sig_chunks = find_chunks(sentence=words, preds=sig_preds)

    return str(step_preds), str(sig_preds), str(step_chunks), str(sig_chunks)
|
101 |
+
|
102 |
+
# Title displayed at the top of the Gradio page.
title="POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# One text input (the sentence) mapped to four text outputs: the raw
# chunk-label predictions and the extracted noun chunks, for the
# step-activation model and the sigmoid-activation model respectively.
demo = gr.Interface(fn=demo_, inputs=gr.Textbox(label="sentence for which you want noun chunks"), outputs=[gr.Textbox(label="predicted chunk label with step activation function"),gr.Textbox(label="predicted chunk label with sigmoid activation function"),gr.Textbox(label="predicted Noun chunk with step activation function"),gr.Textbox(label="predicted Noun chunk with sigmoid activation function")],title=title)

# share=True requests a publicly shareable URL for the running app.
demo.launch(share=True)
|