vivek9 commited on
Commit
44f2935
·
verified ·
1 Parent(s): ed9b391

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pickle

import gradio as gr
import nltk
import numpy as np
import pandas as pd
from nltk.tokenize import word_tokenize

from PRNN import PRNN
from PRNN_utils import batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
11
+
12
def tokens_and_tags(sentence):
    """Tokenize *sentence* and return parallel lists (words, coarse POS tags).

    Tokens are POS-tagged with NLTK; any tag outside {'JJ', 'NN', 'DT'}
    is collapsed to the catch-all label 'OT'.
    """
    # Split the raw sentence into word tokens.
    tokens = word_tokenize(sentence)

    # POS-tag every token with NLTK's default tagger.
    tagged_words = nltk.pos_tag(tokens)

    # Only these tags are kept verbatim; everything else becomes 'OT'.
    desired_tags = {'JJ', 'NN', 'DT'}

    words = []
    tags = []
    for token, pos in tagged_words:
        words.append(token)
        tags.append(pos if pos in desired_tags else 'OT')

    return words, tags
43
+
44
def create_pos_tags(tags=['NN', 'JJ', 'DT', 'OT']):
    """Map POS tag strings to their integer codes.

    Encoding: NN=1, DT=2, JJ=3, OT=4. Returns a list of ints in the
    same order as *tags*.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]
53
+
54
def predict_for_example(sentence, tags, model):
    """Predict per-token chunk labels for one example.

    NOTE: *sentence* is accepted for interface symmetry but unused here;
    the prediction is driven purely by the POS *tags*.
    """
    encoded = create_pos_tags(tags)
    features = tags2sentence(encoded)
    return model.predict_tags(features)
60
+
61
def find_chunks(sentence, preds):
    """Assemble multi-word chunks from per-token binary labels.

    A label of 1 opens a new chunk at that word; a label of 0 appends the
    word to the currently open chunk (if any). Only chunks spanning two or
    more words are returned, each joined with single spaces.
    """
    found = []
    current = []
    for i, label in enumerate(preds):
        if label == 1:
            # Flush the previous chunk if it spans at least two words.
            if len(current) > 1:
                found.append(" ".join(current))
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    # Flush a trailing multi-word chunk, if one is still open.
    if len(current) > 1:
        found.append(" ".join(current))
    return found
74
+
75
+
76
def _load_best_model(pickle_path):
    """Build a PRNN initialized with the best cross-validation result.

    Loads the CV-results dict pickled at *pickle_path*, selects the best
    params/weights via process_CVresults, and returns a PRNN carrying them.
    NOTE: pickle.load is only safe on trusted local files.
    """
    model = PRNN()
    with open(pickle_path, 'rb') as f:
        cv_results = pickle.load(f)
    best_params, best_weights = process_CVresults(CVresults_dict=cv_results, summarize=False)
    model.params = best_params
    model.w = best_weights
    return model

# Model trained with the step activation function.
model2 = _load_best_model('CVresults_con_data.pkl')
# Model trained with the sigmoid activation function.
model4 = _load_best_model('CVresults_con_data_sigmoid.pkl')
95
+
96
def demo_(sentence):
    """Gradio callback: run both models on *sentence*.

    Returns four display strings: the raw label sequences predicted by the
    step- and sigmoid-activation models, then the noun chunks each yields.
    """
    words, tags = tokens_and_tags(sentence)
    step_preds = predict_for_example(sentence=words, tags=tags, model=model2)
    sigmoid_preds = predict_for_example(sentence=words, tags=tags, model=model4)
    step_chunks = find_chunks(sentence=words, preds=step_preds)
    sigmoid_chunks = find_chunks(sentence=words, preds=sigmoid_preds)
    return str(step_preds), str(sigmoid_preds), str(step_chunks), str(sigmoid_chunks)
101
+
102
# Title displayed above the Gradio interface.
title="POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"
# Single textbox input; four text outputs: the predicted label sequences and
# the extracted noun chunks for the step- and sigmoid-activation models.
demo = gr.Interface(fn=demo_, inputs=gr.Textbox(label="sentence for which you want noun chunks"), outputs=[gr.Textbox(label="predicted chunk label with step activation function"),gr.Textbox(label="predicted chunk label with sigmoid activation function"),gr.Textbox(label="predicted Noun chunk with step activation function"),gr.Textbox(label="predicted Noun chunk with sigmoid activation function")],title=title)

# share=True additionally exposes a public Gradio share link.
demo.launch(share=True)