File size: 5,497 Bytes
44f2935
 
 
 
 
 
 
 
 
62ac107
44f2935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f71427f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44f2935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f71427f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44f2935
 
f71427f
 
44f2935
 
b3ad09c
44f2935
 
f71427f
44f2935
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import gradio as gr
import numpy as np
import numpy as np
import pickle
import pandas as pd
from PRNN import PRNN

from PRNN_utils import   batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
import nltk
# Fetches every NLTK data package each time this module is imported.
# Within this file, NLTK data is only needed by word_tokenize and nltk.pos_tag.
# NOTE(review): downloading just the tokenizer and POS-tagger resources would
# presumably be enough — confirm the resource names for the installed NLTK version.
nltk.download('all')
from nltk.tokenize import word_tokenize

def tokens_and_tags(sentence):
    """Tokenize *sentence* and give every token a coarse POS label.

    The sentence is split with ``word_tokenize`` and tagged with
    ``nltk.pos_tag``. A token keeps its tag only when that tag is exactly
    'JJ', 'NN' or 'DT'; every other token is labelled 'OT'.

    Returns:
        A ``(words, tags)`` pair of parallel lists, one entry per token.
    """
    # Tags that are passed through unchanged; anything else collapses to 'OT'.
    kept_tags = {'JJ', 'NN', 'DT'}

    tagged = nltk.pos_tag(word_tokenize(sentence))

    words = [word for word, _ in tagged]
    tags = [tag if tag in kept_tags else 'OT' for _, tag in tagged]

    return words, tags

def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map coarse POS tag strings to their integer codes.

    The encoding is NN -> 1, DT -> 2, JJ -> 3, OT -> 4.

    Args:
        tags: Iterable of tag strings. The default is an immutable tuple so
            that no mutable object is shared between calls.

    Returns:
        A new list with one integer code per input tag, in input order.

    Raises:
        KeyError: If a tag is not one of 'NN', 'DT', 'JJ' or 'OT'.
    """
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]

def predict_for_example(sentence, tags, model):
    """Run *model* on the POS tags of one sentence and return its prediction.

    Args:
        sentence: Accepted for interface compatibility; not used here.
        tags: POS tag strings understood by ``create_pos_tags``.
        model: Object exposing ``predict_tags``.

    Returns:
        Whatever ``model.predict_tags`` returns for the encoded input.
    """
    encoded_tags = create_pos_tags(tags)
    # tags2sentence turns the integer codes into the model's input format
    # (exact representation is defined in PRNN_utils).
    model_input = tags2sentence(encoded_tags)
    return model.predict_tags(model_input)

def get_noun_chunks(tokens, pos_tags, chunk_tags):
    """Group tokens into chunks according to per-token chunk labels.

    A label equal to 1 marks a position where a chunk may open; a following
    run of labels equal to 0 extends it. Spans covering more than one token
    are always recorded. A lone opening token is recorded only when its POS
    tag is 'NN'.

    Args:
        tokens: Sequence of word strings.
        pos_tags: Sequence of POS tag strings, parallel to *tokens*.
        chunk_tags: Sequence of chunk labels, parallel to *tokens*.

    Returns:
        A three-element list ``[chunk_strings, chunk_pos_tags, chunk_labels]``
        where ``chunk_strings`` holds each chunk's words joined by a space and
        the other two hold the matching POS tags and labels per chunk.
    """
    total = len(chunk_tags)
    phrases, phrase_pos, phrase_labels = [], [], []

    # Index of the latest label-1 position that has not yet been closed by a
    # multi-token span; None when no such position is pending.
    anchor = None
    pos = 0

    while pos < total:
        if chunk_tags[pos] == 1:
            anchor = pos
            # An opening noun is recorded as a one-word chunk straight away.
            # NOTE(review): when 0-labels follow, the longer span below is
            # recorded as well, so the noun shows up twice — confirm intended.
            if pos_tags[pos] == 'NN':
                phrases.append([tokens[pos]])
                phrase_pos.append([pos_tags[pos]])
                phrase_labels.append([chunk_tags[pos]])

        # Advance over the run of 0-labelled tokens continuing a pending chunk.
        while pos + 1 < total and chunk_tags[pos + 1] == 0 and anchor is not None:
            pos += 1

        # Emit the span only if it grew beyond its opening token.
        if anchor is not None and pos > anchor:
            stop = pos + 1
            phrases.append(tokens[anchor:stop])
            phrase_pos.append(pos_tags[anchor:stop])
            phrase_labels.append(chunk_tags[anchor:stop])
            anchor = None

        pos += 1

    joined = [" ".join(words) for words in phrases]
    return [joined, phrase_pos, phrase_labels]



def _load_best_model(results_path):
    """Build a PRNN configured from a pickled cross-validation results file.

    Args:
        results_path: Path of the pickle file holding the results dictionary
            that ``process_CVresults`` consumes.

    Returns:
        A ``PRNN`` instance whose ``params`` and ``w`` attributes are set to
        the two values returned by ``process_CVresults``.
    """
    model = PRNN()  # Instantiate a model

    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only load result files that come from a trusted source.
    with open(results_path, 'rb') as f:
        cv_results = pickle.load(f)

    best_params, best_weights = process_CVresults(CVresults_dict=cv_results, summarize=False)
    model.params = best_params
    model.w = best_weights
    return model


# Load order matches the original script: model2, model4, model1, model3.
model2 = _load_best_model('CVresults_con_data.pkl')
model4 = _load_best_model('CVresults_con_data_sigmoid.pkl')
model1 = _load_best_model('CVresults_data.pkl')
model3 = _load_best_model('CVresults_data_sigmoid.pkl')



def _format_prediction(tokens, tags, preds):
    """Render one model's predicted labels and the noun chunks they imply."""
    # get_noun_chunks takes (tokens, pos_tags, chunk_tags); passing any other
    # keyword names raises TypeError.
    chunks = get_noun_chunks(tokens=tokens, pos_tags=tags, chunk_tags=preds)
    return "predicted labels:\t" + str(preds) + "\n" + "predicted Noun chunks \t" + str(chunks)


def demo_(sentence):
    """Tag *sentence*, run all four models on it, and format their outputs.

    Args:
        sentence: Raw input text.

    Returns:
        A 5-tuple: the formatted results for model2, model4, model1 and
        model3 (in that order), followed by the list of coarse POS tags.
    """
    tokens, tags = tokens_and_tags(sentence)

    preds1 = predict_for_example(sentence=tokens, tags=tags, model=model1)
    preds3 = predict_for_example(sentence=tokens, tags=tags, model=model3)
    preds2 = predict_for_example(sentence=tokens, tags=tags, model=model2)
    preds4 = predict_for_example(sentence=tokens, tags=tags, model=model4)

    return (
        _format_prediction(tokens, tags, preds2),
        _format_prediction(tokens, tags, preds4),
        _format_prediction(tokens, tags, preds1),
        _format_prediction(tokens, tags, preds3),
        tags,
    )

title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

# The output boxes are listed in the same order as the values demo_ returns:
# model2, model4, model1, model3, then the POS tags.
demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(
        label="sentence for which you want noun chunks",
        lines=1,
        interactive=True,
        show_copy_button=True,
    ),
    outputs=[
        gr.Textbox(
            label="prediction on conditioned data with step activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        # This box shows model4, which is loaded from the *_con_data_sigmoid
        # results file, so its label names the sigmoid activation.
        gr.Textbox(
            label="prediction on conditioned data with sigmoid activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="prediction on all data with step activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="prediction on whole data with sigmoid activation function",
            lines=2,
            interactive=True,
            show_copy_button=True,
        ),
        gr.Textbox(
            label="pos tag label given by nltk library",
            lines=1,
            interactive=True,
            show_copy_button=True,
        ),
    ],
    title=title,
)

demo.launch(share=True)