File size: 3,376 Bytes
44f2935
 
 
 
 
 
 
 
 
32aa11b
44f2935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
import numpy as np
import numpy as np
import pickle
import pandas as pd
from PRNN import PRNN

from PRNN_utils import   batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag it, collapsing tags outside
    {'JJ', 'NN', 'DT'} to the catch-all tag 'OT'.

    Parameters
    ----------
    sentence : str
        Raw input sentence, e.g. "The quick brown fox jumps over the lazy dog".

    Returns
    -------
    tuple[list[str], list[str]]
        Parallel lists: the tokens and their (possibly collapsed) POS tags.
    """
    # Tokenize, then POS-tag with NLTK's default tagger.
    tokens = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(tokens)

    # Only these tags are kept verbatim; everything else becomes 'OT'.
    desired_tags = {'JJ', 'NN', 'DT'}

    words = []
    tags = []
    # Both branches of the original appended the word unconditionally —
    # only the tag differed — so a conditional expression removes the
    # duplicated append without changing behavior.
    for word, tag in tagged_words:
        words.append(word)
        tags.append(tag if tag in desired_tags else 'OT')

    return words, tags

def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map POS tag strings to their integer ids.

    Parameters
    ----------
    tags : sequence of str, optional
        POS tags to encode; each must be one of 'NN', 'DT', 'JJ', 'OT'.

    Returns
    -------
    list[int]
        Integer encoding (NN=1, DT=2, JJ=3, OT=4), one per input tag.

    Raises
    ------
    KeyError
        If a tag is not one of the four known tags.
    """
    # Tuple default replaces the original mutable list default (the
    # shared-mutable-default pitfall); callers may pass any sequence.
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]

def predict_for_example(sentence, tags, model):
    """Encode *tags* as integer ids and run *model* over the sequence.

    The *sentence* tokens are accepted for interface symmetry with the
    callers but are not used here — prediction depends on the tags only.

    Returns whatever ``model.predict_tags`` produces (per-token chunk labels).
    """
    encoded = tags2sentence(create_pos_tags(tags))
    return model.predict_tags(encoded)

def find_chunks(sentence, preds):
    """Group tokens into chunks from per-token boundary predictions.

    A prediction of 1 opens a new chunk at that token; a prediction of 0
    extends the currently open chunk (if any). Only chunks spanning at
    least two tokens are kept, joined with single spaces.

    Parameters
    ----------
    sentence : list[str]
        Tokens, indexed in lockstep with *preds*.
    preds : list[int]
        Per-token labels (1 = chunk start, 0 = continuation).

    Returns
    -------
    list[str]
        The multi-token chunks, in order of appearance.
    """
    chunks = []
    current = []

    def _flush():
        # Single-token chunks are deliberately discarded (len > 1 check).
        if len(current) > 1:
            chunks.append(" ".join(current))

    for i, label in enumerate(preds):
        if label == 1:
            _flush()
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    _flush()
    return chunks


# Model with step activation: restore best cross-validation parameters.
# NOTE(review): pickle.load can execute arbitrary code from the file —
# only load checkpoints from a trusted source.
model2 = PRNN()  # Instantiate a model

# Loading the dictionary from the file using pickle
with open('CVresults_con_data.pkl', 'rb') as f:
    model_dict2 = pickle.load(f)

# Pick the best params/weights across CV folds (summarize=False: no printout).
P_best2, W_best2 = process_CVresults(CVresults_dict=model_dict2, summarize=False)
model2.params = P_best2
model2.w = W_best2

# Model with sigmoid activation: same restore procedure, different checkpoint.
model4 = PRNN()  # Instantiate a model

# Loading the dictionary from the file using pickle
with open('CVresults_con_data_sigmoid.pkl', 'rb') as f:
    model_dict4 = pickle.load(f)

P_best4, W_best4 = process_CVresults(CVresults_dict=model_dict4, summarize=False)
model4.params = P_best4
model4.w = W_best4

def demo_(sentence):
  """Gradio callback: chunk *sentence* with both models.

  Returns four strings: step-model labels, sigmoid-model labels,
  step-model chunks, sigmoid-model chunks (order matches the UI outputs).
  """
  tokens, pos_tags = tokens_and_tags(sentence)
  step_preds = predict_for_example(sentence=tokens, tags=pos_tags, model=model2)
  sigmoid_preds = predict_for_example(sentence=tokens, tags=pos_tags, model=model4)
  step_chunks = find_chunks(sentence=tokens, preds=step_preds)
  sigmoid_chunks = find_chunks(sentence=tokens, preds=sigmoid_preds)
  return str(step_preds), str(sigmoid_preds), str(step_chunks), str(sigmoid_chunks)

# Wire the demo_ callback into a Gradio UI: one text input, four text
# outputs (step/sigmoid labels, then step/sigmoid chunks — order must
# match demo_'s return tuple).
title="POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"
demo = gr.Interface(fn=demo_, inputs=gr.Textbox(label="sentence for which you want noun chunks"), outputs=[gr.Textbox(label="predicted chunk label with step activation function"),gr.Textbox(label="predicted chunk label with sigmoid activation function"),gr.Textbox(label="predicted Noun chunk with step activation function"),gr.Textbox(label="predicted Noun chunk with sigmoid activation function")],title=title)

# share=True exposes a public tunnel URL in addition to the local server.
demo.launch(share=True)