File size: 3,353 Bytes
44f2935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
import numpy as np
import numpy as np
import pickle
import pandas as pd
from PRNN import PRNN

from PRNN_utils import   batch_calculate_grads, check_conditions, check_all_conditions, train_and_val, prepare_folds, process_CVresults, tags2sentence
import nltk
from nltk.tokenize import word_tokenize

def tokens_and_tags(sentence):
    """Tokenize *sentence* and POS-tag every token, collapsing rare tags.

    Tags outside {'JJ', 'NN', 'DT'} are replaced by the catch-all label
    'OT'.  Returns two parallel lists: (tokens, tags), one entry per token.
    """
    # Only these tags are kept verbatim; everything else becomes 'OT'.
    kept_tags = {'JJ', 'NN', 'DT'}

    tokens = word_tokenize(sentence)
    tagged_words = nltk.pos_tag(tokens)

    # Every token is retained; only its tag may be collapsed.
    words = [word for word, _ in tagged_words]
    tags = [tag if tag in kept_tags else 'OT' for _, tag in tagged_words]

    return words, tags

def create_pos_tags(tags=('NN', 'JJ', 'DT', 'OT')):
    """Map POS tag names to the integer ids used by the PRNN model.

    Parameters
    ----------
    tags : iterable of str, default ('NN', 'JJ', 'DT', 'OT')
        Tag names; each must be one of 'NN', 'DT', 'JJ', 'OT'.

    Returns
    -------
    list of int
        Integer id for each tag, in order.

    Raises
    ------
    KeyError
        If a tag is not one of the four known names.
    """
    # Tuple default (immutable) replaces the original mutable list default.
    pos_dict = {'NN': 1, 'DT': 2, 'JJ': 3, 'OT': 4}
    return [pos_dict[tag] for tag in tags]

def predict_for_example(sentence, tags, model):
    """Predict chunk labels for a tagged sentence using *model*.

    NOTE(review): *sentence* is accepted for interface symmetry with the
    other helpers but is not used here — prediction depends only on *tags*.
    """
    encoded = tags2sentence(create_pos_tags(tags))
    return model.predict_tags(encoded)

def find_chunks(sentence, preds):
    """Group words of *sentence* into chunks based on *preds* labels.

    A prediction of 1 opens a new chunk at that word; a prediction of 0
    extends the currently open chunk (if any).  Chunks with fewer than two
    words are discarded.  Returns the surviving chunks as space-joined
    strings, in order of appearance.
    """
    chunks = []
    current = []

    def flush():
        # Single-word chunks are deliberately dropped.
        if len(current) > 1:
            chunks.append(" ".join(current))

    for i, label in enumerate(preds):
        if label == 1:
            flush()
            current = [sentence[i]]
        elif label == 0 and current:
            current.append(sentence[i])
    flush()

    return chunks


def _load_pretrained_prnn(path):
    """Build a PRNN and initialize it from pickled cross-validation results.

    Reads the CV-results dict stored at *path*, extracts the best params
    and weights via process_CVresults, and installs them on a fresh model.

    NOTE(review): pickle.load on an untrusted file can execute arbitrary
    code — these .pkl files are assumed to ship alongside the app.
    """
    model = PRNN()
    with open(path, 'rb') as f:
        cv_results = pickle.load(f)
    params, weights = process_CVresults(CVresults_dict=cv_results, summarize=False)
    model.params = params
    model.w = weights
    return model


# Step-activation and sigmoid-activation models, restored from disk.
model2 = _load_pretrained_prnn('CVresults_con_data.pkl')
model4 = _load_pretrained_prnn('CVresults_con_data_sigmoid.pkl')

def demo_(sentence):
    """Gradio handler: chunk predictions from both trained models.

    Returns four strings: the raw label sequences from the step-activation
    (model2) and sigmoid-activation (model4) models, followed by the noun
    chunks each one extracts from the input sentence.
    """
    words, tags = tokens_and_tags(sentence)
    step_preds = predict_for_example(sentence=words, tags=tags, model=model2)
    sigm_preds = predict_for_example(sentence=words, tags=tags, model=model4)
    step_chunks = find_chunks(sentence=words, preds=step_preds)
    sigm_chunks = find_chunks(sentence=words, preds=sigm_preds)
    return str(step_preds), str(sigm_preds), str(step_chunks), str(sigm_chunks)

# Assemble and launch the Gradio demo UI.
title = "POS-Tagged Corpus Analysis: Training a Recurrent Perceptron for Noun Chunk Identification"

demo = gr.Interface(
    fn=demo_,
    inputs=gr.Textbox(label="sentence for which you want noun chunks"),
    outputs=[
        gr.Textbox(label="predicted chunk label with step activation function"),
        gr.Textbox(label="predicted chunk label with sigmoid activation function"),
        gr.Textbox(label="predicted Noun chunk with step activation function"),
        gr.Textbox(label="predicted Noun chunk with sigmoid activation function"),
    ],
    title=title,
)

demo.launch(share=True)