pranjal065 committed on
Commit
282bb56
·
1 Parent(s): 15dd55e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -159
app.py CHANGED
@@ -1,161 +1,158 @@
1
- # -*- coding: utf-8 -*-
2
- """Untitled3.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/18DTgeDomshKNQMgYQ6y6mJbBom9mRw5l
8
- """
9
-
10
- # Commented out IPython magic to ensure Python compatibility.
11
- # %%writefile app.py
12
- # %%writefile 'app.py'
13
- import nltk
14
- import math
15
- import torch
16
- # from transformers import AutoModelForSequenceClassification, AutoTokenizer
17
- # from transformers import AutoTokenizer, AutoModelForSequenceClassification
18
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
19
- from nltk.tokenize import word_tokenize, sent_tokenize
20
- from nltk.corpus import stopwords
21
- from collections import Counter
22
- from flair.data import Sentence
23
- from flair.models import SequenceTagger
24
- nltk.download('stopwords')
25
- nltk.download('punkt')
26
- import streamlit as st
27
-
28
- st.set_page_config(layout="wide")
29
-
30
-
31
-
32
- def divide_sentence(sentence):
33
- conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless",'the','i']
34
- tokens = nltk.word_tokenize(sentence)
35
- subsentences = []
36
- current_subsentence = []
37
- for token in tokens:
38
- if token.lower() in conjunctions:
39
- if len(current_subsentence)>0:
40
- subsentences.append(" ".join(current_subsentence))
41
- current_subsentence = []
42
- else:
43
- current_subsentence.append(token)
44
- # Add the final subsentence to the list
45
- subsentences.append(" ".join(current_subsentence))
46
- # print(subsentences)
47
- # d={}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  # for s in subsentences:
49
- # d[s] = {'accuracy':None,}
50
- return subsentences
51
-
52
-
53
-
54
- def topic_identify(subsentences):
55
- def sigmoid(x):
56
- return 1 / (1 + math.exp(-x))
57
- tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
58
- model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
59
- model.eval()
60
- class_mapping = model.config.id2label
61
- topics = []
62
- for text in subsentences:
63
- with torch.no_grad():
64
- tokens = tokenizer(text, return_tensors='pt')
65
- output = model(**tokens)
66
- flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
67
- topic = [class_mapping[n] for n, i in enumerate(flags) if i]
68
- topics.append(','.join(topic))
69
- return topics
70
-
71
-
72
- def sentiment_score(subsentences):
73
- tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
74
- model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
75
- from transformers import pipeline
76
- sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
77
- senti = []
78
- for sen in subsentences:
79
- a=sentiment_task(sen)
80
- # [{'label': 'positive', 'score': 0.9484752416610718}]
81
- a=a[0]
82
- senti.append(a['label']+' , '+str(a['score']))
83
- return senti
84
-
85
-
86
-
87
- def intent_identify(subsentences):
88
- model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
89
- tokenizer = AutoTokenizer.from_pretrained(model_name)
90
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
91
- classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
92
- intents = []
93
- for s in subsentences:
94
- res = classifier(s)
95
- a=res[0]
96
- intents.append(a['label']+' , '+str(a['score']))
97
- return intents
98
-
99
-
100
-
101
- def entity_identify(subsentences):
102
- # load the NER tagger
103
- tagger = SequenceTagger.load('ner')
104
- # create a sentence to analyze
105
- entities = []
106
- for sentence in subsentences:
107
- sentence = Sentence(sentence)
108
- # run NER on the sentence
109
- tagger.predict(sentence)
110
- # print the entities found in the sentence
111
- ent = []
112
- for entity in sentence.get_spans('ner'):
113
- ent.append(entity.text)
114
- entities.append(','.join(ent))
115
- return entities
116
-
117
-
118
-
119
- def keyword_identify(subsentences):
120
- class KeywordExtractor:
121
- def __init__(self):
122
- self.stop_words = set(stopwords.words('english'))
123
- def extract_keywords(self, text):
124
- # tokenize sentences
125
- sentences = sent_tokenize(text)
126
- # tokenize words and remove stop words
127
- words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
128
- # count word frequencies
129
- word_freq = Counter(words)
130
- # sort words by frequency
131
- sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
132
- # return top 2 keywords
133
- return [word[0] for word in sorted_words[:2]]
134
- key = KeywordExtractor()
135
- keywords=[]
136
- for s in subsentences:
137
- keyword = key.extract_keywords(s)
138
- keywords.append(','.join(keyword))
139
- return keywords
140
- st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
141
  import pandas as pd
142
- import numpy as np
143
- sent = st.text_input(label = 'Enter the Text:')
144
- button = st.button('submit')
145
- #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
146
- if button:
147
- subsentences = divide_sentence(sent)
148
- topic = topic_identify(subsentences)
149
- sentiment = sentiment_score(subsentences)
150
- intent = intent_identify(subsentences)
151
- entity = entity_identify(subsentences)
152
- keyword = keyword_identify(subsentences)
153
- df = pd.DataFrame(
154
- {
155
- 'subsentences': subsentences,
156
- 'sentiment and score': sentiment,
157
- 'intent': intent,
158
- 'entity' : entity,
159
- 'keyword' : keyword
160
- })
161
- st.dataframe(data=df, width=None, height=None,use_container_width=False)
 
1
+ # import nltk
2
+ # import math
3
+ # import torch
4
+ # # from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
+ # # from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ # from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
7
+ # from nltk.tokenize import word_tokenize, sent_tokenize
8
+ # from nltk.corpus import stopwords
9
+ # from collections import Counter
10
+ # from flair.data import Sentence
11
+ # from flair.models import SequenceTagger
12
+ # nltk.download('stopwords')
13
+ # nltk.download('punkt')
14
+ # import streamlit as st
15
+
16
+ # st.set_page_config(layout="wide")
17
+
18
+
19
+
20
+ # def divide_sentence(sentence):
21
+ # conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless",'the','i']
22
+ # tokens = nltk.word_tokenize(sentence)
23
+ # subsentences = []
24
+ # current_subsentence = []
25
+ # for token in tokens:
26
+ # if token.lower() in conjunctions:
27
+ # if len(current_subsentence)>0:
28
+ # subsentences.append(" ".join(current_subsentence))
29
+ # current_subsentence = []
30
+ # else:
31
+ # current_subsentence.append(token)
32
+ # # Add the final subsentence to the list
33
+ # subsentences.append(" ".join(current_subsentence))
34
+ # # print(subsentences)
35
+ # # d={}
36
+ # # for s in subsentences:
37
+ # # d[s] = {'accuracy':None,}
38
+ # return subsentences
39
+
40
+
41
+
42
+ # def topic_identify(subsentences):
43
+ # def sigmoid(x):
44
+ # return 1 / (1 + math.exp(-x))
45
+ # tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
46
+ # model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
47
+ # model.eval()
48
+ # class_mapping = model.config.id2label
49
+ # topics = []
50
+ # for text in subsentences:
51
+ # with torch.no_grad():
52
+ # tokens = tokenizer(text, return_tensors='pt')
53
+ # output = model(**tokens)
54
+ # flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
55
+ # topic = [class_mapping[n] for n, i in enumerate(flags) if i]
56
+ # topics.append(','.join(topic))
57
+ # return topics
58
+
59
+
60
+ # def sentiment_score(subsentences):
61
+ # tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
62
+ # model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
63
+ # from transformers import pipeline
64
+ # sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
65
+ # senti = []
66
+ # for sen in subsentences:
67
+ # a=sentiment_task(sen)
68
+ # # [{'label': 'positive', 'score': 0.9484752416610718}]
69
+ # a=a[0]
70
+ # senti.append(a['label']+' , '+str(a['score']))
71
+ # return senti
72
+
73
+
74
+
75
+ # def intent_identify(subsentences):
76
+ # model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
77
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
78
+ # model = AutoModelForSequenceClassification.from_pretrained(model_name)
79
+ # classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
80
+ # intents = []
81
  # for s in subsentences:
82
+ # res = classifier(s)
83
+ # a=res[0]
84
+ # intents.append(a['label']+' , '+str(a['score']))
85
+ # return intents
86
+
87
+
88
+
89
+ # def entity_identify(subsentences):
90
+ # # load the NER tagger
91
+ # tagger = SequenceTagger.load('ner')
92
+ # # create a sentence to analyze
93
+ # entities = []
94
+ # for sentence in subsentences:
95
+ # sentence = Sentence(sentence)
96
+ # # run NER on the sentence
97
+ # tagger.predict(sentence)
98
+ # # print the entities found in the sentence
99
+ # ent = []
100
+ # for entity in sentence.get_spans('ner'):
101
+ # ent.append(entity.text)
102
+ # entities.append(','.join(ent))
103
+ # return entities
104
+
105
+
106
+
107
+ # def keyword_identify(subsentences):
108
+ # class KeywordExtractor:
109
+ # def __init__(self):
110
+ # self.stop_words = set(stopwords.words('english'))
111
+ # def extract_keywords(self, text):
112
+ # # tokenize sentences
113
+ # sentences = sent_tokenize(text)
114
+ # # tokenize words and remove stop words
115
+ # words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
116
+ # # count word frequencies
117
+ # word_freq = Counter(words)
118
+ # # sort words by frequency
119
+ # sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
120
+ # # return top 2 keywords
121
+ # return [word[0] for word in sorted_words[:2]]
122
+ # key = KeywordExtractor()
123
+ # keywords=[]
124
+ # for s in subsentences:
125
+ # keyword = key.extract_keywords(s)
126
+ # keywords.append(','.join(keyword))
127
+ # return keywords
128
+ # st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
129
+ # import pandas as pd
130
+ # import numpy as np
131
+ # sent = st.text_input(label = 'Enter the Text:')
132
+ # button = st.button('submit')
133
+ # #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
134
+ # if button:
135
+ # subsentences = divide_sentence(sent)
136
+ # topic = topic_identify(subsentences)
137
+ # sentiment = sentiment_score(subsentences)
138
+ # intent = intent_identify(subsentences)
139
+ # entity = entity_identify(subsentences)
140
+ # keyword = keyword_identify(subsentences)
141
+ # df = pd.DataFrame(
142
+ # {
143
+ # 'subsentences': subsentences,
144
+ # 'sentiment and score': sentiment,
145
+ # 'intent': intent,
146
+ # 'entity' : entity,
147
+ # 'keyword' : keyword
148
+ # })
149
+ # st.dataframe(data=df, width=None, height=None,use_container_width=False)
150
+ import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  import pandas as pd
152
+ st.title("A Simple Streamlit Web App")
153
+ name = st.text_input("Enter your name", '')
154
+ st.write(f"Hello {name}!")
155
+ x = st.slider("Select an integer x", 0, 10, 1)
156
+ y = st.slider("Select an integer y", 0, 10, 1)
157
+ df = pd.DataFrame({"x": [x], "y": [y] , "x + y": [x + y]}, index = ["addition row"])
158
+ st.write(df)