Spaces:
Runtime error
Runtime error
pranjal065
committed on
Commit
·
282bb56
1
Parent(s):
15dd55e
Update app.py
Browse files
app.py
CHANGED
@@ -1,161 +1,158 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
#
|
11 |
-
#
|
12 |
-
#
|
13 |
-
|
14 |
-
import
|
15 |
-
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
#
|
47 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
# for s in subsentences:
|
49 |
-
#
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
def keyword_identify(subsentences):
|
120 |
-
class KeywordExtractor:
|
121 |
-
def __init__(self):
|
122 |
-
self.stop_words = set(stopwords.words('english'))
|
123 |
-
def extract_keywords(self, text):
|
124 |
-
# tokenize sentences
|
125 |
-
sentences = sent_tokenize(text)
|
126 |
-
# tokenize words and remove stop words
|
127 |
-
words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
|
128 |
-
# count word frequencies
|
129 |
-
word_freq = Counter(words)
|
130 |
-
# sort words by frequency
|
131 |
-
sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
|
132 |
-
# return top 2 keywords
|
133 |
-
return [word[0] for word in sorted_words[:2]]
|
134 |
-
key = KeywordExtractor()
|
135 |
-
keywords=[]
|
136 |
-
for s in subsentences:
|
137 |
-
keyword = key.extract_keywords(s)
|
138 |
-
keywords.append(','.join(keyword))
|
139 |
-
return keywords
|
140 |
-
st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
|
141 |
import pandas as pd
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
sentiment = sentiment_score(subsentences)
|
150 |
-
intent = intent_identify(subsentences)
|
151 |
-
entity = entity_identify(subsentences)
|
152 |
-
keyword = keyword_identify(subsentences)
|
153 |
-
df = pd.DataFrame(
|
154 |
-
{
|
155 |
-
'subsentences': subsentences,
|
156 |
-
'sentiment and score': sentiment,
|
157 |
-
'intent': intent,
|
158 |
-
'entity' : entity,
|
159 |
-
'keyword' : keyword
|
160 |
-
})
|
161 |
-
st.dataframe(data=df, width=None, height=None,use_container_width=False)
|
|
|
1 |
+
# import nltk
|
2 |
+
# import math
|
3 |
+
# import torch
|
4 |
+
# # from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
5 |
+
# # from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
6 |
+
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
|
7 |
+
# from nltk.tokenize import word_tokenize, sent_tokenize
|
8 |
+
# from nltk.corpus import stopwords
|
9 |
+
# from collections import Counter
|
10 |
+
# from flair.data import Sentence
|
11 |
+
# from flair.models import SequenceTagger
|
12 |
+
# nltk.download('stopwords')
|
13 |
+
# nltk.download('punkt')
|
14 |
+
# import streamlit as st
|
15 |
+
|
16 |
+
# st.set_page_config(layout="wide")
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
# def divide_sentence(sentence):
|
21 |
+
# conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless",'the','i']
|
22 |
+
# tokens = nltk.word_tokenize(sentence)
|
23 |
+
# subsentences = []
|
24 |
+
# current_subsentence = []
|
25 |
+
# for token in tokens:
|
26 |
+
# if token.lower() in conjunctions:
|
27 |
+
# if len(current_subsentence)>0:
|
28 |
+
# subsentences.append(" ".join(current_subsentence))
|
29 |
+
# current_subsentence = []
|
30 |
+
# else:
|
31 |
+
# current_subsentence.append(token)
|
32 |
+
# # Add the final subsentence to the list
|
33 |
+
# subsentences.append(" ".join(current_subsentence))
|
34 |
+
# # print(subsentences)
|
35 |
+
# # d={}
|
36 |
+
# # for s in subsentences:
|
37 |
+
# # d[s] = {'accuracy':None,}
|
38 |
+
# return subsentences
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
# def topic_identify(subsentences):
|
43 |
+
# def sigmoid(x):
|
44 |
+
# return 1 / (1 + math.exp(-x))
|
45 |
+
# tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
|
46 |
+
# model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
|
47 |
+
# model.eval()
|
48 |
+
# class_mapping = model.config.id2label
|
49 |
+
# topics = []
|
50 |
+
# for text in subsentences:
|
51 |
+
# with torch.no_grad():
|
52 |
+
# tokens = tokenizer(text, return_tensors='pt')
|
53 |
+
# output = model(**tokens)
|
54 |
+
# flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
|
55 |
+
# topic = [class_mapping[n] for n, i in enumerate(flags) if i]
|
56 |
+
# topics.append(','.join(topic))
|
57 |
+
# return topics
|
58 |
+
|
59 |
+
|
60 |
+
# def sentiment_score(subsentences):
|
61 |
+
# tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
|
62 |
+
# model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
|
63 |
+
# from transformers import pipeline
|
64 |
+
# sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
65 |
+
# senti = []
|
66 |
+
# for sen in subsentences:
|
67 |
+
# a=sentiment_task(sen)
|
68 |
+
# # [{'label': 'positive', 'score': 0.9484752416610718}]
|
69 |
+
# a=a[0]
|
70 |
+
# senti.append(a['label']+' , '+str(a['score']))
|
71 |
+
# return senti
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
+
# def intent_identify(subsentences):
|
76 |
+
# model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
|
77 |
+
# tokenizer = AutoTokenizer.from_pretrained(model_name)
|
78 |
+
# model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
79 |
+
# classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
|
80 |
+
# intents = []
|
81 |
# for s in subsentences:
|
82 |
+
# res = classifier(s)
|
83 |
+
# a=res[0]
|
84 |
+
# intents.append(a['label']+' , '+str(a['score']))
|
85 |
+
# return intents
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
# def entity_identify(subsentences):
|
90 |
+
# # load the NER tagger
|
91 |
+
# tagger = SequenceTagger.load('ner')
|
92 |
+
# # create a sentence to analyze
|
93 |
+
# entities = []
|
94 |
+
# for sentence in subsentences:
|
95 |
+
# sentence = Sentence(sentence)
|
96 |
+
# # run NER on the sentence
|
97 |
+
# tagger.predict(sentence)
|
98 |
+
# # print the entities found in the sentence
|
99 |
+
# ent = []
|
100 |
+
# for entity in sentence.get_spans('ner'):
|
101 |
+
# ent.append(entity.text)
|
102 |
+
# entities.append(','.join(ent))
|
103 |
+
# return entities
|
104 |
+
|
105 |
+
|
106 |
+
|
107 |
+
# def keyword_identify(subsentences):
|
108 |
+
# class KeywordExtractor:
|
109 |
+
# def __init__(self):
|
110 |
+
# self.stop_words = set(stopwords.words('english'))
|
111 |
+
# def extract_keywords(self, text):
|
112 |
+
# # tokenize sentences
|
113 |
+
# sentences = sent_tokenize(text)
|
114 |
+
# # tokenize words and remove stop words
|
115 |
+
# words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
|
116 |
+
# # count word frequencies
|
117 |
+
# word_freq = Counter(words)
|
118 |
+
# # sort words by frequency
|
119 |
+
# sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
|
120 |
+
# # return top 2 keywords
|
121 |
+
# return [word[0] for word in sorted_words[:2]]
|
122 |
+
# key = KeywordExtractor()
|
123 |
+
# keywords=[]
|
124 |
+
# for s in subsentences:
|
125 |
+
# keyword = key.extract_keywords(s)
|
126 |
+
# keywords.append(','.join(keyword))
|
127 |
+
# return keywords
|
128 |
+
# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
|
129 |
+
# import pandas as pd
|
130 |
+
# import numpy as np
|
131 |
+
# sent = st.text_input(label = 'Enter the Text:')
|
132 |
+
# button = st.button('submit')
|
133 |
+
# #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
|
134 |
+
# if button:
|
135 |
+
# subsentences = divide_sentence(sent)
|
136 |
+
# topic = topic_identify(subsentences)
|
137 |
+
# sentiment = sentiment_score(subsentences)
|
138 |
+
# intent = intent_identify(subsentences)
|
139 |
+
# entity = entity_identify(subsentences)
|
140 |
+
# keyword = keyword_identify(subsentences)
|
141 |
+
# df = pd.DataFrame(
|
142 |
+
# {
|
143 |
+
# 'subsentences': subsentences,
|
144 |
+
# 'sentiment and score': sentiment,
|
145 |
+
# 'intent': intent,
|
146 |
+
# 'entity' : entity,
|
147 |
+
# 'keyword' : keyword
|
148 |
+
# })
|
149 |
+
# st.dataframe(data=df, width=None, height=None,use_container_width=False)
|
150 |
+
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
import pandas as pd
|
152 |
+
st.title("A Simple Streamlit Web App")
|
153 |
+
name = st.text_input("Enter your name", '')
|
154 |
+
st.write(f"Hello {name}!")
|
155 |
+
x = st.slider("Select an integer x", 0, 10, 1)
|
156 |
+
y = st.slider("Select an integer y", 0, 10, 1)
|
157 |
+
df = pd.DataFrame({"x": [x], "y": [y] , "x + y": [x + y]}, index = ["addition row"])
|
158 |
+
st.write(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|