Spaces:

pranjal065
/

process_fest

Runtime error

App Files Files Community

Ambareesh T N committed on Mar 24, 2023

Commit

c7baaec

1 Parent(s): c2e5ccb

Add application file

Browse files

Files changed (2) hide show

app.py.py +161 -0
requirements.txt +7 -0

app.py.py ADDED Viewed

	@@ -0,0 +1,161 @@

+# -*- coding: utf-8 -*-
+"""Untitled3.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/18DTgeDomshKNQMgYQ6y6mJbBom9mRw5l
+"""
+# Commented out IPython magic to ensure Python compatibility.
+# %%writefile app.py
+# %%writefile 'app.py'
+import nltk
+import math
+import torch
+# from transformers import AutoModelForSequenceClassification, AutoTokenizer
+# from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
+from flair.data import Sentence
+from flair.models import SequenceTagger
+nltk.download('stopwords')
+nltk.download('punkt')
+import streamlit as st
+st.set_page_config(layout="wide")
+def divide_sentence(sentence):
+    """Split free text into sub-sentences at a fixed list of splitter words.
+
+    The text is tokenized with NLTK. Every token whose lower-cased form is in
+    the splitter list closes the current sub-sentence and is itself dropped;
+    the remaining tokens of each piece are re-joined with single spaces.
+
+    Args:
+        sentence: Raw input text.
+
+    Returns:
+        A list of non-empty sub-sentence strings in their original order.
+        Empty input, or input made only of splitter words, yields ``[]``.
+    """
+    # NOTE(review): 'the' and 'i' are not conjunctions; presumably they are
+    # listed so that unpunctuated input is still broken up -- confirm.
+    splitters = {"and", "but", "or", "however", "therefore", "furthermore",
+                 "nevertheless", "the", "i"}
+    subsentences = []
+    current = []
+    for token in word_tokenize(sentence):
+        if token.lower() in splitters:
+            if current:
+                subsentences.append(" ".join(current))
+            current = []
+        else:
+            current.append(token)
+    # Flush the trailing piece only when it holds tokens, so the result never
+    # contains an empty string (e.g. for empty input or a trailing "and").
+    if current:
+        subsentences.append(" ".join(current))
+    return subsentences
+def topic_identify(subsentences):
+    """Tag each sub-sentence with the topic labels predicted for it.
+
+    Loads the ``cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all``
+    checkpoint as a multi-label classifier and keeps every label whose
+    sigmoid probability exceeds 0.5.
+
+    Args:
+        subsentences: Iterable of text fragments to classify.
+
+    Returns:
+        A list with one string per fragment: the selected label names joined
+        by ``','`` (an empty string when no label passes the threshold).
+    """
+    checkpoint = "cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all"
+    # NOTE: the tokenizer and model are loaded again on every call.
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        checkpoint, problem_type="multi_label_classification")
+    model.eval()
+    class_mapping = model.config.id2label
+    topics = []
+    with torch.no_grad():
+        for text in subsentences:
+            # Truncate so an over-long fragment cannot exceed the model's
+            # maximum sequence length.
+            tokens = tokenizer(text, return_tensors='pt', truncation=True)
+            logits = model(**tokens).logits[0]
+            # torch.sigmoid saturates at 0/1, whereas 1 / (1 + math.exp(-x))
+            # raises OverflowError for large-magnitude negative logits.
+            flags = (torch.sigmoid(logits) > 0.5).tolist()
+            topics.append(','.join(
+                class_mapping[idx] for idx, hit in enumerate(flags) if hit))
+    return topics
+def sentiment_score(subsentences):
+    """Classify the sentiment of each sub-sentence.
+
+    Builds a ``sentiment-analysis`` pipeline around the
+    ``cardiffnlp/twitter-roberta-base-sentiment-latest`` checkpoint and runs
+    it on every fragment.
+
+    Args:
+        subsentences: Iterable of text fragments.
+
+    Returns:
+        A list with one ``"<label> , <score>"`` string per fragment, built
+        from the first prediction the pipeline returns.
+    """
+    from transformers import pipeline
+
+    checkpoint = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+    classifier = pipeline(
+        "sentiment-analysis",
+        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
+        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
+    )
+    results = []
+    for fragment in subsentences:
+        # The pipeline answers with a list such as
+        # [{'label': 'positive', 'score': 0.9484752416610718}].
+        best = classifier(fragment)[0]
+        results.append(' , '.join((best['label'], str(best['score']))))
+    return results
+def intent_identify(subsentences):
+    """Predict an intent label for each sub-sentence.
+
+    Wraps the ``cartesinus/fedcsis-intent_baseline-xlm_r-en`` checkpoint in a
+    ``TextClassificationPipeline`` and applies it to every fragment.
+
+    Args:
+        subsentences: Iterable of text fragments.
+
+    Returns:
+        A list with one ``"<label> , <score>"`` string per fragment, built
+        from the first prediction the pipeline returns.
+    """
+    checkpoint = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+    classify = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+
+    def describe(fragment):
+        # Keep only the first prediction returned for the fragment.
+        best = classify(fragment)[0]
+        return ' , '.join((best['label'], str(best['score'])))
+
+    return [describe(fragment) for fragment in subsentences]
+def entity_identify(subsentences):
+    """Extract named entities from each sub-sentence with Flair.
+
+    Loads the Flair ``'ner'`` sequence tagger and runs it on every fragment.
+
+    Args:
+        subsentences: Iterable of text fragments.
+
+    Returns:
+        A list with one string per fragment: the text of every ``'ner'``
+        span found, joined by ``','`` (empty string when there is none).
+    """
+    ner_tagger = SequenceTagger.load('ner')
+    found = []
+    for text in subsentences:
+        flair_sentence = Sentence(text)
+        # predict() attaches its annotations to the Sentence object, which
+        # is then queried for the 'ner' spans.
+        ner_tagger.predict(flair_sentence)
+        span_texts = (span.text for span in flair_sentence.get_spans('ner'))
+        found.append(','.join(span_texts))
+    return found
+def keyword_identify(subsentences, top_n=2):
+    """Pick the most frequent non-stop-words of each sub-sentence.
+
+    Each fragment is sentence- and word-tokenized with NLTK. Tokens are
+    lower-cased, and English stop words and non-alphabetic tokens are
+    discarded before counting.
+
+    Args:
+        subsentences: Iterable of text fragments.
+        top_n: Number of keywords to keep per fragment. Defaults to 2, the
+            number this function has always returned.
+
+    Returns:
+        A list with one string per fragment: up to ``top_n`` keywords joined
+        by ``','``, most frequent first, ties in order of first appearance.
+    """
+    # Build the stop-word set once instead of once per fragment.
+    stop_words = set(stopwords.words('english'))
+    keywords = []
+    for text in subsentences:
+        words = [
+            word.lower()
+            for sentence in sent_tokenize(text)
+            for word in word_tokenize(sentence)
+            if word.isalpha() and word.lower() not in stop_words
+        ]
+        # most_common keeps first-seen order among equally frequent words,
+        # the same ranking a stable descending sort by count produces.
+        top = Counter(words).most_common(top_n)
+        keywords.append(','.join(word for word, _count in top))
+    return keywords
+# ---- Streamlit page: one text box, one button, one result table ----
+st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
+import pandas as pd
+import numpy as np
+sent = st.text_input(label = 'Enter the Text:')
+button = st.button('submit')
+# Sample input for manual testing:
+#sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
+# Skip blank submissions so the models are not run on empty text.
+if button and sent.strip():
+    subsentences = divide_sentence(sent)
+    topic = topic_identify(subsentences)
+    sentiment = sentiment_score(subsentences)
+    intent = intent_identify(subsentences)
+    entity = entity_identify(subsentences)
+    keyword = keyword_identify(subsentences)
+    # One row per sub-sentence; every analysis returns a list of that length.
+    df = pd.DataFrame(
+       {
+        'subsentences': subsentences,
+        # Topics were computed but never displayed before.
+        'topic': topic,
+        'sentiment and score': sentiment,
+        'intent': intent,
+        'entity' : entity,
+        'keyword' : keyword
+       })
+    st.dataframe(data=df, width=None, height=None,use_container_width=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+nltk == 3.7
+torch == 1.13.1
+transformers == 4.25.1
+flair == 0.12.1
+streamlit
+pandas
+numpy