Joshnicholas committed on
Commit
8894bfe
·
verified ·
1 Parent(s): 4db4bd5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Adapted from https://huggingface.co/spaces/valurank/News_Articles_Categorization
2
+
3
+ #importing the necessary libraries
4
+ import gradio as gr
5
+ import numpy as np
6
+ import pandas as pd
7
+ import re
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ import torch
10
+
11
# Categories the classifier can assign. get_category indexes this list by the
# position of the highest-scoring model output, so the order is significant.
labels = [
    "business",
    "science",
    "health",
    "world",
    "sport",
    "politics",
    "entertainment",
    "tech",
]

# Load the fine-tuned checkpoint together with its matching tokenizer
model_name = "valurank/finetuned-distilbert-news-article-categorization"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
18
+
19
+ """
20
+ #Reading in the text file
21
+ def read_in_text(url):
22
+ with open(url, 'r') as file:
23
+ article = file.read()
24
+
25
+ return article
26
+ """
27
+
28
def clean_text(raw_text):
    """Normalise raw article text before it is tokenized.

    Steps, in order:
      1. Drop every non-ASCII character.
      2. Turn newlines and tabs into spaces and collapse runs of spaces.
      3. Remove "Date d/m/yyyy" stamps and "hh:mm XX YY" time stamps.
      4. Collapse and trim again, because step 3 can leave doubled or
         dangling spaces where a stamp used to be.

    Args:
        raw_text: The article text as a str.

    Returns:
        The cleaned text with single spaces between words and no leading
        or trailing whitespace.
    """
    # remove non-ascii characters (e.g. accented or Chinese characters)
    text = raw_text.encode("ascii", errors="ignore").decode("ascii")

    # Whitespace must be normalised first: the stamp patterns below expect
    # exactly one whitespace character between their parts.
    text = re.sub(r"[\n\t]", " ", text)
    text = re.sub(r" +", " ", text).strip()

    text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text)  # remove date
    text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text)  # remove time

    # Deleting a stamp leaves the spaces that surrounded it; tidy them up.
    return re.sub(r" +", " ", text).strip()
45
+
46
+ #Defining a function to get the category of the news article
47
def get_category(text):
    """Return the predicted news category for an article.

    The text is cleaned with clean_text, encoded by the module-level
    tokenizer (with truncation enabled), and scored by the module-level
    model.

    Args:
        text: Raw article text.

    Returns:
        The entry of ``labels`` at the index of the highest-scoring logit.
    """
    text = clean_text(text)

    input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_tensor).logits

    # Softmax is monotonic, so the arg-max of the logits equals the arg-max
    # of the probabilities; no need to compute them or round-trip via NumPy.
    best_index = int(logits[0].argmax())

    return labels[best_index]
60
+
61
+ #Creating the interface for the gradio app
62
demo = gr.Interface(
    fn=get_category,
    # gr.inputs.* is the legacy component namespace (deprecated in Gradio 3.x
    # and removed in 4.x); the top-level component is the supported spelling.
    inputs=gr.Textbox(label="Drop your articles here"),
    outputs="text",
    title="News Article Categorization",
)


# Launch the Gradio app only when this file is run as a script
if __name__ == "__main__":
    demo.launch(debug=True)