Spaces:

Joshnicholas
/

news_categorisation

Runtime error

App Files Files Community

Joshnicholas committed on Feb 19, 2024

Commit

8894bfe

verified ·

1 Parent(s): 4db4bd5

Create app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+### Adapted from https://huggingface.co/spaces/valurank/News_Articles_Categorization
+#importing the necessary libraries
+import gradio as gr
+import numpy as np
+import pandas as pd
+import re
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Category names the classifier can return. get_category uses the position of
+# each name as the class index, so the order presumably mirrors the model's
+# output classes -- verify against the model config before reordering.
+labels = [
+  "business",
+  "science",
+  "health",
+  "world",
+  "sport",
+  "politics",
+  "entertainment",
+  "tech",
+]
+# Load the tokenizer and the sequence-classification model from the same checkpoint.
+model_name = "valurank/finetuned-distilbert-news-article-categorization"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+# Disabled helper, kept inside a bare string literal so it never executes:
+# it reads an article from a local file path.
+"""
+#Reading in the text file
+def read_in_text(url):
+  with open(url, 'r') as file:
+    article = file.read()
+    return article
+"""
+def clean_text(raw_text):
+  """Normalise raw article text before it is tokenised.
+
+  Drops every non-ASCII character, folds newlines, tabs and runs of spaces
+  into single spaces, removes "Date 1/2/2020"-style stamps and time stamps
+  made of h:mm followed by two upper-case words (e.g. "10:30 AM EST"), and
+  returns the result with no leading, trailing or doubled spaces.
+
+  Args:
+    raw_text: The article text. ``None`` or an empty string yields ``""``.
+
+  Returns:
+    The cleaned, single-spaced ASCII text.
+  """
+  if not raw_text:
+    return ""
+  # Encoding with errors="ignore" silently discards every non-ASCII character.
+  text = raw_text.encode("ascii", errors="ignore").decode("ascii")
+  # Fold whitespace first: the date/time patterns below allow only a single
+  # whitespace character between their parts.
+  text = re.sub(r"[\n\t ]+", " ", text).strip()
+  text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text) #remove date
+  # NOTE(review): the trailing [A-Z]+ can also consume the capital letter of
+  # a following word (e.g. "5:00 PM Monday") -- confirm the expected format.
+  text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text) #remove time
+  # Removing a stamp can leave a doubled or dangling space behind; tidy again.
+  return re.sub(" +", " ", text).strip()
+#Defining a function to get the category of the news article
+def get_category(text):
+  """Return the predicted news category for an article.
+
+  The text is cleaned with clean_text, tokenised (truncated to the
+  tokenizer's limit) and passed through the module-level model; the entry of
+  ``labels`` at the index of the highest logit is returned.
+
+  Args:
+    text: Raw article text.
+
+  Returns:
+    One of the strings in ``labels``, or ``""`` when there is no text left
+    to classify after cleaning.
+  """
+  cleaned = clean_text(text) if text else ""
+  if not cleaned:
+    # Classifying an empty string would only yield an arbitrary label.
+    return ""
+  input_tensor = tokenizer.encode(cleaned, return_tensors="pt", truncation=True)
+  # Inference only: skip autograd bookkeeping for the forward pass.
+  with torch.no_grad():
+    logits = model(input_tensor).logits
+  # Softmax is monotonic, so the arg-max of the logits equals the arg-max of
+  # the probabilities; no need to materialise them.
+  max_index = int(torch.argmax(logits, dim=1)[0])
+  return labels[max_index]
+#Creating the interface for the Gradio app
+# Use the top-level gr.Textbox component: the legacy gr.inputs namespace was
+# removed in Gradio 4.x, so gr.inputs.Textbox raises AttributeError there.
+demo = gr.Interface(
+  fn=get_category,
+  inputs=gr.Textbox(label="Drop your articles here"),
+  outputs="text",
+  title="News Article Categorization",
+)
+#Launching the gradio app
+if __name__ == "__main__":
+  demo.launch(debug=True)