azamat committed on
Commit
3e22f77
·
1 Parent(s): d2228ca
Files changed (2) hide show
  1. app.py +50 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+ from transformers import pipeline
4
+ from transformers import AutoTokenizer
5
+ from transformers import AutoModelForSequenceClassification
6
+
7
def process_tweet(tweet):
    """Normalise a raw tweet before it is passed to the models.

    Steps, in order: drop links, drop @-mentions, collapse whitespace runs
    into a single space, unwrap hashtags (``#word`` -> ``word``) and strip
    leading/trailing quote characters.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned text. Only quote characters are trimmed from the ends,
        so a space left behind by a removed link or mention may remain.
    """
    # Remove links. The bare-domain branch must consume the non-whitespace
    # run after "www."; matching whitespace there left "www.site.com" intact.
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', tweet)
    # Remove usernames.
    tweet = re.sub(r'@[^\s]+', '', tweet)
    # Collapse runs of whitespace into a single space.
    tweet = re.sub(r'[\s]+', ' ', tweet)
    # Replace hashtags with the bare word.
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # Trim surrounding single/double quotes (whitespace is left untouched).
    return tweet.strip('\'"')
19
+
20
# Checkpoint shared by the coordinate model and its tokenizer.
_COORDINATES_CHECKPOINT = "azamat/geocoder_model_xlm_roberta_50"

# Tokenizer used to encode text for the coordinate model.
tokenizer = AutoTokenizer.from_pretrained(_COORDINATES_CHECKPOINT)

# Classification pipeline whose label/score output drives the relevancy check.
relevancy_pipeline = pipeline(
    "sentiment-analysis",
    model="azamat/geocoder_model",
)

# Sequence-classification model whose outputs are read as coordinates.
coordinates_model = AutoModelForSequenceClassification.from_pretrained(
    _COORDINATES_CHECKPOINT
)
29
+
30
def predict_relevancy(text):
    """Run the relevancy pipeline on ``text``.

    Args:
        text: Text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first pipeline result.
    """
    top_result = relevancy_pipeline(text)[0]
    return top_result['label'], top_result['score']
33
+
34
def predict_coordinates(text):
    """Predict a coordinate pair for ``text`` with the coordinate model.

    Args:
        text: Pre-processed tweet text (a single string).

    Returns:
        A ``(lat, lon)`` tuple of Python floats, read from the first two
        output values the model produces for the single input.
    """
    # Function-scope import: torch is not imported at module level.
    import torch

    encoding = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors='pt',
    )
    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        outputs = coordinates_model(**encoding)

    # outputs[0] is the logits tensor shaped (batch, num_labels). A single
    # string yields a batch of one, so both values live in row 0; indexing
    # row 1 of the batch dimension raises IndexError.
    # NOTE(review): assumes output 0 is latitude and output 1 is longitude —
    # confirm against the model's training setup.
    row = outputs[0][0]
    return row[0].item(), row[1].item()
39
+
40
def predict(text):
    """Build the human-readable prediction shown in the UI for a tweet.

    The tweet is cleaned with ``process_tweet`` and scored by the relevancy
    model; coordinates are predicted only when the label is ``'relevant'``.

    Args:
        text: Raw tweet text entered by the user.

    Returns:
        A message stating the relevancy confidence and, for relevant tweets,
        the predicted latitude and longitude.
    """
    text = process_tweet(text)
    relevancy_label, relevancy_score = predict_relevancy(text)
    # Two decimals keeps the percentage readable instead of raw float noise.
    confidence = f"{relevancy_score * 100:.2f}"

    if relevancy_label != 'relevant':
        return f"Relevancy model is confident for {confidence}% that tweet does not have the geolocation relevant information."

    lat, lon = predict_coordinates(text)
    return (
        f"Relevancy model is confident for {confidence}% that tweet has the geolocation relevant information.\n"
        f"Predicted location coordinates are: lat: {lat} lon: {lon}"
    )
48
+
49
# Gradio UI: a single text input and a single text output, served by predict().
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ datasets