NimaKL committed on
Commit
dc010b2
Β·
1 Parent(s): 39a0a6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -28
app.py CHANGED
@@ -7,12 +7,38 @@ col1, col2= st.columns(2)
7
 
8
  with col1:
9
  st.title("Spamd: Turkish Spam Detector")
10
- st.markdown("Message spam detection tool for Turkish language. Due the small size of the dataset, I decided to go with transformers technology Google BERT. Using the Turkish pre-trained model BERTurk, I imporved the accuracy of the tool by 18 percent compared to the previous model which used fastText.")
11
 
12
 
 
 
 
 
13
 
 
 
14
 
 
 
 
 
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
  import torch
@@ -47,34 +73,8 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
47
  def namestr(obj, namespace):
48
  return [name for name in namespace if namespace[name] is obj]
49
 
50
- def predict(new_sentence):
51
- # We need Token IDs and Attention Mask for inference on the new sentence
52
- test_ids = []
53
- test_attention_mask = []
54
-
55
- # Apply the tokenizer
56
- encoding = preprocessing(new_sentence, tokenizer)
57
 
58
- # Extract IDs and Attention Mask
59
- test_ids.append(encoding['input_ids'])
60
- test_attention_mask.append(encoding['attention_mask'])
61
- test_ids = torch.cat(test_ids, dim = 0)
62
- test_attention_mask = torch.cat(test_attention_mask, dim = 0)
63
-
64
- # Forward pass, calculate logit predictions
65
- with torch.no_grad():
66
- output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))
67
-
68
- prediction = 'Spam' if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 'Normal'
69
- pred = 'Predicted Class: '+ prediction
70
- with col2:
71
- st.header(pred)
72
 
73
  #st.write('Input', namestr(new_sentence, globals()),': \n', new_sentence)
74
 
75
- with col2:
76
- text = st.text_input("Enter the text you'd like to analyze for spam.")
77
- if text:
78
- predict(text)
79
- if st.button('Analyze'):
80
- predict(text)
 
7
 
8
  with col1:
9
  st.title("Spamd: Turkish Spam Detector")
10
+ st.markdown("Message spam detection tool for Turkish language. Due the small size of the dataset, I decided to go with transformers technology Google BERT. Using the Turkish pre-trained model BERTurk, I imporved the accuracy of the tool by 18 percent compared to the previous model which used fastText.")
11
 
12
 
13
+ def predict(new_sentence):
14
+ # We need Token IDs and Attention Mask for inference on the new sentence
15
+ test_ids = []
16
+ test_attention_mask = []
17
 
18
+ # Apply the tokenizer
19
+ encoding = preprocessing(new_sentence, tokenizer)
20
 
21
+ # Extract IDs and Attention Mask
22
+ test_ids.append(encoding['input_ids'])
23
+ test_attention_mask.append(encoding['attention_mask'])
24
+ test_ids = torch.cat(test_ids, dim = 0)
25
+ test_attention_mask = torch.cat(test_attention_mask, dim = 0)
26
 
27
+ # Forward pass, calculate logit predictions
28
+ with torch.no_grad():
29
+ output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))
30
+
31
+ prediction = 'Spam' if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 'Normal'
32
+ pred = 'Predicted Class: '+ prediction
33
+ with col2:
34
+ st.header(pred)
35
+
36
+ with col2:
37
+ text = st.text_input("Enter the text you'd like to analyze for spam.")
38
+ if text or st.button('Analyze'):
39
+ predict(text)
40
+
41
+
42
 
43
 
44
  import torch
 
73
  def namestr(obj, namespace):
74
  return [name for name in namespace if namespace[name] is obj]
75
 
 
 
 
 
 
 
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  #st.write('Input', namestr(new_sentence, globals()),': \n', new_sentence)
79
 
80
+