File size: 816 Bytes
0271ea4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2299201
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# A model for predicting the gender of the author of a news article
## Usage:

```
import re
from transformers import pipeline
from html import unescape
from unicodedata import normalize

# Matches any run of one or more whitespace characters.
re_multispace = re.compile(r"\s+")


def normalize_text(text):
    """Clean up raw article text before it is passed to the model.

    Leading and trailing whitespace is stripped, every run of whitespace
    (including newlines, tabs and carriage returns) is collapsed into a
    single space, HTML entities are decoded and the result is
    NFKC-normalized.

    Args:
        text: The raw article text, or ``None``.

    Returns:
        The normalized text, or ``None`` when ``text`` is ``None``.
    """
    if text is None:
        return None

    # ``\s+`` already matches "\n", "\t" and "\r", so one substitution covers
    # what would otherwise be separate per-character replacements.
    text = re_multispace.sub(" ", text.strip())
    # NOTE(review): entities are decoded *after* whitespace collapsing, so an
    # entity such as "&nbsp;" can reintroduce a space next to an existing one.
    # The order is kept as-is, presumably to match the preprocessing the model
    # was trained with -- confirm before changing it.
    text = unescape(text)
    return normalize("NFKC", text)


# Text-classification pipeline that loads the "hynky/Gender" model together
# with the "ufal/robeczech-base" tokenizer.
model = pipeline(
    task="text-classification",
    model="hynky/Gender",
    tokenizer="ufal/robeczech-base",
    # Truncate inputs to at most 512 tokens.
    truncation=True,
    max_length=512,
    # Request up to five scored labels per input (see the transformers
    # documentation for the exact `top_k` semantics).
    top_k=5,
)


def predict(article):
    """Predict the gender of the author of ``article``.

    Args:
        article: The raw article text; it is cleaned with ``normalize_text``
            before being classified.

    Returns:
        Whatever the ``model`` pipeline returns for the normalized text.
    """
    article = normalize_text(article)
    # Hand the predictions back to the caller instead of discarding them.
    return model(article)


print(predict("Dnes v noci bude pršet."))
```