feat: added texts
Browse files
- app.py +10 -4
- static/description.txt +7 -0
- static/kazsandra.jpg +0 -0
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
from transformers import AutoModelForSequenceClassification
|
@@ -13,12 +15,16 @@ def get_pipe():
|
|
13 |
return TextClassificationPipeline(model=model, tokenizer=tokenizer)
|
14 |
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
input_text = st.text_area('Input text', placeholder='Provide your text', value='Осы кітап қызық сияқты.')
|
20 |
# reviews = ["Бұл бейнефильм маған түк ұнамады.", "Осы кітап қызық сияқты."]
|
21 |
-
|
22 |
# for review in reviews:
|
23 |
if input_text:
|
24 |
out = pipe(input_text)[0]
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
import streamlit as st
|
4 |
|
5 |
from transformers import AutoModelForSequenceClassification
|
|
|
15 |
return TextClassificationPipeline(model=model, tokenizer=tokenizer)
|
16 |
|
17 |
|
18 |
+
st.title('KazSandra')
|
19 |
+
static_folder = Path(__file__).parent / 'static'
|
20 |
+
assert static_folder.exists()
|
21 |
+
|
22 |
+
st.write((static_folder / 'description.txt').read_text())
|
23 |
+
st.image(str(static_folder / 'kazsandra.jpg'))
|
24 |
+
|
25 |
input_text = st.text_area('Input text', placeholder='Provide your text', value='Осы кітап қызық сияқты.')
|
26 |
# reviews = ["Бұл бейнефильм маған түк ұнамады.", "Осы кітап қызық сияқты."]
|
27 |
+
pipe = get_pipe()
|
28 |
# for review in reviews:
|
29 |
if input_text:
|
30 |
out = pipe(input_text)[0]
|
static/description.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
KazSAnDRA is a dataset developed for Kazakh sentiment analysis and is the first and most extensive publicly available resource of its kind. It comprises 180,064 reviews obtained from a variety of sources, accompanied by numerical ratings from 1 to 5 that quantitatively capture customer sentiment. The project also focused on automating Kazakh sentiment classification by developing and evaluating four machine learning models. These models were trained for both polarity classification and score classification, and their performance was assessed under both balanced and imbalanced conditions. The most effective model achieved an F1-score of 0.81 for polarity classification and 0.39 for score classification on the test sets.
|
2 |
+
|
3 |
+
The dataset and fine-tuned models are open access and available for download under the Creative Commons Attribution 4.0 International License (CC BY 4.0) through our GitHub repository.
|
4 |
+
|
5 |
+
DOI: https://doi.org/10.48550/arXiv.2403.19335
|
6 |
+
|
7 |
+
Data: https://github.com/IS2AI/KazSAnDRA
|
static/kazsandra.jpg
ADDED