Spaces:
Sleeping
Sleeping
Commit
·
5037fb3
1
Parent(s):
80d50c2
progress more 40+
Browse files
app.py
CHANGED
|
@@ -34,12 +34,18 @@ def create_analysis_data(df):
|
|
| 34 |
|
| 35 |
# Function for lemmatizing Russian text
|
| 36 |
def lemmatize_text(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
words = text.split()
|
| 38 |
lemmatized_words = []
|
| 39 |
for word in tqdm(words, desc="Lemmatizing", unit="word"):
|
| 40 |
lemmatized_word = ''.join(mystem.lemmatize(word))
|
| 41 |
lemmatized_words.append(lemmatized_word)
|
| 42 |
-
return ' '.join(lemmatized_words)
|
| 43 |
|
| 44 |
# Translation model for Russian to English
|
| 45 |
model_name = "Helsinki-NLP/opus-mt-ru-en"
|
|
@@ -243,7 +249,7 @@ def create_output_file(df, uploaded_file, analysis_df):
|
|
| 243 |
return output
|
| 244 |
|
| 245 |
def main():
|
| 246 |
-
st.title("... приступим к анализу... версия
|
| 247 |
|
| 248 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
| 249 |
|
|
|
|
| 34 |
|
| 35 |
# Function for lemmatizing Russian text
|
| 36 |
def lemmatize_text(text):
|
| 37 |
+
if pd.isna(text):
|
| 38 |
+
return ""
|
| 39 |
+
|
| 40 |
+
if not isinstance(text, str):
|
| 41 |
+
text = str(text)
|
| 42 |
+
|
| 43 |
words = text.split()
|
| 44 |
lemmatized_words = []
|
| 45 |
for word in tqdm(words, desc="Lemmatizing", unit="word"):
|
| 46 |
lemmatized_word = ''.join(mystem.lemmatize(word))
|
| 47 |
lemmatized_words.append(lemmatized_word)
|
| 48 |
+
return ' '.join(lemmatized_words)
|
| 49 |
|
| 50 |
# Translation model for Russian to English
|
| 51 |
model_name = "Helsinki-NLP/opus-mt-ru-en"
|
|
|
|
| 249 |
return output
|
| 250 |
|
| 251 |
def main():
|
| 252 |
+
st.title("... приступим к анализу... версия 40+")
|
| 253 |
|
| 254 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
| 255 |
|