Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,6 @@ import pkg_resources
|
|
14 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
15 |
font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
|
16 |
|
17 |
-
# Add Arabic stop words
|
18 |
ARABIC_STOP_WORDS = {
|
19 |
'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
|
20 |
'أو', 'و', 'ف', 'ل', 'ب', 'ك', 'لل', 'ال', 'هذا', 'هذه', 'ذلك',
|
@@ -31,7 +30,7 @@ ARABIC_STOP_WORDS = {
|
|
31 |
'اول', 'ضمن', 'انها', 'جميع', 'الذي', 'قبل', 'بعد', 'حول', 'ايضا',
|
32 |
'لازم', 'حاجة', 'علي', 'يجب', 'صار', 'صارت', 'تحت', 'ضد'
|
33 |
}
|
34 |
-
|
35 |
st.set_page_config(
|
36 |
page_title="Arabic Poem Analysis",
|
37 |
page_icon="📚",
|
@@ -250,7 +249,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
250 |
embedding_model=bert_model,
|
251 |
**topic_model_params)
|
252 |
|
253 |
-
# Create vectorizer with stop words
|
254 |
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
|
255 |
min_df=1,
|
256 |
max_df=1.0)
|
@@ -316,7 +314,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
316 |
continue
|
317 |
|
318 |
return summaries, topic_model
|
319 |
-
|
320 |
try:
|
321 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
322 |
st.success("Models loaded successfully!")
|
@@ -328,28 +326,23 @@ except Exception as e:
|
|
328 |
st.title("📚 Arabic Poem Analysis")
|
329 |
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
330 |
|
331 |
-
# File upload
|
332 |
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
333 |
|
334 |
if uploaded_file is not None:
|
335 |
try:
|
336 |
-
# Read the file
|
337 |
if uploaded_file.name.endswith('.csv'):
|
338 |
df = pd.read_csv(uploaded_file)
|
339 |
else:
|
340 |
df = pd.read_excel(uploaded_file)
|
341 |
|
342 |
-
# Validate columns
|
343 |
required_columns = ['country', 'poem']
|
344 |
if not all(col in df.columns for col in required_columns):
|
345 |
st.error("File must contain 'country' and 'poem' columns.")
|
346 |
st.stop()
|
347 |
|
348 |
-
# Clean data
|
349 |
df['country'] = df['country'].str.strip()
|
350 |
df = df.dropna(subset=['country', 'poem'])
|
351 |
|
352 |
-
# Add topic modeling controls
|
353 |
st.subheader("Topic Modeling Settings")
|
354 |
col1, col2 = st.columns(2)
|
355 |
|
@@ -404,7 +397,6 @@ if uploaded_file is not None:
|
|
404 |
if summaries:
|
405 |
st.success("Analysis complete!")
|
406 |
|
407 |
-
# Display results in tabs
|
408 |
tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
|
409 |
|
410 |
with tab1:
|
@@ -445,7 +437,6 @@ if uploaded_file is not None:
|
|
445 |
else:
|
446 |
st.info("👆 Upload a file to get started!")
|
447 |
|
448 |
-
# Example format
|
449 |
st.write("### Expected File Format:")
|
450 |
example_df = pd.DataFrame({
|
451 |
'country': ['Egypt', 'Palestine'],
|
|
|
14 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
15 |
font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
|
16 |
|
|
|
17 |
ARABIC_STOP_WORDS = {
|
18 |
'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
|
19 |
'أو', 'و', 'ف', 'ل', 'ب', 'ك', 'لل', 'ال', 'هذا', 'هذه', 'ذلك',
|
|
|
30 |
'اول', 'ضمن', 'انها', 'جميع', 'الذي', 'قبل', 'بعد', 'حول', 'ايضا',
|
31 |
'لازم', 'حاجة', 'علي', 'يجب', 'صار', 'صارت', 'تحت', 'ضد'
|
32 |
}
|
33 |
+
|
34 |
st.set_page_config(
|
35 |
page_title="Arabic Poem Analysis",
|
36 |
page_icon="📚",
|
|
|
249 |
embedding_model=bert_model,
|
250 |
**topic_model_params)
|
251 |
|
|
|
252 |
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
|
253 |
min_df=1,
|
254 |
max_df=1.0)
|
|
|
314 |
continue
|
315 |
|
316 |
return summaries, topic_model
|
317 |
+
|
318 |
try:
|
319 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
320 |
st.success("Models loaded successfully!")
|
|
|
326 |
st.title("📚 Arabic Poem Analysis")
|
327 |
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
328 |
|
|
|
329 |
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
330 |
|
331 |
if uploaded_file is not None:
|
332 |
try:
|
|
|
333 |
if uploaded_file.name.endswith('.csv'):
|
334 |
df = pd.read_csv(uploaded_file)
|
335 |
else:
|
336 |
df = pd.read_excel(uploaded_file)
|
337 |
|
|
|
338 |
required_columns = ['country', 'poem']
|
339 |
if not all(col in df.columns for col in required_columns):
|
340 |
st.error("File must contain 'country' and 'poem' columns.")
|
341 |
st.stop()
|
342 |
|
|
|
343 |
df['country'] = df['country'].str.strip()
|
344 |
df = df.dropna(subset=['country', 'poem'])
|
345 |
|
|
|
346 |
st.subheader("Topic Modeling Settings")
|
347 |
col1, col2 = st.columns(2)
|
348 |
|
|
|
397 |
if summaries:
|
398 |
st.success("Analysis complete!")
|
399 |
|
|
|
400 |
tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
|
401 |
|
402 |
with tab1:
|
|
|
437 |
else:
|
438 |
st.info("👆 Upload a file to get started!")
|
439 |
|
|
|
440 |
st.write("### Expected File Format:")
|
441 |
example_df = pd.DataFrame({
|
442 |
'country': ['Egypt', 'Palestine'],
|