stefanoviel
committed on
Commit
·
b1a742b
1
Parent(s):
3c2ac96
removing useless st print
Browse files
- src/streamlit_app.py +7 -7
src/streamlit_app.py
CHANGED
@@ -33,7 +33,7 @@ def create_and_save_embeddings(model, data_df):
|
|
33 |
Generates and saves document embeddings and the dataframe.
|
34 |
This function is called only once if the files don't exist.
|
35 |
"""
|
36 |
-
|
37 |
# Combine title and abstract for richer embeddings
|
38 |
data_df['text_to_embed'] = data_df['title'] + ". " + data_df['abstract'].fillna('')
|
39 |
|
@@ -44,9 +44,9 @@ def create_and_save_embeddings(model, data_df):
|
|
44 |
try:
|
45 |
torch.save(corpus_embeddings.cpu(), EMBEDDINGS_FILE)
|
46 |
data_df.to_pickle(DATA_FILE)
|
47 |
-
|
48 |
except Exception as e:
|
49 |
-
|
50 |
|
51 |
return corpus_embeddings, data_df
|
52 |
|
@@ -65,19 +65,19 @@ def load_data_and_embeddings():
|
|
65 |
data_df = pd.read_pickle(DATA_FILE)
|
66 |
return model, corpus_embeddings, data_df
|
67 |
except Exception as e:
|
68 |
-
|
69 |
|
70 |
-
|
71 |
|
72 |
# Load the raw data from CSV
|
73 |
try:
|
74 |
data_df = pd.read_csv(CSV_FILE)
|
75 |
corpus_embeddings, data_df = create_and_save_embeddings(model, data_df)
|
76 |
except FileNotFoundError:
|
77 |
-
|
78 |
st.stop()
|
79 |
except Exception as e:
|
80 |
-
|
81 |
st.stop()
|
82 |
|
83 |
return model, corpus_embeddings, data_df
|
|
|
33 |
Generates and saves document embeddings and the dataframe.
|
34 |
This function is called only once if the files don't exist.
|
35 |
"""
|
36 |
+
print("First time setup: Generating and saving embeddings. This may take a moment...")
|
37 |
# Combine title and abstract for richer embeddings
|
38 |
data_df['text_to_embed'] = data_df['title'] + ". " + data_df['abstract'].fillna('')
|
39 |
|
|
|
44 |
try:
|
45 |
torch.save(corpus_embeddings.cpu(), EMBEDDINGS_FILE)
|
46 |
data_df.to_pickle(DATA_FILE)
|
47 |
+
print("Embeddings and data saved successfully!")
|
48 |
except Exception as e:
|
49 |
+
print(f"Could not save embeddings to disk: {e}. Will regenerate on each session.")
|
50 |
|
51 |
return corpus_embeddings, data_df
|
52 |
|
|
|
65 |
data_df = pd.read_pickle(DATA_FILE)
|
66 |
return model, corpus_embeddings, data_df
|
67 |
except Exception as e:
|
68 |
+
print(f"Could not load saved embeddings: {e}. Regenerating...")
|
69 |
|
70 |
+
print("embeding model path exists: " + str(Path(EMBEDDING_MODEL).exists()))
|
71 |
|
72 |
# Load the raw data from CSV
|
73 |
try:
|
74 |
data_df = pd.read_csv(CSV_FILE)
|
75 |
corpus_embeddings, data_df = create_and_save_embeddings(model, data_df)
|
76 |
except FileNotFoundError:
|
77 |
+
print(f"CSV file '{CSV_FILE}' not found. Please ensure it's in your repository.")
|
78 |
st.stop()
|
79 |
except Exception as e:
|
80 |
+
print(f"Error loading data: {e}")
|
81 |
st.stop()
|
82 |
|
83 |
return model, corpus_embeddings, data_df
|