stefanoviel
committed on
Commit
·
3c2ac96
1
Parent(s):
0fd8f7a
now it works
Browse files
Dockerfile
CHANGED
@@ -12,6 +12,8 @@ RUN apt-get update && apt-get install -y \
|
|
12 |
COPY requirements.txt ./
|
13 |
COPY src/ ./src/
|
14 |
COPY papers_with_abstracts_parallel.csv ./
|
|
|
|
|
15 |
|
16 |
RUN pip3 install -r requirements.txt
|
17 |
|
|
|
12 |
COPY requirements.txt ./
|
13 |
COPY src/ ./src/
|
14 |
COPY papers_with_abstracts_parallel.csv ./
|
15 |
+
COPY paper_embeddings.pt ./
|
16 |
+
COPY papers_data.pkl ./
|
17 |
|
18 |
RUN pip3 install -r requirements.txt
|
19 |
|
src/paper_embeddings.pt → paper_embeddings.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5022824
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f89ffdcacd8896a16962b5292a9b47be7120e2d4988a6f1e3d8f694e2b1b6fb7
|
3 |
size 5022824
|
src/papers_data.pkl → papers_data.pkl
RENAMED
File without changes
|
src/streamlit_app.py
CHANGED
@@ -9,16 +9,13 @@ from io import StringIO
|
|
9 |
|
10 |
# --- Configuration ---
|
11 |
EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
|
12 |
-
# Use /tmp directory for temporary files in Hugging Face Spaces
|
13 |
-
script_dir = Path(__file__).parent
|
14 |
|
15 |
# Define paths relative to the script's directory
|
16 |
-
DATA_FILE = '
|
17 |
-
EMBEDDINGS_FILE = '
|
18 |
CSV_FILE = 'papers_with_abstracts_parallel.csv'
|
19 |
|
20 |
|
21 |
-
|
22 |
# --- Caching Functions ---
|
23 |
@st.cache_resource
|
24 |
def load_embedding_model():
|
@@ -45,7 +42,7 @@ def create_and_save_embeddings(model, data_df):
|
|
45 |
|
46 |
# Save embeddings and dataframe to /tmp directory
|
47 |
try:
|
48 |
-
torch.save(corpus_embeddings, EMBEDDINGS_FILE)
|
49 |
data_df.to_pickle(DATA_FILE)
|
50 |
st.success("Embeddings and data saved successfully!")
|
51 |
except Exception as e:
|
|
|
9 |
|
10 |
# --- Configuration ---
|
11 |
EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
|
|
|
|
|
12 |
|
13 |
# Define paths relative to the script's directory
|
14 |
+
DATA_FILE = 'papers_data.pkl'
|
15 |
+
EMBEDDINGS_FILE = 'paper_embeddings.pt'
|
16 |
CSV_FILE = 'papers_with_abstracts_parallel.csv'
|
17 |
|
18 |
|
|
|
19 |
# --- Caching Functions ---
|
20 |
@st.cache_resource
|
21 |
def load_embedding_model():
|
|
|
42 |
|
43 |
# Save embeddings and dataframe to /tmp directory
|
44 |
try:
|
45 |
+
torch.save(corpus_embeddings.cpu(), EMBEDDINGS_FILE)
|
46 |
data_df.to_pickle(DATA_FILE)
|
47 |
st.success("Embeddings and data saved successfully!")
|
48 |
except Exception as e:
|