stefanoviel committed on
Commit
3c2ac96
·
1 Parent(s): 0fd8f7a

now it works

Browse files
Dockerfile CHANGED
@@ -12,6 +12,8 @@ RUN apt-get update && apt-get install -y \
12
  COPY requirements.txt ./
13
  COPY src/ ./src/
14
  COPY papers_with_abstracts_parallel.csv ./
 
 
15
 
16
  RUN pip3 install -r requirements.txt
17
 
 
12
  COPY requirements.txt ./
13
  COPY src/ ./src/
14
  COPY papers_with_abstracts_parallel.csv ./
15
+ COPY paper_embeddings.pt ./
16
+ COPY papers_data.pkl ./
17
 
18
  RUN pip3 install -r requirements.txt
19
 
src/paper_embeddings.pt → paper_embeddings.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0b1298f6ed20cb137ed06c7fe2a7633754507c1dcbd79965dfd58feb5868927
3
  size 5022824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f89ffdcacd8896a16962b5292a9b47be7120e2d4988a6f1e3d8f694e2b1b6fb7
3
  size 5022824
src/papers_data.pkl → papers_data.pkl RENAMED
File without changes
src/streamlit_app.py CHANGED
@@ -9,16 +9,13 @@ from io import StringIO
9
 
10
  # --- Configuration ---
11
  EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
12
- # Use /tmp directory for temporary files in Hugging Face Spaces
13
- script_dir = Path(__file__).parent
14
 
15
  # Define paths relative to the script's directory
16
- DATA_FILE = './src/papers_data.pkl'
17
- EMBEDDINGS_FILE = './src/paper_embeddings.pt'
18
  CSV_FILE = 'papers_with_abstracts_parallel.csv'
19
 
20
 
21
-
22
  # --- Caching Functions ---
23
  @st.cache_resource
24
  def load_embedding_model():
@@ -45,7 +42,7 @@ def create_and_save_embeddings(model, data_df):
45
 
46
  # Save embeddings and dataframe to /tmp directory
47
  try:
48
- torch.save(corpus_embeddings, EMBEDDINGS_FILE)
49
  data_df.to_pickle(DATA_FILE)
50
  st.success("Embeddings and data saved successfully!")
51
  except Exception as e:
 
9
 
10
  # --- Configuration ---
11
  EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
 
 
12
 
13
  # Define paths relative to the script's directory
14
+ DATA_FILE = 'papers_data.pkl'
15
+ EMBEDDINGS_FILE = 'paper_embeddings.pt'
16
  CSV_FILE = 'papers_with_abstracts_parallel.csv'
17
 
18
 
 
19
  # --- Caching Functions ---
20
  @st.cache_resource
21
  def load_embedding_model():
 
42
 
43
  # Save embeddings and dataframe to /tmp directory
44
  try:
45
+ torch.save(corpus_embeddings.cpu(), EMBEDDINGS_FILE)
46
  data_df.to_pickle(DATA_FILE)
47
  st.success("Embeddings and data saved successfully!")
48
  except Exception as e: