arpannookala committed on
Commit
1c201ca
·
1 Parent(s): badce79

change app add shutil

Browse files
Files changed (1) hide show
  1. app.py +38 -15
app.py CHANGED
@@ -21,31 +21,54 @@ nltk.download('omw-1.4')
21
  import gdown
22
  import zipfile
23
  import os
24
-
25
  # Function to download and extract folder
def download_and_extract_gdrive(file_id, destination, extract_to):
    """Fetch a zip archive from Google Drive, unpack it, then delete the archive.

    Args:
        file_id: Google Drive file id, interpolated into the ``uc?id=`` URL.
        destination: Local path the downloaded zip file is written to.
        extract_to: Directory the archive contents are extracted into.
    """
    drive_url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(drive_url, destination, quiet=False)

    # Unpack everything, making sure the archive handle is closed afterwards.
    archive = zipfile.ZipFile(destination, 'r')
    try:
        archive.extractall(extract_to)
    finally:
        archive.close()

    # The zip is only an intermediate artifact; drop it once unpacked.
    os.remove(destination)
34
-
# Download and extract LanceDB and fine-tuned model
st.info("Downloading and setting up necessary data. This might take a while...")

# Each archive is saved under a ".zip" name. Saving it under the bare name
# (e.g. "lancedb_directory_main") and then extracting into "./" breaks if the
# archive unpacks a directory with that same name, because a regular file
# already occupies the path.
download_and_extract_gdrive(
    file_id="1Qnb8bs_NXWlhDwGoswOgsp2DiLBMbfSY",  # Google Drive id, presumably the LanceDB archive
    destination="lancedb_directory_main.zip",
    extract_to="./"
)

download_and_extract_gdrive(
    file_id="1_9VVuN_P3zsTBYzg0lAeh4ghd9zhXS3w",  # Google Drive id, presumably the fine-tuned model archive
    destination="finetuned_all_minilm_l6_v2.zip",
    extract_to="./"
)
 
 
 
 
 
 
49
  # # --------------------------- Load the LanceDB Table and Models --------------------------- #
50
 
51
  # Connect to LanceDB
 
21
  import gdown
22
  import zipfile
23
  import os
24
+ import shutil
25
  # Function to download and extract folder
def download_and_extract_gdrive(file_id, destination, extract_to):
    """Download a zip archive from Google Drive and unpack it into ``extract_to``.

    The archive is first extracted into a fresh, uniquely named staging
    directory, and its top-level entries are then moved into ``extract_to``.
    Entries that already exist in ``extract_to`` (for example from an earlier
    run) are replaced instead of aborting the move. The staging directory and
    the downloaded zip are removed even when extraction fails.

    Args:
        file_id: Google Drive file id, interpolated into the ``uc?id=`` URL.
        destination: Local path the downloaded zip file is written to.
        extract_to: Directory that receives the archive's top-level entries;
            created if it does not exist.

    Raises:
        FileNotFoundError: If no file exists at ``destination`` after the
            download call returns.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Function-scope import: tempfile is not among the imports visible at the
    # top of this file.
    import tempfile

    # Download the zip file
    gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)
    if not os.path.isfile(destination):
        raise FileNotFoundError(
            f"Download of Google Drive file {file_id!r} did not produce {destination!r}"
        )

    try:
        # Ensure the main directory exists
        os.makedirs(extract_to, exist_ok=True)

        # A unique staging directory (instead of a fixed "./temp_extract")
        # cannot contain leftovers from a previous failed run and is removed
        # automatically when the block exits, even on error.
        with tempfile.TemporaryDirectory(prefix="temp_extract_", dir=".") as temp_dir:
            with zipfile.ZipFile(destination, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Move extracted entries into the target directory
            for item in os.listdir(temp_dir):
                target_path = os.path.join(extract_to, item)
                # shutil.move refuses to move a directory onto an existing
                # path, so clear any stale copy first.
                if os.path.isdir(target_path) and not os.path.islink(target_path):
                    shutil.rmtree(target_path)
                elif os.path.lexists(target_path):
                    os.remove(target_path)
                shutil.move(os.path.join(temp_dir, item), target_path)
    finally:
        # Clean up the downloaded archive whether or not extraction succeeded
        if os.path.exists(destination):
            os.remove(destination)
52
 
# Download LanceDB directory
lancedb_root = "./lancedb_directory_main"
expected_directories = [
    "enhanced_papers_pretrained_1.lance",
    "enhanced_papers_pretrained_2.lance",
    "enhanced_papers_finetuned.lance"
]

# Only download when the expected tables are not already on disk. This code
# runs at module level, so without the guard every execution of the script
# would re-download and re-extract the whole archive. (Assuming `st` is
# Streamlit, the script is re-executed on each user interaction.)
if not all(
    os.path.isdir(os.path.join(lancedb_root, table_dir))
    for table_dir in expected_directories
):
    st.info("Downloading and setting up LanceDB directory. This may take some time...")
    # NOTE(review): "your_lancedb_file_id" looks like a placeholder rather than
    # a real Google Drive id; confirm and substitute the real id before deploying.
    download_and_extract_gdrive(
        file_id="your_lancedb_file_id",  # Replace with actual Google Drive file ID
        destination="lancedb_directory_main.zip",
        extract_to=lancedb_root
    )

# Ensure LanceDB structure is correct: fail fast with a clear message rather
# than letting a later table lookup fail on a missing directory.
for subdir in expected_directories:
    subdir_path = os.path.join(lancedb_root, subdir)
    if not os.path.isdir(subdir_path):
        raise FileNotFoundError(f"Missing expected directory: {subdir_path}")
71
+
72
  # # --------------------------- Load the LanceDB Table and Models --------------------------- #
73
 
74
  # Connect to LanceDB