arpannookala committed on
Commit
0909f8b
·
1 Parent(s): 1c201ca

change app add shutil for lance

Browse files
Files changed (1) hide show
  1. app.py +16 -19
app.py CHANGED
@@ -26,48 +26,45 @@ import shutil
def download_and_extract_gdrive(file_id, destination, extract_to):
    """Download a zip archive from Google Drive and unpack it into ``extract_to``.

    The archive is fetched to ``destination``, extracted into a private
    scratch directory, and each top-level entry is then moved into
    ``extract_to``. The scratch directory and the downloaded zip are removed
    afterwards, including when extraction or a move fails.

    Args:
        file_id: Google Drive file ID of the zip archive.
        destination: Local path where the zip file is written temporarily.
        extract_to: Directory that receives the archive's top-level entries.
            It is created if missing. Entries already present under the same
            name are replaced.

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
        OSError: If the archive is missing or a filesystem operation fails.
    """
    import tempfile  # function-scope import: the file's import block is not visible here

    # Download the zip file
    gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)

    # A unique scratch directory avoids picking up entries left behind in a
    # fixed "./temp_extract" by an earlier, interrupted run.
    temp_dir = tempfile.mkdtemp(prefix="temp_extract_", dir=".")
    try:
        with zipfile.ZipFile(destination, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Ensure the main directory exists
        os.makedirs(extract_to, exist_ok=True)

        # Move extracted entries into the target directory
        for item in os.listdir(temp_dir):
            item_path = os.path.join(temp_dir, item)
            target_path = os.path.join(extract_to, item)
            # Moving a directory onto an existing one fails (or nests it), so
            # clear any previous copy first to keep repeated runs working.
            if os.path.isdir(target_path) and not os.path.islink(target_path):
                shutil.rmtree(target_path)
            elif os.path.lexists(target_path):
                os.remove(target_path)
            shutil.move(item_path, target_path)
    finally:
        # Clean up temporary files even when extraction or a move failed
        shutil.rmtree(temp_dir, ignore_errors=True)
        if os.path.exists(destination):
            os.remove(destination)
52
 
# Download LanceDB directory
lancedb_root = "./lancedb_directory_main"

# Table directories that must exist under lancedb_root after extraction
expected_directories = [
    "enhanced_papers_pretrained_1.lance",
    "enhanced_papers_pretrained_2.lance",
    "enhanced_papers_finetuned.lance",
]

# The script body runs on every execution, so only fetch the archive when
# at least one expected directory is missing from disk.
missing_directories = [
    subdir
    for subdir in expected_directories
    if not os.path.isdir(os.path.join(lancedb_root, subdir))
]
if missing_directories:
    st.info("Downloading and setting up LanceDB directory. This may take some time...")
    download_and_extract_gdrive(
        file_id="your_lancedb_file_id",  # Replace with actual Google Drive file ID
        destination="lancedb_directory_main.zip",
        extract_to=lancedb_root,
    )

# Ensure LanceDB structure is correct
for subdir in expected_directories:
    subdir_path = os.path.join(lancedb_root, subdir)
    if not os.path.isdir(subdir_path):
        raise FileNotFoundError(f"Missing expected directory: {subdir_path}")
71
 
72
  # # --------------------------- Load the LanceDB Table and Models --------------------------- #
73
 
 
def download_and_extract_gdrive(file_id, destination, extract_to):
    """Fetch a zip archive from Google Drive and unpack it into ``extract_to``.

    The zip is downloaded to ``destination`` and extracted into a temporary
    directory. Every top-level entry is then moved to
    ``extract_to/<entry name>``. The temporary directory and the downloaded
    zip are always removed, even if extraction or a move raises.

    Args:
        file_id: Google Drive file ID of the zip archive.
        destination: Local path used for the downloaded zip file.
        extract_to: Directory that receives the extracted entries. It is
            created if it does not exist. An existing entry with the same
            name is replaced by the freshly extracted one.

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
        OSError: If the archive is missing or a filesystem operation fails.
    """
    import tempfile  # function-scope import: the file's import block is not visible here

    # Download the zip file
    gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)

    try:
        os.makedirs(extract_to, exist_ok=True)

        # Use a fresh temporary directory for extraction; the context manager
        # deletes it on exit, so no stale entries survive a failed run.
        with tempfile.TemporaryDirectory(prefix="temp_extract_", dir=".") as temp_dir:
            with zipfile.ZipFile(destination, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Ensure files are moved correctly
            for item in os.listdir(temp_dir):
                target_path = os.path.join(extract_to, item)
                # shutil.move places the source *inside* an existing target
                # directory (giving item/item), so drop the old copy first.
                if os.path.isdir(target_path) and not os.path.islink(target_path):
                    shutil.rmtree(target_path)
                elif os.path.lexists(target_path):
                    os.remove(target_path)
                shutil.move(os.path.join(temp_dir, item), target_path)
    finally:
        # Cleanup
        if os.path.exists(destination):
            os.remove(destination)
48
 
# Download LanceDB files
lancedb_root = "./lancedb_directory_main"

# Entries that must exist under lancedb_root after extraction
expected_files = [
    "enhanced_papers_pretrained_1.lance",
    "enhanced_papers_pretrained_2.lance",
    "enhanced_papers_finetuned.lance",
]

# The script body runs on every execution, so only fetch the archive when
# at least one expected entry is missing from disk.
if not all(os.path.exists(os.path.join(lancedb_root, file)) for file in expected_files):
    st.info("Downloading and setting up LanceDB files. This may take a while...")
    download_and_extract_gdrive(
        file_id="your_lancedb_file_id",  # Replace with actual Google Drive file ID
        destination="lancedb_directory_main.zip",
        extract_to=lancedb_root,
    )

# Validate extracted files
# NOTE(review): LanceDB keeps each table as a "<name>.lance" directory, so an
# isfile() check would reject a correct extraction; exists() accepts either
# a directory or a plain file. Confirm against the archive layout.
for file in expected_files:
    file_path = os.path.join(lancedb_root, file)
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Expected file is missing: {file_path}")
68
 
69
  # # --------------------------- Load the LanceDB Table and Models --------------------------- #
70