Commit
·
1c201ca
1
Parent(s):
badce79
change app add shutil
Browse files
app.py
CHANGED
@@ -21,31 +21,54 @@ nltk.download('omw-1.4')
|
|
21 |
import gdown
|
22 |
import zipfile
|
23 |
import os
|
24 |
-
|
25 |
# Function to download and extract folder
|
26 |
def download_and_extract_gdrive(file_id, destination, extract_to):
|
27 |
# Download the zip file
|
28 |
gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)
|
29 |
|
30 |
-
#
|
|
|
|
|
|
|
31 |
with zipfile.ZipFile(destination, 'r') as zip_ref:
|
32 |
-
zip_ref.extractall(
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
|
|
|
|
38 |
download_and_extract_gdrive(
|
39 |
-
file_id="
|
40 |
-
destination="lancedb_directory_main",
|
41 |
-
extract_to="./"
|
42 |
)
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
# # --------------------------- Load the LanceDB Table and Models --------------------------- #
|
50 |
|
51 |
# Connect to LanceDB
|
|
|
21 |
import gdown
|
22 |
import zipfile
|
23 |
import os
|
24 |
+
import shutil
|
25 |
# Function to download and extract folder
|
26 |
def download_and_extract_gdrive(file_id, destination, extract_to):
    """Download a zip archive from Google Drive and unpack it into a directory.

    The archive is fetched with ``gdown``, extracted into a throwaway staging
    directory, and its top-level entries are then moved into ``extract_to``.
    An entry that already exists in ``extract_to`` is replaced, so calling
    this again for the same target refreshes it instead of failing.

    Args:
        file_id: Google Drive file ID, interpolated into the ``uc?id=`` URL.
        destination: Path where the downloaded zip is written. The file is
            deleted before returning, whether or not extraction succeeded.
        extract_to: Directory that receives the archive's top-level entries;
            created if it does not exist.

    Raises:
        FileNotFoundError: If no file exists at ``destination`` after the
            download call.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Imported locally so this block does not depend on edits to the
    # module-level import section.
    import tempfile

    # Download the zip file
    gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)
    if not os.path.isfile(destination):
        # Fail with a message that names the real problem instead of letting
        # zipfile report a missing file.
        raise FileNotFoundError(
            f"Download of Google Drive file {file_id!r} did not produce {destination!r}"
        )

    try:
        # Ensure the main directory exists
        os.makedirs(extract_to, exist_ok=True)

        # A fresh, uniquely named staging directory avoids picking up stale
        # files from an earlier interrupted run, and is removed automatically
        # even when extraction or moving raises.
        with tempfile.TemporaryDirectory(prefix="temp_extract_", dir=".") as temp_dir:
            with zipfile.ZipFile(destination, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Move extracted files into the target directory
            for item in os.listdir(temp_dir):
                item_path = os.path.join(temp_dir, item)
                target_path = os.path.join(extract_to, item)

                # shutil.move refuses to move a directory onto an existing
                # path, so clear whatever is already there first.
                if os.path.isdir(target_path) and not os.path.islink(target_path):
                    shutil.rmtree(target_path)
                elif os.path.lexists(target_path):
                    os.remove(target_path)

                shutil.move(item_path, target_path)
    finally:
        # Clean up the downloaded archive on success and on failure alike.
        if os.path.exists(destination):
            os.remove(destination)
|
52 |
|
53 |
+
# Download LanceDB directory
# Single source of truth for where the LanceDB tables live on disk.
LANCEDB_ROOT = "./lancedb_directory_main"

# Table directories that must exist under LANCEDB_ROOT before the app can load.
expected_directories = [
    "enhanced_papers_pretrained_1.lance",
    "enhanced_papers_pretrained_2.lance",
    "enhanced_papers_finetuned.lance",
]

# This runs at module level, so in a Streamlit app it executes again on every
# script rerun. Only download when something is actually missing, otherwise
# each rerun would repeat the full download and extraction.
if not all(
    os.path.isdir(os.path.join(LANCEDB_ROOT, subdir))
    for subdir in expected_directories
):
    st.info("Downloading and setting up LanceDB directory. This may take some time...")
    download_and_extract_gdrive(
        file_id="your_lancedb_file_id",  # Replace with actual Google Drive file ID
        destination="lancedb_directory_main.zip",
        extract_to=LANCEDB_ROOT,
    )

# Ensure LanceDB structure is correct
for subdir in expected_directories:
    subdir_path = os.path.join(LANCEDB_ROOT, subdir)
    if not os.path.isdir(subdir_path):
        raise FileNotFoundError(f"Missing expected directory: {subdir_path}")
|
71 |
+
|
72 |
# # --------------------------- Load the LanceDB Table and Models --------------------------- #
|
73 |
|
74 |
# Connect to LanceDB
|