Commit
·
a61b64b
1
Parent(s):
0909f8b
Change app.py: add shutil and download/extract steps for the LanceDB directory and the fine-tuned model
Browse files
app.py
CHANGED
@@ -23,6 +23,15 @@ import zipfile
|
|
23 |
import os
|
24 |
import shutil
|
25 |
# Function to download and extract folder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def download_and_extract_gdrive(file_id, destination, extract_to):
|
27 |
# Download the zip file
|
28 |
gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)
|
@@ -46,15 +55,22 @@ def download_and_extract_gdrive(file_id, destination, extract_to):
|
|
46 |
shutil.rmtree(temp_dir)
|
47 |
os.remove(destination)
|
48 |
|
49 |
-
|
50 |
-
|
|
|
51 |
|
52 |
download_and_extract_gdrive(
|
53 |
-
file_id="
|
54 |
-
destination="lancedb_directory_main
|
55 |
-
extract_to="./
|
56 |
)
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# Validate extracted files
|
59 |
expected_files = [
|
60 |
"enhanced_papers_pretrained_1.lance",
|
@@ -65,9 +81,6 @@ for file in expected_files:
|
|
65 |
file_path = os.path.join("./lancedb_directory_main", file)
|
66 |
if not os.path.isfile(file_path):
|
67 |
raise FileNotFoundError(f"Expected file is missing: {file_path}")
|
68 |
-
|
69 |
-
# # --------------------------- Load the LanceDB Table and Models --------------------------- #
|
70 |
-
|
71 |
# Connect to LanceDB
|
72 |
DB_PATH = "./lancedb_directory_main"
|
73 |
TABLE_NAME_1 = "enhanced_papers_pretrained_1"
|
|
|
23 |
import os
|
24 |
import shutil
|
25 |
# Function to download and extract folder
|
26 |
+
def download_and_extract_gdrive_finetuned(file_id, destination, extract_to):
    """Download a zip archive from Google Drive and unpack it.

    Args:
        file_id: Google Drive file ID, interpolated into the
            ``https://drive.google.com/uc?id=...`` download URL.
        destination: Local path the zip archive is downloaded to. The file
            is deleted again once extraction has been attempted.
        extract_to: Directory the archive contents are extracted into.

    Raises:
        Any error raised by ``gdown.download``, ``zipfile.ZipFile`` or
        ``extractall`` (e.g. ``zipfile.BadZipFile`` for a corrupt archive)
        propagates to the caller; nothing is caught here.
    """
    # Download the zip file
    gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)

    try:
        # Extract the zip file
        with zipfile.ZipFile(destination, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
    finally:
        # Clean up the downloaded zip file even when extraction fails, so a
        # corrupt or partial download is not left behind for the next run.
        # The existence check keeps a failed download from masking the
        # original error with a second FileNotFoundError.
        if os.path.exists(destination):
            os.remove(destination)
|
34 |
+
|
35 |
def download_and_extract_gdrive(file_id, destination, extract_to):
|
36 |
# Download the zip file
|
37 |
gdown.download(f"https://drive.google.com/uc?id={file_id}", destination, quiet=False)
|
|
|
55 |
shutil.rmtree(temp_dir)
|
56 |
os.remove(destination)
|
57 |
|
58 |
+
|
59 |
+
# Download and extract LanceDB and fine-tuned model
|
60 |
+
st.info("Downloading and setting up necessary data. This might take a while...")
|
61 |
|
62 |
download_and_extract_gdrive(
|
63 |
+
file_id="1Qnb8bs_NXWlhDwGoswOgsp2DiLBMbfSY", # Google Drive file ID of the LanceDB archive
|
64 |
+
destination="lancedb_directory_main",
|
65 |
+
extract_to="./"
|
66 |
)
|
67 |
|
68 |
+
download_and_extract_gdrive_finetuned(
|
69 |
+
file_id="1_9VVuN_P3zsTBYzg0lAeh4ghd9zhXS3w", # Google Drive file ID of the fine-tuned model archive
|
70 |
+
destination="finetuned_all_minilm_l6_v2", # NOTE(review): the zip is saved under this name and extracted into "./"; if the archive's top-level folder has the same name, extraction would collide with the zip file itself - confirm
|
71 |
+
extract_to="./"
|
72 |
+
)
|
73 |
+
# # --------------------------- Load the LanceDB Table and Models --------------------------- #
|
74 |
# Validate extracted files
|
75 |
expected_files = [
|
76 |
"enhanced_papers_pretrained_1.lance",
|
|
|
81 |
file_path = os.path.join("./lancedb_directory_main", file)
|
82 |
if not os.path.isfile(file_path):
|
83 |
raise FileNotFoundError(f"Expected file is missing: {file_path}")
|
|
|
|
|
|
|
84 |
# Connect to LanceDB
|
85 |
DB_PATH = "./lancedb_directory_main"
|
86 |
TABLE_NAME_1 = "enhanced_papers_pretrained_1"
|