Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -50,48 +50,87 @@ except PermissionError as e:
|
|
50 |
|
51 |
document_store = {}
|
52 |
|
|
|
53 |
if os.path.exists(faiss_index_file):
|
54 |
try:
|
55 |
index = faiss.read_index(faiss_index_file)
|
56 |
if index.ntotal > 0:
|
57 |
print(f"FAISS index loaded with {index.ntotal} vectors.")
|
58 |
-
index.reset()
|
59 |
-
index = faiss.IndexIDMap(faiss.IndexFlatL2(384))
|
60 |
else:
|
61 |
-
index = faiss.IndexIDMap(faiss.IndexFlatL2(384))
|
62 |
except Exception as e:
|
63 |
print(f"Error loading FAISS index: {e}, reinitializing.")
|
64 |
-
index = faiss.IndexIDMap(faiss.IndexFlatL2(384))
|
65 |
else:
|
66 |
-
index = faiss.IndexIDMap(faiss.IndexFlatL2(384))
|
67 |
|
68 |
# Function to upload document
|
69 |
def upload_document(file_path, embed_model):
|
|
|
70 |
doc_id = uuid.uuid4().int % (2**63 - 1)
|
|
|
|
|
71 |
file_location = os.path.join(UPLOAD_DIR, os.path.basename(file_path))
|
72 |
-
shutil.copy(file_path, file_location)
|
73 |
|
74 |
-
|
75 |
-
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
|
|
84 |
@app.route("/upload", methods=["POST"])
|
85 |
def handle_upload():
|
|
|
86 |
if "file" not in request.files:
|
87 |
return jsonify({"error": "No file provided"}), 400
|
88 |
|
89 |
file = request.files["file"]
|
|
|
|
|
90 |
file_path = os.path.join(UPLOAD_DIR, file.filename)
|
91 |
-
file.save(file_path)
|
92 |
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
def get_model_and_tokenizer(model_id: str):
|
97 |
"""
|
|
|
50 |
|
51 |
document_store = {}
|
52 |
|
53 |
+
# Dimensionality of the vectors held by the index. Every vector later passed
# to index.add_with_ids() must have exactly this many components.
EMBEDDING_DIM = 384


def _new_index():
    """Return an empty ID-mapped flat L2 FAISS index of EMBEDDING_DIM dimensions."""
    return faiss.IndexIDMap(faiss.IndexFlatL2(EMBEDDING_DIM))


# The process always starts with an EMPTY index, whatever is on disk: the
# persisted file is only opened so that its size (or a load error) can be
# reported. NOTE(review): presumably this is because document_store is an
# in-memory dict that starts empty, so persisted vectors would have no
# matching documents -- confirm before changing this to reuse the file.
if os.path.exists(faiss_index_file):
    try:
        persisted_total = faiss.read_index(faiss_index_file).ntotal
        if persisted_total > 0:
            print(f"FAISS index loaded with {persisted_total} vectors.")
    except Exception as e:
        # A corrupt or unreadable index file must not prevent start-up.
        print(f"Error loading FAISS index: {e}, reinitializing.")

index = _new_index()
|
68 |
|
69 |
# Function to upload document
|
70 |
def upload_document(file_path, embed_model):
    """Store a text document in UPLOAD_DIR and add its embedding to the index.

    Args:
        file_path: Path of the file to ingest. It may already be located
            inside UPLOAD_DIR, in which case no copy is made.
        embed_model: Object whose ``encode(text)`` returns an array-like
            embedding supporting ``.astype("float32")``.

    Returns:
        The generated integer document id on success, or ``None`` when the
        file could not be read as UTF-8 text or could not be embedded,
        indexed or persisted (the error is printed, not raised).

    Raises:
        OSError: If copying the file into UPLOAD_DIR fails for any reason
            other than source and destination being the same file.
    """
    # FAISS ids are signed 64-bit integers, so fold the 128-bit UUID into
    # the non-negative int64 range.
    doc_id = uuid.uuid4().int % (2**63 - 1)

    # basename() drops any directory part so the copy always lands directly
    # inside UPLOAD_DIR.
    file_location = os.path.join(UPLOAD_DIR, os.path.basename(file_path))

    try:
        shutil.copy(file_path, file_location)
    except shutil.SameFileError:
        # The caller already saved the file at its final location; copying
        # a file onto itself is an error for shutil but a no-op for us.
        pass

    try:
        with open(file_location, "r", encoding="utf-8") as f:
            text = f.read()
    except Exception as e:
        print(f"Error reading file {file_location}: {e}")
        return None

    try:
        vector = embed_model.encode(text).astype("float32")
        index.add_with_ids(np.array([vector]), np.array([doc_id], dtype=np.int64))
        document_store[doc_id] = {"path": file_location, "text": text}

        # Persist after every upload so the on-disk index tracks the
        # in-memory one.
        faiss.write_index(index, faiss_index_file)
        print(f"Document uploaded with doc_id: {doc_id}")
    except Exception as e:
        print(f"Error during document upload: {e}")
        return None

    return doc_id
|
99 |
|
100 |
+
@app.route("/upload", methods=["POST"])
def handle_upload():
    """Handle ``POST /upload``: receive one file and index it.

    Expects a multipart form field named ``file``.

    Returns:
        A ``(json, status)`` pair: 200 on success, 400 when the file field
        is missing or its filename is unusable, 500 when the upload
        directory, the save, or the processing step fails.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    uploaded = request.files["file"]

    # The client controls the filename, so keep only its last path component
    # (treating backslashes as separators too) to prevent writes outside the
    # intended directory via names such as "../../etc/passwd".
    safe_name = os.path.basename((uploaded.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        return jsonify({"error": "Invalid filename"}), 400

    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        if not os.access(UPLOAD_DIR, os.W_OK):
            # NOTE(review): 0o777 makes the directory world-writable;
            # confirm that the deployment really needs this.
            os.chmod(UPLOAD_DIR, 0o777)
    except PermissionError as e:
        return jsonify({"error": f"Permission error with upload directory: {e}"}), 500

    # upload_document() prints and swallows read/indexing errors, so the only
    # way to notice a failed ingest here is that document_store did not grow.
    # (Concurrent uploads could mask a failure; acceptable for this check.)
    docs_before = len(document_store)

    # Stage the upload outside UPLOAD_DIR: upload_document() copies its input
    # to UPLOAD_DIR/<basename>, and copying a file onto itself raises
    # shutil.SameFileError.
    with tempfile.TemporaryDirectory() as staging_dir:
        staged_path = os.path.join(staging_dir, safe_name)

        try:
            uploaded.save(staged_path)
        except Exception as e:
            return jsonify({"error": f"Error saving file: {e}"}), 500

        try:
            upload_document(staged_path, bertmodel)  # Assuming 'bertmodel' is defined elsewhere
        except Exception as e:
            return jsonify({"error": f"Error processing file: {e}"}), 500

    if len(document_store) == docs_before:
        return jsonify({"error": "Error processing file: document could not be indexed"}), 500

    return jsonify({"message": "File uploaded and processed successfully"}), 200
|
134 |
|
135 |
def get_model_and_tokenizer(model_id: str):
|
136 |
"""
|