YALCINKAYA committed on
Commit
e7602b9
·
verified ·
1 Parent(s): fed4fb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -16
app.py CHANGED
@@ -50,48 +50,87 @@ except PermissionError as e:
50
 
51
# Width of the vectors held by the flat L2 index (presumably the output size
# of the embedding model passed to upload_document -- confirm if it changes).
EMBEDDING_DIM = 384

# In-memory registry: doc_id -> {"path": ..., "text": ...}. It starts empty on
# every run, so vectors persisted on disk have no matching entry here.
document_store = {}


def _new_index():
    """Return an empty ID-mapped flat L2 index of EMBEDDING_DIM dimensions."""
    return faiss.IndexIDMap(faiss.IndexFlatL2(EMBEDDING_DIM))


# A previously saved index is only inspected for logging: every code path of
# the original ended with a brand-new empty index, so the four duplicated
# constructor calls (and the reset() of an object that was immediately
# replaced) are collapsed into the single assignment below.
if os.path.exists(faiss_index_file):
    try:
        stale_index = faiss.read_index(faiss_index_file)
        if stale_index.ntotal > 0:
            print(f"FAISS index loaded with {stale_index.ntotal} vectors.")
    except Exception as e:
        # Start-up boundary: an unreadable index file must not stop the app.
        print(f"Error loading FAISS index: {e}, reinitializing.")

index = _new_index()
67
 
68
# Function to upload document
def upload_document(file_path, embed_model):
    """Copy a text file into UPLOAD_DIR, embed it, and add it to the FAISS index.

    Args:
        file_path: Path of the UTF-8 text file to ingest.
        embed_model: Object whose ``encode(text)`` result supports
            ``astype("float32")``.

    Returns:
        The integer ``doc_id`` under which the document was stored (earlier
        versions returned ``None``; callers that ignore the result still work).

    Raises:
        OSError: If the file cannot be copied, opened, or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Random UUID folded into the positive signed 64-bit range, because the
    # id is handed to FAISS as an int64.
    doc_id = uuid.uuid4().int % (2**63 - 1)

    file_location = os.path.join(UPLOAD_DIR, os.path.basename(file_path))
    try:
        shutil.copy(file_path, file_location)
    except shutil.SameFileError:
        # The /upload handler saves straight into UPLOAD_DIR, so source and
        # destination are the same file; there is nothing to copy.
        pass

    with open(file_location, "r", encoding="utf-8") as f:
        text = f.read()

    vector = embed_model.encode(text).astype("float32")
    index.add_with_ids(np.array([vector]), np.array([doc_id], dtype=np.int64))
    document_store[doc_id] = {"path": file_location, "text": text}

    # Persist after every upload so the on-disk index tracks the in-memory one.
    faiss.write_index(index, faiss_index_file)
    print(f"Document uploaded with doc_id: {doc_id}")
    return doc_id
 
 
 
 
 
 
 
 
 
83
 
 
84
@app.route("/upload", methods=["POST"])
def handle_upload():
    """Accept a multipart file upload, store it in UPLOAD_DIR and index it.

    Returns:
        A JSON success message, or a JSON error with status 400 when the
        request carries no file or the file name is unusable.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files["file"]

    # The file name is client-controlled: keep only its final path component
    # (treating backslashes as separators too) so that names such as
    # "../../app.py" cannot escape UPLOAD_DIR.
    # NOTE(review): werkzeug.utils.secure_filename is the stricter option if
    # werkzeug can be imported here.
    filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if filename in ("", ".", ".."):
        return jsonify({"error": "Invalid file name"}), 400

    file_path = os.path.join(UPLOAD_DIR, filename)
    file.save(file_path)

    upload_document(file_path, bertmodel)
    return jsonify({"message": "File uploaded successfully"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  def get_model_and_tokenizer(model_id: str):
97
  """
 
50
 
51
# Width of the vectors held by the flat L2 index (presumably the output size
# of the embedding model passed to upload_document -- confirm if it changes).
EMBEDDING_DIM = 384

# In-memory registry: doc_id -> {"path": ..., "text": ...}. It starts empty on
# every run, so vectors persisted on disk have no matching entry here.
document_store = {}


def _new_index():
    """Return an empty ID-mapped flat L2 index of EMBEDDING_DIM dimensions."""
    return faiss.IndexIDMap(faiss.IndexFlatL2(EMBEDDING_DIM))


# Check whether a FAISS index file exists. It is only inspected for logging:
# every branch of the original ended with a brand-new empty index, so the
# duplicated constructor calls (and the reset() of an object that was
# immediately replaced) are collapsed into the single assignment below.
if os.path.exists(faiss_index_file):
    try:
        stale_index = faiss.read_index(faiss_index_file)
        if stale_index.ntotal > 0:
            print(f"FAISS index loaded with {stale_index.ntotal} vectors.")
    except Exception as e:
        # Start-up boundary: an unreadable index file must not stop the app.
        print(f"Error loading FAISS index: {e}, reinitializing.")

index = _new_index()
68
 
69
# Function to upload document
def upload_document(file_path, embed_model):
    """Copy a text file into UPLOAD_DIR, embed it, and add it to the FAISS index.

    Args:
        file_path: Path of the UTF-8 text file to ingest.
        embed_model: Object whose ``encode(text)`` result supports
            ``astype("float32")``.

    Returns:
        The integer ``doc_id`` on success, or ``None`` when reading, embedding
        or indexing failed (the error is printed). Earlier versions always
        returned ``None``, so callers that ignore the result are unaffected.

    Raises:
        OSError: If copying the file into UPLOAD_DIR fails for any reason
            other than source and destination being the same file.
    """
    # Random UUID folded into the positive signed 64-bit range, because the
    # id is handed to FAISS as an int64.
    doc_id = uuid.uuid4().int % (2**63 - 1)

    # basename() keeps the stored copy inside UPLOAD_DIR whatever directory
    # the source path points at.
    file_location = os.path.join(UPLOAD_DIR, os.path.basename(file_path))

    try:
        shutil.copy(file_path, file_location)
    except shutil.SameFileError:
        # The /upload handler saves straight into UPLOAD_DIR, so source and
        # destination are the same file; there is nothing to copy.
        pass

    # Read the content of the uploaded file
    try:
        with open(file_location, "r", encoding="utf-8") as f:
            text = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading file {file_location}: {e}")
        return None

    # Embed the text and add it to the FAISS index
    try:
        vector = embed_model.encode(text).astype("float32")
        index.add_with_ids(np.array([vector]), np.array([doc_id], dtype=np.int64))
        document_store[doc_id] = {"path": file_location, "text": text}

        # Persist after every upload so the on-disk index tracks the in-memory one.
        faiss.write_index(index, faiss_index_file)
    except Exception as e:
        # Model and FAISS failures have library-specific types, so this stays
        # broad; the failure is reported to the caller through the None return.
        print(f"Error during document upload: {e}")
        return None

    print(f"Document uploaded with doc_id: {doc_id}")
    return doc_id
99
 
100
@app.route("/upload", methods=["POST"])
def handle_upload():
    """Accept a multipart file upload, store it in UPLOAD_DIR and index it.

    Returns:
        A ``(json, status)`` pair: 200 on success, 400 when the request has
        no file or an unusable file name, 500 when the upload directory
        cannot be prepared or saving/processing the file raises.
    """
    # Check if the request contains the file
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files["file"]

    # The file name is client-controlled: keep only its final path component
    # (treating backslashes as separators too) so that names such as
    # "../../app.py" cannot escape UPLOAD_DIR.
    # NOTE(review): werkzeug.utils.secure_filename is the stricter option if
    # werkzeug can be imported here.
    filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if filename in ("", ".", ".."):
        return jsonify({"error": "Invalid file name"}), 400
    file_path = os.path.join(UPLOAD_DIR, filename)

    # Ensure the upload directory exists and is writable by this process
    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        if not os.access(UPLOAD_DIR, os.W_OK):
            # Add owner rwx on top of the current mode instead of opening the
            # directory to every user (the original used 0o777).
            os.chmod(UPLOAD_DIR, os.stat(UPLOAD_DIR).st_mode | 0o700)
    except OSError as e:
        # PermissionError is a subclass of OSError, so it is still reported
        # with the same message.
        return jsonify({"error": f"Permission error with upload directory: {e}"}), 500

    try:
        # Save the file to the upload directory
        file.save(file_path)
    except Exception as e:
        return jsonify({"error": f"Error saving file: {e}"}), 500

    # Process the document using the upload_document function
    try:
        upload_document(file_path, bertmodel)  # Assuming 'bertmodel' is defined elsewhere
    except Exception as e:
        return jsonify({"error": f"Error processing file: {e}"}), 500

    # NOTE(review): upload_document prints and swallows read/embedding errors,
    # so reaching this point does not prove the document was indexed.
    return jsonify({"message": "File uploaded and processed successfully"}), 200
134
 
135
  def get_model_and_tokenizer(model_id: str):
136
  """