Pamudu13 committed on
Commit
a0d55b9
·
verified ·
1 Parent(s): a3dcdff

Update app.py: parse uploaded PDFs in memory instead of saving them to /tmp/uploaded_files

Browse files
Files changed (1) hide show
  1. app.py +19 -14
app.py CHANGED
@@ -24,10 +24,11 @@ index = faiss.IndexFlatL2(vector_dim) # FAISS index
24
 
25
  documents = [] # Store extracted text
26
 
27
- def extract_text_from_pdf(pdf_path):
28
- """Extracts text from PDF"""
29
- doc = fitz.open(pdf_path)
30
  text_chunks = [page.get_text("text") for page in doc]
 
31
  return text_chunks
32
 
33
  def create_vector_db(text_chunks):
@@ -96,26 +97,30 @@ def index():
96
  """Serve the HTML page for the user interface"""
97
  return render_template('index.html')
98
 
99
- UPLOAD_FOLDER = "/tmp/uploaded_files"
100
- os.makedirs(UPLOAD_FOLDER, exist_ok=True) # Ensure the folder exists
101
-
102
  @app.route('/upload_pdf', methods=['POST'])
103
  def upload_pdf():
104
  """Handle PDF upload"""
105
  if 'pdf' not in request.files:
106
- return jsonify({"error": "No file part"}), 400 # Handle missing file
107
 
108
  file = request.files['pdf']
109
  if file.filename == "":
110
- return jsonify({"error": "No selected file"}), 400 # Handle empty filename
111
-
112
- pdf_path = os.path.join(UPLOAD_FOLDER, file.filename)
113
 
114
  try:
115
- file.save(pdf_path) # Save the uploaded PDF
116
-
117
- # Extract text and create vector database
118
- text_chunks = extract_text_from_pdf(pdf_path)
 
 
 
 
 
 
 
 
 
119
  create_vector_db(text_chunks)
120
 
121
  return jsonify({"message": "PDF uploaded and indexed successfully!"}), 200
 
24
 
25
  documents = [] # Store extracted text
26
 
27
+ def extract_text_from_pdf(pdf_stream):
28
+ """Extracts text from PDF stream"""
29
+ doc = fitz.open(stream=pdf_stream, filetype="pdf")
30
  text_chunks = [page.get_text("text") for page in doc]
31
+ doc.close()
32
  return text_chunks
33
 
34
  def create_vector_db(text_chunks):
 
97
  """Serve the HTML page for the user interface"""
98
  return render_template('index.html')
99
 
 
 
 
100
  @app.route('/upload_pdf', methods=['POST'])
101
  def upload_pdf():
102
  """Handle PDF upload"""
103
  if 'pdf' not in request.files:
104
+ return jsonify({"error": "No file part"}), 400
105
 
106
  file = request.files['pdf']
107
  if file.filename == "":
108
+ return jsonify({"error": "No selected file"}), 400
 
 
109
 
110
  try:
111
+ # Read the file directly into memory instead of saving to disk
112
+ pdf_stream = file.read()
113
+
114
+ # Create a BytesIO object to work with the PDF in memory
115
+ from io import BytesIO
116
+ pdf_stream = BytesIO(pdf_stream)
117
+
118
+ # Use fitz to open the PDF from memory
119
+ doc = fitz.open(stream=pdf_stream, filetype="pdf")
120
+ text_chunks = [page.get_text("text") for page in doc]
121
+ doc.close()
122
+
123
+ # Create vector database
124
  create_vector_db(text_chunks)
125
 
126
  return jsonify({"message": "PDF uploaded and indexed successfully!"}), 200