import io
import os
from typing import Dict, Union

import pdfplumber
from fastapi import UploadFile

from app.db_local_storage.documents_db import documents_text
from app.db_local_storage.files_db import TEXT_FILES_DIRECTORY


class ExtractTextFeature:
    """Extract the text of uploaded PDF files and record it in `documents_text`."""

    # NOTE: the helpers below are disabled (kept from the original file). They
    # wrote the extracted text to TEXT_FILES_DIRECTORY on disk.
    #
    # @staticmethod
    # def ensure_directory_exists(folder_path: str) -> None:
    #     """Ensure that the directory exists."""
    #     if not os.path.exists(folder_path):
    #         os.makedirs(folder_path)
    #
    # @staticmethod
    # async def saveFile(content, filename, directory: str) -> str:
    #     """Save the uploaded file to the specified directory."""
    #     file_path = os.path.join(directory, filename)
    #     with open(file_path, "w") as file:
    #         file.write(content)
    #     return file_path
    #
    # @staticmethod
    # async def save_text_from_pdf(file: UploadFile, text) -> str:
    #     ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
    #     await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)
    #     return text

    @staticmethod
    async def extract_text_from_pdf(file: UploadFile) -> Union[str, Dict[str, str]]:
        """Extract all text from an uploaded PDF and register it in `documents_text`.

        Args:
            file: The uploaded PDF. Its whole content is read into memory.

        Returns:
            The concatenated text of every page when the document is new.
            If an entry with the same filename is already present in
            `documents_text`, the dict ``{"message": "Document already exists"}``
            is returned instead and nothing is stored.
        """
        content = await file.read()

        # Check for a duplicate before parsing so an already-known document
        # does not pay the cost of a full PDF text extraction.
        already_stored = any(
            document["filename"] == file.filename for document in documents_text
        )
        if already_stored:
            return {"message": "Document already exists"}

        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # extract_text() may yield None for a page with no extractable
            # text (e.g. a scanned image); treat that as an empty string
            # instead of failing on str + None.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        documents_text.append(
            {
                # Sequential id derived from the current number of entries.
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )
        return text