import io
import os
from typing import Dict, Union

import pdfplumber
import PyPDF2
from fastapi import UploadFile
from PyPDF2 import PdfReader

from src.db_local_storage.documents_db import documents_text
from src.db_local_storage.files_db import TEXT_FILES_DIRECTORY


class ExtractTextFeature:
    """Helpers for extracting text from uploaded PDFs and persisting it.

    All members are static; the class only serves as a namespace.
    """

    @staticmethod
    def ensure_directory_exists(folder_path: str) -> None:
        """Create ``folder_path`` (including parents) if it does not exist.

        Raises:
            FileExistsError: If ``folder_path`` exists but is not a directory.
        """
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test when two requests arrive concurrently.
        os.makedirs(folder_path, exist_ok=True)

    @staticmethod
    async def saveFile(content: str, filename: str, directory: str) -> str:
        """Write ``content`` as UTF-8 text to ``directory`` and return its path.

        Args:
            content: Text to write.
            filename: Name of the target file. Only its final path component
                is used, so a client-supplied name cannot escape
                ``directory``.
            directory: Existing directory to write into.

        Returns:
            The path of the file that was written.
        """
        # The filename originates from an upload and is untrusted: strip any
        # directory components to prevent path traversal ("../../etc/x").
        safe_name = os.path.basename(filename)
        file_path = os.path.join(directory, safe_name)
        # Explicit encoding: extracted PDF text is frequently non-ASCII and
        # the platform default encoding may not be able to represent it.
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(content)
        return file_path

    @staticmethod
    async def save_text_from_pdf(file: UploadFile, text: str) -> str:
        """Persist ``text`` under ``TEXT_FILES_DIRECTORY`` using the upload's name.

        Args:
            file: The upload whose ``filename`` names the saved text file.
            text: The text to store.

        Returns:
            ``text``, unchanged.
        """
        ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
        await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)
        return text

    @staticmethod
    async def extract_text_from_pdf(
        file: UploadFile,
    ) -> Union[str, Dict[str, str]]:
        """Extract the text of an uploaded PDF and register it in ``documents_text``.

        Args:
            file: The uploaded PDF.

        Returns:
            The concatenated text of all pages, or
            ``{"message": "Document already exists"}`` when an entry with the
            same filename is already present in ``documents_text``.
        """
        # The upload is always consumed, as before, so callers observe the
        # same stream position regardless of the outcome.
        content = await file.read()

        # Check for duplicates before parsing so a known document does not
        # pay for a full PDF extraction.
        if any(doc["filename"] == file.filename for doc in documents_text):
            return {"message": "Document already exists"}

        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # extract_text() may yield None for a page without extractable
            # text (e.g. scanned images); treat that as an empty string
            # instead of failing on str + None.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        documents_text.append(
            {
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )
        return text