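# NOTE: page header left over from the web view this file was copied from
# (not part of the module): abadesalex's picture · UI · 819bacd · raw ·
# history blame · 1.67 kB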
import io
import os
from typing import Dict, Union

import pdfplumber
import PyPDF2
from fastapi import UploadFile
from PyPDF2 import PdfReader

from src.db_local_storage.documents_db import documents_text
from src.db_local_storage.files_db import TEXT_FILES_DIRECTORY
class ExtractTextFeature:
    """Extract text from uploaded PDFs and persist it.

    Every member is a static method; the class only serves as a namespace.
    """

    @staticmethod
    def ensure_directory_exists(folder_path: str) -> None:
        """Create ``folder_path`` (including missing parents) if absent.

        Args:
            folder_path: Directory to create.
        """
        # exist_ok=True removes the race between checking for the directory
        # and creating it when two requests arrive at the same time.
        os.makedirs(folder_path, exist_ok=True)

    @staticmethod
    async def saveFile(content, filename, directory: str) -> str:
        """Write ``content`` as text to ``filename`` inside ``directory``.

        Args:
            content: Text to write.
            filename: Name of the target file. Only its final path component
                is used, so a client-supplied name such as ``../../x`` cannot
                escape ``directory``.
            directory: Existing directory to write into.

        Returns:
            The path of the file that was written.

        Raises:
            ValueError: If ``filename`` has no usable final component.
        """
        safe_name = os.path.basename(filename)
        if not safe_name or safe_name in (os.curdir, os.pardir):
            raise ValueError(f"Invalid filename: {filename!r}")

        file_path = os.path.join(directory, safe_name)
        # Extracted PDF text is frequently non-ASCII; pin the encoding rather
        # than depend on the platform's locale default.
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(content)
        return file_path

    @staticmethod
    async def save_text_from_pdf(file: UploadFile, text) -> str:
        """Store ``text`` under ``file.filename`` in ``TEXT_FILES_DIRECTORY``.

        Args:
            file: The upload the text came from; only ``filename`` is read.
            text: The text to store.

        Returns:
            ``text``, unchanged.
        """
        ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
        await ExtractTextFeature.saveFile(
            text, file.filename, TEXT_FILES_DIRECTORY
        )
        return text

    @staticmethod
    async def extract_text_from_pdf(
        file: UploadFile,
    ) -> Union[str, Dict[str, str]]:
        """Extract the text of an uploaded PDF and register it.

        On success a record with ``id``, ``filename`` and ``text`` keys is
        appended to ``documents_text``.

        Args:
            file: The uploaded PDF.

        Returns:
            The concatenated text of all pages, or
            ``{"message": "Document already exists"}`` when a record with the
            same filename is already registered.
        """
        # Reject duplicates first so a known file is not read and parsed
        # only to be thrown away.
        if any(doc["filename"] == file.filename for doc in documents_text):
            return {"message": "Document already exists"}

        content = await file.read()
        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # Some pdfplumber versions return None for a page without a text
            # layer (e.g. a scanned image); treat that as empty text instead
            # of failing on str + None.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        documents_text.append(
            {
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )
        return text