abadesalex's picture
test
714be4e
raw
history blame
1.66 kB
import io
import os
from fastapi import UploadFile
import pdfplumber
from app.db_local_storage.files_db import TEXT_FILES_DIRECTORY
from app.db_local_storage.documents_db import documents_text
class ExtractTextFeature:
    """Extract text from uploaded PDF files and record it in ``documents_text``."""

    # NOTE: the helpers below are disabled (commented out). They sketch
    # writing the extracted text to TEXT_FILES_DIRECTORY on disk.
    #
    # @staticmethod
    # def ensure_directory_exists(folder_path: str) -> None:
    #     """Ensure that the directory exists."""
    #     if not os.path.exists(folder_path):
    #         os.makedirs(folder_path)
    #
    # @staticmethod
    # async def saveFile(content, filename, directory: str) -> str:
    #     """Save the uploaded file to the specified directory."""
    #     file_path = os.path.join(directory, filename)
    #     with open(file_path, "w") as file:
    #         file.write(content)
    #     return file_path
    #
    # @staticmethod
    # async def save_text_from_pdf(file: UploadFile, text) -> str:
    #     ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
    #     await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)
    #     return text

    @staticmethod
    async def extract_text_from_pdf(file: UploadFile) -> str:
        """Read an uploaded PDF, extract its text and store it in ``documents_text``.

        The upload is read fully into memory and parsed with pdfplumber. The
        text of all pages is concatenated (no separator is inserted between
        pages) and appended to ``documents_text`` as a dict with the keys
        ``id``, ``filename`` and ``text``.

        Args:
            file: The uploaded PDF.

        Returns:
            The concatenated text of every page. If an entry with the same
            filename is already present in ``documents_text``, nothing is
            stored and ``{"message": "Document already exists"}`` is returned
            instead.

        NOTE(review): the duplicate branch returns a dict although the
        annotation says ``str``; kept as-is so existing callers keep working.
        """
        content = await file.read()

        # Reject duplicates before paying for PDF parsing.
        if any(document["filename"] == file.filename for document in documents_text):
            return {"message": "Document already exists"}

        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # Guard against extract_text() returning None (reported for pages
            # without extractable text in some pdfplumber versions), which
            # would otherwise break the concatenation.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        documents_text.append(
            {
                # Ids are 1-based positions in the list.
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )
        return text