Spaces:
Sleeping
Sleeping
import io | |
import os | |
from fastapi import UploadFile | |
import pdfplumber | |
from app.db_local_storage.files_db import TEXT_FILES_DIRECTORY | |
from app.db_local_storage.documents_db import documents_text | |
class ExtractTextFeature:
    """Extract text from uploaded PDFs and register it in ``documents_text``."""

    # NOTE: the extracted text is only appended to ``documents_text``; this
    # class does not write anything to disk.

    @staticmethod
    async def extract_text_from_pdf(file: UploadFile) -> "str | dict[str, str]":
        """Extract the text of an uploaded PDF and record it in ``documents_text``.

        Args:
            file: Uploaded PDF. Its whole content is read into memory.

        Returns:
            The concatenated text of every page when the document is new, or
            ``{"message": "Document already exists"}`` when ``documents_text``
            already holds an entry with the same filename (nothing is
            appended in that case).
        """
        content = await file.read()

        # Reject duplicates before parsing so a PDF whose text would be
        # discarded is never parsed. There is no ``await`` between this check
        # and the ``append`` below, so the two cannot be interleaved with
        # another upload running on the same event loop.
        if any(
            document["filename"] == file.filename for document in documents_text
        ):
            return {"message": "Document already exists"}

        with pdfplumber.open(io.BytesIO(content)) as pdf:
            # Some pdfplumber releases return ``None`` from ``extract_text()``
            # for a page without a text layer; treat that as an empty string
            # instead of failing on ``str + None``. Pages are joined without a
            # separator, matching the previously stored format.
            text = "".join(page.extract_text() or "" for page in pdf.pages)

        documents_text.append(
            {
                "id": len(documents_text) + 1,
                "filename": file.filename,
                "text": text,
            }
        )
        return text