Spaces:
Sleeping
Sleeping
# from setup.easy_imports import PyPDFLoader | |
import os | |
from langchain_community.document_loaders import PyPDFLoader | |
import tempfile | |
import requests | |
from _utils.handle_files import return_document_list_with_llama_parser | |
# Authorization header sent with every request to Bubble. The token is read
# once, at import time, from the BUBBLE_TOKEN environment variable; if the
# variable is unset the header value becomes the literal string "Bearer None".
# Single quotes are used inside the replacement field because reusing the
# enclosing double quotes is only valid syntax on Python 3.12+.
headers = {"Authorization": f"Bearer {os.environ.get('BUBBLE_TOKEN')}"}
# def obter_arquivo(id_arquivo="1735864318176x375804955201372160"): | |
# return requests.get( | |
# f"https://vella.app.br/version-test/api/1.1/obj/formresponseanswer/{id_arquivo}", | |
# headers=headers, | |
# ) | |
async def get_pdf_from_bubble(
    file_url="https://vella.app.br/version-test/fileupload/f1735864316650x718601440484441900/Boleto_DIGITICS%20Servic%CC%A7os%20de%20Secretariado%20LTDA_30_12_2024_804841714.pdf",
    should_use_llama_parse=False,
):
    """Fetch a PDF from a Bubble file URL and return its parsed documents.

    Args:
        file_url: URL of the PDF to load. The module-level ``headers``
            (Bearer token) are sent with the request.
        should_use_llama_parse: When true, the PDF is downloaded to a
            temporary file and handed to
            ``return_document_list_with_llama_parser``; otherwise it is loaded
            with ``PyPDFLoader``.

    Returns:
        Whatever the selected loader returns: the awaited result of
        ``return_document_list_with_llama_parser`` or the result of
        ``PyPDFLoader.load()`` (presumably a list of documents in both
        cases -- confirm against the helper).

    Raises:
        requests.HTTPError: On the llama-parse path, when the download
            responds with an error status (via ``raise_for_status``).
    """
    # NOTE(review): ``requests.get`` and ``PyPDFLoader.load`` are synchronous
    # calls made inside a coroutine; they run to completion before control
    # returns to the event loop.
    if not should_use_llama_parse:
        return PyPDFLoader(file_url, headers=headers).load()

    response = requests.get(file_url, headers=headers)
    response.raise_for_status()

    # delete=False keeps the file on disk after the handle is closed so the
    # parser can reopen it by name. Leaving the ``with`` block before parsing
    # guarantees the downloaded bytes are flushed to disk first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(response.content)
        tmp_path = tmp_file.name

    try:
        return await return_document_list_with_llama_parser(tmp_path)
    finally:
        # The temporary file used to be left behind on every call; remove it
        # once parsing has finished or failed. Cleanup is best-effort so that
        # a failed unlink never masks the parser's result or exception.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass