phyloforfun's picture
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
dbaeac5
raw
history blame
8.09 kB
import os, json, re
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.oauth2 import service_account
import base64
from PIL import Image
from PIL import Image
from io import BytesIO
from shutil import copyfileobj, copyfile
# from vouchervision.general_utils import get_cfg_from_full_path
def setup_streamlit_config(dir_home):
    """Write the Streamlit configuration file under ``dir_home``.

    Creates ``<dir_home>/.streamlit`` if needed and (over)writes
    ``config.toml`` inside it with the theme and server settings below.

    Args:
        dir_home: Directory that should contain the ``.streamlit`` folder.
    """
    dir_path = os.path.join(dir_home, ".streamlit")
    file_path = os.path.join(dir_path, "config.toml")

    # exist_ok avoids the check-then-create race of "if not exists: makedirs".
    os.makedirs(dir_path, exist_ok=True)

    config_lines = (
        "[theme]",
        'base = "dark"',
        'primaryColor = "#00ff00"',
        "[server]",
        "enableStaticServing = false",
        "runOnSave = true",
        "port = 8524",
        "maxUploadSize = 5000",
    )

    # Explicit encoding so the file does not depend on the platform locale.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write("\n".join(config_lines))
def save_uploaded_file_local(directory_in, directory_out, img_file_name, image=None):
    """Copy or re-save a file from ``directory_in`` into ``directory_out``.

    PDFs are copied unchanged. Any other file is written with PIL's JPEG
    encoder under its original file name (the extension is not changed).

    Args:
        directory_in: Directory containing ``img_file_name``.
        directory_out: Destination directory; created if missing.
        img_file_name: File name including its extension.
        image: Optional already-opened image object with a
            ``save(path, format)`` method. When given it is saved instead of
            re-opening the file from ``directory_in``.

    Returns:
        The output path, or ``None`` if the image could not be saved.
    """
    os.makedirs(directory_out, exist_ok=True)

    _, img_file_ext = os.path.splitext(img_file_name)
    full_path_out = os.path.join(directory_out, img_file_name)
    full_path_in = os.path.join(directory_in, img_file_name)

    if img_file_ext.lower() == '.pdf':
        copyfile(full_path_in, full_path_out)
        return full_path_out

    try:
        if image is None:
            with Image.open(full_path_in) as opened:
                opened.save(full_path_out, "JPEG")
        else:
            image.save(full_path_out, "JPEG")
    except Exception as e:
        # Saving stays best-effort (callers get None), but the failure is
        # reported rather than swallowed by a bare ``except: pass``.
        print(f"Could not save '{img_file_name}' as JPEG: {e}")
        return None
    return full_path_out
def save_uploaded_file(directory, img_file, image=None):
    """Save an uploaded file (or a file given by path) into ``directory``.

    PDFs are copied unchanged; anything else is written with PIL's JPEG
    encoder under its original name.

    Args:
        directory: Destination directory; created if missing.
        img_file: Either an object exposing ``.name`` (and ``.read()`` for
            file-like uploads) or a path string / path-like object.
        image: Optional already-opened image object with a
            ``save(path, format)`` method; saved instead of opening
            ``img_file``.

    Returns:
        The path the file was written to.
    """
    os.makedirs(directory, exist_ok=True)

    # Previously ``img_file.name`` was read unconditionally, so plain path
    # strings crashed before any of the path fallbacks could run.
    file_name = getattr(img_file, 'name', None)
    if file_name is None:
        file_name = os.path.basename(os.fspath(img_file))
    full_path = os.path.join(directory, file_name)
    is_file_like = hasattr(img_file, 'read')

    if file_name.lower().endswith('.pdf'):
        if is_file_like:
            with open(full_path, 'wb') as out_file:
                copyfileobj(img_file, out_file)
        else:
            source_path = os.fspath(img_file)
            # Opening the destination for writing truncates it, so never copy
            # a file onto itself.
            if os.path.abspath(source_path) != os.path.abspath(full_path):
                copyfile(source_path, full_path)
        return full_path

    if image is not None:
        image.save(full_path, "JPEG")
        return full_path

    try:
        with Image.open(img_file) as opened:
            opened.save(full_path, "JPEG")
    except Exception:
        if is_file_like:
            # No alternative location exists for an upload object; surface
            # the real error instead of a TypeError from os.path.join.
            raise
        # A bare file name may refer to a file already inside ``directory``.
        fallback_path = os.path.join(directory, os.fspath(img_file))
        with Image.open(fallback_path) as opened:
            opened.save(fallback_path, "JPEG")
        return fallback_path
    return full_path
def save_uploaded_local(directory, img_file, image=None):
    """Save an image as JPEG into ``directory`` and return an ``uploads_small`` path.

    Args:
        directory: Destination directory; created if missing.
        img_file: Path string of the source image; its base name is reused
            for the output file.
        image: Optional already-opened image object with a
            ``save(path, format)`` method; saved instead of opening
            ``img_file``.

    Returns:
        ``uploads_small/<name>`` when the image was opened from ``img_file``,
        or ``./uploads_small/<name>`` when ``image`` was supplied. The
        returned path is built from the literal ``uploads_small`` folder
        name, not from ``directory``.
    """
    # basename handles every separator the platform accepts, unlike
    # splitting on os.path.sep alone.
    name = os.path.basename(img_file)
    os.makedirs(directory, exist_ok=True)
    full_path = os.path.join(directory, name)

    if image is None:
        with Image.open(img_file) as opened:
            opened.save(full_path, "JPEG")
        return os.path.join('uploads_small', name)

    image.save(full_path, "JPEG")
    return os.path.join('.', 'uploads_small', name)
def image_to_base64(img):
    """Encode ``img`` as JPEG and return the bytes as a base64 text string.

    Args:
        img: Image object exposing ``save(fp, format=...)``.

    Returns:
        The base64 representation of the JPEG data, as ``str``.
    """
    jpeg_stream = BytesIO()
    img.save(jpeg_stream, format="JPEG")
    jpeg_bytes = jpeg_stream.getvalue()
    encoded = base64.b64encode(jpeg_bytes)
    return encoded.decode()
def check_prompt_yaml_filename(fname):
    """Return True if ``fname`` contains only word characters and dashes.

    Args:
        fname: Candidate file name (without extension) as a string.

    Returns:
        ``True`` when every character is a letter, digit, underscore or
        dash and the name is non-empty; ``False`` otherwise.
    """
    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which let names such as "prompt\n" through.
    # NOTE: for str patterns \w is Unicode-aware, so non-ASCII letters and
    # digits are accepted as well, not only [a-zA-Z0-9_].
    return re.fullmatch(r'[\w-]+', fname) is not None
# Function to upload files to Google Drive
def upload_to_drive(filepath, filename, is_hf=True, cfg_private=None, do_upload = True):
    """Upload a YAML or ZIP file to a Google Drive folder.

    Args:
        filepath: Local path of the file to upload.
        filename: Name the file gets on Drive.
        is_hf: When True the folder id is read from the ``GDRIVE_FOLDER_ID``
            environment variable; otherwise from
            ``cfg_private['google']['GDRIVE_FOLDER_ID']``.
        cfg_private: Private configuration mapping; used when ``is_hf`` is
            False and also passed on to ``get_google_credentials``.
        do_upload: When False the function returns immediately.

    Returns:
        None. The id of the uploaded file is printed.

    Raises:
        Exception: If the Drive upload fails. The message carries a hint
            about sharing the folder with the service account, and the
            original error is attached as ``__cause__``.
    """
    if not do_upload:
        return None

    creds = get_google_credentials(is_hf=is_hf, cfg_private=cfg_private)
    if not creds:
        print("GDRIVE_API environment variable not set.")
        return None

    service = build('drive', 'v3', credentials=creds)

    if is_hf:
        folder_id = os.environ.get('GDRIVE_FOLDER_ID')
    else:
        folder_id = cfg_private['google']['GDRIVE_FOLDER_ID']
    if not folder_id:
        # Previously a missing folder id skipped the upload without a trace.
        print("GDRIVE_FOLDER_ID is not set; skipping upload.")
        return None

    file_metadata = {
        'name': filename,
        'parents': [folder_id]
    }

    # Compare lower-cased names so '.YAML' / '.ZIP' are recognised too.
    yaml_suffixes = ('.yaml', '.yml')
    name_lower = filename.lower()
    path_lower = filepath.lower()
    if name_lower.endswith(yaml_suffixes) or path_lower.endswith(yaml_suffixes):
        mimetype = 'application/x-yaml'
    elif path_lower.endswith('.zip'):
        mimetype = 'application/zip'
    else:
        print("Unsupported file type")
        return None

    try:
        media = MediaFileUpload(filepath, mimetype=mimetype)
        uploaded = service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id'
        ).execute()
        print(f"Uploaded file with ID: {uploaded.get('id')}")
    except Exception as e:
        msg = f"If the following error is '404 cannot find file...' then you need to share the GDRIVE folder with your Google API service account's email address. Open your Google API JSON file, find the email account that ends with '@developer.gserviceaccount.com', go to your Google Drive, share the folder with this email account. {e}"
        print(msg)
        # Chain the cause so the original traceback is not lost.
        raise Exception(msg) from e
    return None
def get_google_credentials(is_hf=True, cfg_private=None): # Also used for google drive
    """Build Google service-account credentials.

    Args:
        is_hf: When True, the ``GOOGLE_APPLICATION_CREDENTIALS`` environment
            variable is expected to hold the service-account JSON itself.
            When False, ``cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS']``
            is a path to the JSON key file.
        cfg_private: Private configuration mapping; required when ``is_hf``
            is False.

    Returns:
        The credentials object created by
        ``service_account.Credentials.from_service_account_info``.

    Raises:
        TypeError: If ``is_hf`` is True and the environment variable is not
            set.

    Side effects:
        When ``is_hf`` is False, the ``GOOGLE_APPLICATION_CREDENTIALS``
        environment variable is set to the JSON text of the key file.
    """
    if is_hf:
        creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
        if creds_json_str is None:
            # json.loads(None) raised an opaque TypeError here before; keep
            # the exception type but say what is actually wrong.
            raise TypeError(
                "The GOOGLE_APPLICATION_CREDENTIALS environment variable is not set; "
                "it must contain the service-account JSON."
            )
        return service_account.Credentials.from_service_account_info(json.loads(creds_json_str))

    key_path = cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS']
    with open(key_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # The parsed dict is used directly; serialising and re-parsing it was a
    # no-op round trip.
    credentials = service_account.Credentials.from_service_account_info(data)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = json.dumps(data)
    return credentials