awacke1 committed on
Commit
58aea7a
·
verified ·
1 Parent(s): 573b1f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +928 -104
app.py CHANGED
@@ -1,115 +1,939 @@
1
- # app.py (streamlined and modularized for clarity and maintainability)
2
- import streamlit as st
3
  import os
4
- import glob
5
  import base64
6
- import pandas as pd
7
- import fitz
8
- from PIL import Image
9
- from io import BytesIO
 
 
 
 
 
 
10
  from datetime import datetime
 
 
 
 
 
 
 
 
11
  from reportlab.pdfgen import canvas
12
  from reportlab.lib.utils import ImageReader
13
- from markdown2 import markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # --- Config ---
16
  st.set_page_config(
17
- page_title="Vision & Layout Titans πŸš€",
18
  page_icon="πŸ€–",
19
- layout="wide"
 
 
 
 
 
 
20
  )
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # --- Helper Functions ---
23
- def get_files(exts):
24
- files = []
25
- for ext in exts:
26
- files.extend(glob.glob(f'*.{ext}'))
27
- return sorted([f for f in files if f.lower() != "readme.md"])
28
-
29
- def image_to_pdf(images, md_files):
30
- buffer = BytesIO()
31
- c = canvas.Canvas(buffer)
32
-
33
- for img_path in images:
34
- img = Image.open(img_path)
35
- width, height = img.size
36
- c.setPageSize((width, height))
37
- c.drawImage(ImageReader(img), 0, 0, width, height)
38
- c.showPage()
39
-
40
- for md_path in md_files:
41
- with open(md_path, 'r', encoding='utf-8') as f:
42
- md_content = f.read()
43
- html = markdown(md_content)
44
- c.setPageSize((595, 842)) # A4 size
45
- c.setFont("Helvetica", 10)
46
- c.drawString(50, 800, md_content[:1000]) # Simplified render
47
- c.showPage()
48
-
49
- c.save()
50
- buffer.seek(0)
51
- return buffer
52
-
53
- def render_pdf_gallery(pdf_files):
54
- for pdf_file in pdf_files:
55
- doc = fitz.open(pdf_file)
56
- page = doc.load_page(0)
57
- pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
58
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
59
- st.image(img, caption=os.path.basename(pdf_file))
60
-
61
- # --- Sidebar Management ---
62
- st.sidebar.header("Content Management")
63
-
64
- image_files = get_files(['png', 'jpg', 'jpeg'])
65
- pdf_files = get_files(['pdf'])
66
- md_files = get_files(['md'])
67
-
68
- selected_images = st.sidebar.multiselect("Select Images", image_files)
69
- selected_md_files = st.sidebar.multiselect("Select Markdown Files", md_files)
70
-
71
- # PDF Gallery
72
- if st.sidebar.checkbox("Show PDF Gallery"):
73
- render_pdf_gallery(pdf_files)
74
-
75
- # PDF Generation with rearrangement
76
- st.sidebar.subheader("Reorder Content for PDF")
77
- content_df = pd.DataFrame({
78
- "Content": selected_images + selected_md_files,
79
- "Type": ["Image"] * len(selected_images) + ["Markdown"] * len(selected_md_files),
80
- "Order": range(len(selected_images) + len(selected_md_files))
81
- })
82
-
83
- edited_df = st.sidebar.data_editor(content_df, use_container_width=True)
84
- sorted_contents = edited_df.sort_values('Order')['Content'].tolist()
85
-
86
- if st.sidebar.button("Generate PDF"):
87
- sorted_images = [item for item in sorted_contents if item in selected_images]
88
- sorted_md_files = [item for item in sorted_contents if item in selected_md_files]
89
- pdf_buffer = image_to_pdf(sorted_images, sorted_md_files)
90
- st.sidebar.download_button("Download PDF", pdf_buffer, "output.pdf")
91
-
92
- # Deletion
93
- st.sidebar.subheader("Delete Files")
94
- file_to_delete = st.sidebar.selectbox("Select file to delete", image_files + pdf_files + md_files)
95
- if st.sidebar.button("Delete Selected File"):
96
- os.remove(file_to_delete)
97
- st.sidebar.success(f"Deleted {file_to_delete}")
98
- st.rerun()
99
-
100
- # --- Main Page ---
101
- st.title("Vision & Layout Titans πŸš€")
102
- st.markdown("### Manage, View, and Export Your Files Easily!")
103
-
104
- # Display selected images
105
- st.subheader("Selected Images")
106
- for img_path in selected_images:
107
- img = Image.open(img_path)
108
- st.image(img, caption=os.path.basename(img_path))
109
-
110
- # Display selected markdown
111
- st.subheader("Selected Markdown")
112
- for md_path in selected_md_files:
113
- with open(md_path, 'r', encoding='utf-8') as f:
114
- md_content = f.read()
115
- st.markdown(md_content[:500] + '...')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
 
2
  import os
3
+ import re
4
  import base64
5
+ import glob
6
+ import logging
7
+ import random
8
+ import shutil
9
+ import time
10
+ import zipfile
11
+ import json
12
+ import asyncio
13
+ import aiofiles
14
+ import toml
15
  from datetime import datetime
16
+ from collections import Counter
17
+ from dataclasses import dataclass, field
18
+ from io import BytesIO
19
+ from typing import Optional, List, Dict, Any
20
+ import pandas as pd
21
+ import pytz
22
+ import streamlit as st
23
+ from PIL import Image, ImageDraw
24
  from reportlab.pdfgen import canvas
25
  from reportlab.lib.utils import ImageReader
26
+ from reportlab.lib.pagesizes import letter
27
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
28
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
29
+ from reportlab.lib.enums import TA_JUSTIFY
30
+ import fitz
31
+ import requests
32
# Optional heavy dependencies. Only availability flags are recorded here; the
# user-facing warnings are emitted further down, after st.set_page_config(),
# because set_page_config() has to be the first Streamlit command of a run.
try:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, AutoModelForVision2Seq, pipeline
    _transformers_available = True
except ImportError:
    _transformers_available = False
try:
    from diffusers import StableDiffusionPipeline
    _diffusers_available = True
except ImportError:
    _diffusers_available = False
try:
    from openai import OpenAI
    _openai_available = True
except ImportError:
    _openai_available = False
from huggingface_hub import InferenceClient, HfApi, list_models
from huggingface_hub.utils import RepositoryNotFoundError, GatedRepoError

# --- App Configuration ---
st.set_page_config(
    # Emoji are written as escapes so the literals survive re-encoding:
    # rocket + framed picture in the title, robot face as the icon.
    page_title="Vision & Layout Titans \U0001F680\U0001F5BC\uFE0F",
    page_icon="\U0001F916",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/docs',
        'Report a Bug': None,
        'About': "Combined App: Image/MD->PDF Layout + AI-Powered Tools 🌌"
    }
)

# Deferred dependency warnings (same conditions and texts as before).
if not _transformers_available:
    st.sidebar.warning("AI/ML libraries (torch, transformers) not found. Local model features disabled.")
# The diffusers warning is only shown when transformers itself is present.
if not _diffusers_available and _transformers_available:
    st.sidebar.warning("Diffusers library not found. Diffusion model features disabled.")
if not _openai_available:
    st.sidebar.warning("OpenAI library not found. OpenAI model features disabled.")
67
 
68
# --- Secrets Management ---
# Tokens are looked up in .streamlit/secrets.toml when that file exists and fall
# back to the process environment; a value found nowhere becomes "".
try:
    if os.path.exists(".streamlit/secrets.toml"):
        secrets = toml.load(".streamlit/secrets.toml")
    else:
        secrets = {}
    HF_TOKEN = secrets.get("HF_TOKEN", os.getenv("HF_TOKEN", ""))
    OPENAI_API_KEY = secrets.get("OPENAI_API_KEY", os.getenv("OPENAI_API_KEY", ""))
except Exception as e:
    # Any failure while reading the file degrades to environment-only lookup.
    st.error(f"Error loading secrets: {e}")
    HF_TOKEN = os.getenv("HF_TOKEN", "")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

if not HF_TOKEN:
    st.sidebar.warning("Hugging Face token not found in secrets or environment. Some features may be limited.")
# Only complain about the OpenAI key when the openai package is importable.
if _openai_available and not OPENAI_API_KEY:
    st.sidebar.warning("OpenAI API key not found in secrets or environment. OpenAI features disabled.")
82
+
83
# --- Logging Setup ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Records emitted through `logger` during this run of the module.
log_records = []


class LogCaptureHandler(logging.Handler):
    """Logging handler that appends every record it receives to `log_records`."""

    def emit(self, record):
        log_records.append(record)


# logging.getLogger() hands back the same logger object each time this module
# body is executed again, so a handler attached by a previous execution would
# still be installed. Drop those first; matching is by class name because the
# class object itself is re-created on every execution.
for _stale_handler in [h for h in logger.handlers if type(h).__name__ == "LogCaptureHandler"]:
    logger.removeHandler(_stale_handler)
logger.addHandler(LogCaptureHandler())
91
+
92
# --- Model Initialization ---
DEFAULT_PROVIDER = "hf-inference"
# Text-generation models offered for Hugging Face API inference.
FEATURED_MODELS_LIST = [
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "google/gemma-2-9b-it",
    "Qwen/Qwen2-7B-Instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "HuggingFaceH4/zephyr-7b-beta",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    "HuggingFaceTB/SmolLM-1.7B-Instruct",
]
VISION_MODELS_LIST = [
    "Salesforce/blip-image-captioning-large",
    "microsoft/trocr-large-handwritten",
    "llava-hf/llava-1.5-7b-hf",
    "google/vit-base-patch16-224",
]
DIFFUSION_MODELS_LIST = [
    "stabilityai/stable-diffusion-xl-base-1.0",
    "runwayml/stable-diffusion-v1-5",
    "OFA-Sys/small-stable-diffusion-v0",
]
# Only chat-capable models are listed: the OpenAI helpers in this file call
# client.chat.completions.create(). "text-davinci-003" was removed because it
# is a legacy completions-only model that has been retired.
OPENAI_MODELS_LIST = [
    "gpt-4o",
    "gpt-4-turbo",
    "gpt-3.5-turbo",
]
st.session_state.setdefault('local_models', {})
st.session_state.setdefault('hf_inference_client', None)
st.session_state.setdefault('openai_client', None)
# Build the OpenAI client only when both the package and a key are present.
if _openai_available and OPENAI_API_KEY:
    try:
        st.session_state['openai_client'] = OpenAI(api_key=OPENAI_API_KEY)
        logger.info("OpenAI client initialized successfully.")
    except Exception as e:
        st.error(f"Failed to initialize OpenAI client: {e}")
        logger.error(f"OpenAI client initialization failed: {e}")
        st.session_state['openai_client'] = None
132
+
133
# --- Session State Initialization ---
# Each key is filled in only when it is absent, so values already present in the
# session are left untouched.
_session_defaults = {
    'layout_snapshots': [],
    'layout_new_uploads': [],
    'history': [],
    'processing': {},
    'asset_checkboxes': {'image': {}, 'md': {}, 'pdf': {}},
    'downloaded_pdfs': {},
    'unique_counter': 0,
    'cam0_file': None,
    'cam1_file': None,
    'characters': [],
    'char_form_reset_key': 0,
    'gallery_size': 10,
    'hf_provider': DEFAULT_PROVIDER,
    'hf_custom_key': "",
    'hf_selected_api_model': FEATURED_MODELS_LIST[0],
    'hf_custom_api_model': "",
    'openai_selected_model': OPENAI_MODELS_LIST[0] if _openai_available else "",
    'selected_local_model_path': None,
    'gen_max_tokens': 512,
    'gen_temperature': 0.7,
    'gen_top_p': 0.95,
    'gen_frequency_penalty': 0.0,
}
for _state_key, _state_default in _session_defaults.items():
    st.session_state.setdefault(_state_key, _state_default)

# Sidebar placeholders are created lazily, in image / md / pdf order, and only
# when the key does not exist yet.
if 'asset_gallery_container' not in st.session_state:
    st.session_state['asset_gallery_container'] = {
        asset_kind: st.sidebar.empty() for asset_kind in ('image', 'md', 'pdf')
    }
158
+
159
+ # --- Dataclasses ---
160
@dataclass
class LocalModelConfig:
    """Settings for a model kept on local disk.

    `local_path` is not a constructor argument; it is computed in
    `__post_init__` from `model_type` and `name`.
    """

    name: str
    hf_id: str
    model_type: str
    size_category: str = "unknown"
    domain: Optional[str] = None
    local_path: str = field(init=False)

    def __post_init__(self):
        """Set `local_path` to '<model_type>_models/<sanitised name>'."""
        # Every run of characters other than word characters and '-' becomes '_'.
        folder_name = re.sub(r'[^\w\-]+', '_', self.name)
        self.local_path = os.path.join(f"{self.model_type}_models", folder_name)

    def get_full_path(self):
        """Return `local_path` as an absolute path."""
        return os.path.abspath(self.local_path)
174
+
175
@dataclass
class DiffusionConfig:
    """Settings for a diffusion model; `model_path` is derived from `name`."""

    name: str
    base_model: str
    size: str
    domain: Optional[str] = None

    @property
    def model_path(self):
        """Return the relative folder 'diffusion_models/<name>'."""
        return "diffusion_models/" + f"{self.name}"
184
+
185
  # --- Helper Functions ---
186
def generate_filename(sequence, ext="png"):
    """Build '<sanitised sequence>_<YYYYmmdd_HHMMSS>.<ext>' using the local time.

    `sequence` is converted with str(); every run of characters other than
    word characters and '-' is replaced by a single underscore.
    """
    stem = re.sub(r'[^\w\-]+', '_', str(sequence))
    stamp = time.strftime('%Y%m%d_%H%M%S')
    return f"{stem}_{stamp}.{ext}"
190
+
191
def pdf_url_to_filename(url):
    """Turn a URL into a '.pdf' file name.

    The leading http:// or https:// is dropped, each of the characters
    < > : " / \\ | ? * becomes '_', and the result is cut to 100 characters
    before '.pdf' is appended.
    """
    without_scheme = re.sub(r'^https?://', '', url)
    sanitized = re.sub(r'[<>:"/\\|?*]', '_', without_scheme)
    return sanitized[:100] + ".pdf"
195
+
196
def get_download_link(file_path, mime_type="application/octet-stream", label="Download"):
    """Return an HTML anchor that embeds `file_path` as a base64 data URI.

    Args:
        file_path: Path of the file to embed.
        mime_type: MIME type written into the data URI.
        label: Link text; inserted as-is, so it may contain markup.

    Returns:
        The anchor markup, "<label> (File not found)" when the path does not
        exist, or "<label> (Error)" when the file cannot be read.
    """
    import html  # local import: only this function needs it

    if not os.path.exists(file_path):
        return f"{label} (File not found)"
    try:
        with open(file_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
    except Exception as e:
        logger.error(f"Error creating download link for {file_path}: {e}")
        return f"{label} (Error)"
    # The file name lands inside a double-quoted attribute; escape it so a name
    # containing quotes or angle brackets cannot break out of the attribute.
    download_name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime_type};base64,{b64}" download="{download_name}">{label}</a>'
207
+
208
def zip_directory(directory_path, zip_path):
    """Write every file below `directory_path` into a deflated zip at `zip_path`.

    Archive member names are relative to the parent of `directory_path`, so the
    directory's own name is the top-level folder inside the archive.

    Args:
        directory_path: Directory to archive.
        zip_path: Destination archive file; overwritten if it exists.
    """
    # normpath strips a trailing separator; without that, dirname() would
    # return the directory itself and the top-level folder would be missing
    # from the member names.
    source_dir = os.path.normpath(directory_path)
    archive_base = os.path.dirname(source_dir)
    archive_abs = os.path.abspath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(source_dir):
            for file in files:
                file_path = os.path.join(root, file)
                # The archive may live inside the directory being zipped;
                # never add the (still open) archive to itself.
                if os.path.abspath(file_path) == archive_abs:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, archive_base))
214
+
215
def get_local_model_paths(model_type="causal"):
    """List the sub-directories found directly under '<model_type>_models/'."""
    candidates = glob.glob(f"{model_type}_models/*")
    return list(filter(os.path.isdir, candidates))
219
+
220
def get_gallery_files(file_types=("png", "pdf", "jpg", "jpeg", "md", "txt")):
    """Return the sorted files in the working directory with the given extensions.

    Each extension is globbed in its all-lowercase and all-uppercase spelling;
    duplicates are collapsed and any file named 'readme.md' (case-insensitive)
    is left out.
    """
    patterns = []
    for ext in file_types:
        patterns += [f"*.{ext.lower()}", f"*.{ext.upper()}"]
    matches = {path for pattern in patterns for path in glob.glob(pattern)}
    return sorted(path for path in matches if os.path.basename(path).lower() != 'readme.md')
226
+
227
def get_typed_gallery_files(file_type):
    """Return gallery files for the category 'image', 'md' or 'pdf'.

    Any other `file_type` yields an empty list.
    """
    extensions_by_category = (
        ('image', ('png', 'jpg', 'jpeg')),
        ('md', ('md',)),
        ('pdf', ('pdf',)),
    )
    for category, extensions in extensions_by_category:
        if file_type == category:
            return get_gallery_files(extensions)
    return []
235
+
236
def download_pdf(url, output_path):
    """Stream `url` to `output_path`.

    Args:
        url: Address to fetch with a GET request (20 second timeout).
        output_path: File the response body is written to.

    Returns:
        True on success. On any failure the error is logged, a partially
        written `output_path` is removed, and False is returned.
    """

    def _discard_partial_file():
        # Best-effort removal of an incomplete download.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial download {output_path}: {cleanup_error}")

    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # With stream=True the connection stays open until the response is
        # closed; the context manager releases it on every path.
        with requests.get(url, stream=True, timeout=20, headers=headers) as response:
            response.raise_for_status()
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        logger.info(f"Successfully downloaded {url} to {output_path}")
        return True
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to download {url}: {e}")
    except Exception as e:
        logger.error(f"An unexpected error occurred during download of {url}: {e}")
    _discard_partial_file()
    return False
262
+
263
async def process_pdf_snapshot(pdf_path, mode="single", resolution_factor=2.0):
    """Render pages of a PDF to PNG files and report progress in the UI.

    Args:
        pdf_path: Path of the PDF to open.
        mode: "single" renders at most one page, "twopage" at most two, any
            other value renders every page.
        resolution_factor: Scale applied on both axes when rasterising.

    Returns:
        The list of PNG file names written, or [] when anything fails (files
        already written during the failed run are removed again).
    """
    start_time = time.time()
    status_placeholder = st.empty()
    status_placeholder.text(f"Processing PDF Snapshot ({mode}, Res: {resolution_factor}x)... (0s)")
    output_files = []
    doc = None
    try:
        doc = fitz.open(pdf_path)
        matrix = fitz.Matrix(resolution_factor, resolution_factor)
        total_pages = len(doc)
        if mode == "single":
            num_pages_to_process = min(1, total_pages)
        elif mode == "twopage":
            num_pages_to_process = min(2, total_pages)
        else:
            num_pages_to_process = total_pages
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        for i in range(num_pages_to_process):
            page_start_time = time.time()
            pix = doc[i].get_pixmap(matrix=matrix)
            output_file = generate_filename(f"{base_name}_pg{i+1}_{mode}", "png")
            # Saving is pushed to a worker thread so the event loop is not blocked.
            await asyncio.to_thread(pix.save, output_file)
            output_files.append(output_file)
            elapsed_page = int(time.time() - page_start_time)
            status_placeholder.text(f"Processing PDF Snapshot ({mode}, Res: {resolution_factor}x)... Page {i+1}/{num_pages_to_process} done ({elapsed_page}s)")
            await asyncio.sleep(0.01)
        elapsed = int(time.time() - start_time)
        status_placeholder.success(f"PDF Snapshot ({mode}, {len(output_files)} files) completed in {elapsed}s!")
        return output_files
    except Exception as e:
        logger.error(f"Failed to process PDF snapshot for {pdf_path}: {e}")
        status_placeholder.error(f"Failed to process PDF {os.path.basename(pdf_path)}: {e}")
        for f in output_files:
            if os.path.exists(f):
                try:
                    os.remove(f)
                except OSError as cleanup_error:
                    # A cleanup failure must not escape the error handler.
                    logger.warning(f"Could not remove partial snapshot {f}: {cleanup_error}")
        return []
    finally:
        # Close the document on both the success and the failure path.
        if doc is not None:
            doc.close()
294
+
295
def get_hf_client() -> Optional[InferenceClient]:
    """Return an InferenceClient matching the current provider and token.

    The token is the custom key from session state when one is entered,
    otherwise HF_TOKEN. The client is cached in
    `st.session_state.hf_inference_client` together with the (provider, token)
    pair it was built for, and rebuilt only when that pair changes.

    Returns:
        The client, or None when a non-default provider is selected without
        any token or when construction fails.
    """
    provider = st.session_state.hf_provider
    custom_key = st.session_state.hf_custom_key.strip()
    token_to_use = custom_key or HF_TOKEN or None
    if token_to_use is None:
        if provider != "hf-inference":
            st.error(f"Provider '{provider}' requires a Hugging Face API token.")
            return None
        logger.warning("Using hf-inference provider without a token. Rate limits may apply.")

    # Compare against the configuration recorded when the cached client was
    # built, instead of reading private attributes of the client object.
    wanted_config = (provider, token_to_use)
    cached_client = st.session_state.get('hf_inference_client')
    if cached_client is not None and st.session_state.get('hf_inference_client_config') == wanted_config:
        return cached_client

    try:
        logger.info(f"Initializing InferenceClient for provider: {provider}.")
        st.session_state.hf_inference_client = InferenceClient(token=token_to_use, provider=provider)
        st.session_state['hf_inference_client_config'] = wanted_config
        logger.info("InferenceClient initialized successfully.")
    except Exception as e:
        st.error(f"Failed to initialize Hugging Face client: {e}")
        logger.error(f"InferenceClient initialization failed: {e}")
        st.session_state.hf_inference_client = None
        st.session_state['hf_inference_client_config'] = None
    return st.session_state.hf_inference_client
328
+
329
def process_text_hf(text: str, prompt: str, use_api: bool, model_id: str = None) -> str:
    """Run `prompt` against `text` with a Hugging Face model.

    Args:
        text: Content to process; appended to the prompt after a '---' rule.
        prompt: Instruction describing what to do with `text`.
        use_api: True to call the Hugging Face inference API, False to use the
            local model selected in session state.
        model_id: API model to use; when falsy the custom model name from
            session state, then the selected featured model, is used.

    Returns:
        The generated text, or a string starting with "Error" describing why
        nothing could be generated.
    """
    status_placeholder = st.empty()
    start_time = time.time()
    result_text = ""
    params = {
        "max_new_tokens": st.session_state.gen_max_tokens,
        "temperature": st.session_state.gen_temperature,
        "top_p": st.session_state.gen_top_p,
        "repetition_penalty": st.session_state.gen_frequency_penalty + 1.0,
    }
    # 'gen_seed' may not have been placed in session state yet; treat a missing
    # value like -1, which means "no fixed seed".
    seed = st.session_state.get('gen_seed', -1)
    if seed is not None and seed != -1:
        params["seed"] = seed
    system_prompt = "You are a helpful assistant. Process the following text based on the user's request."
    full_prompt = f"{prompt}\n\n---\n\n{text}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": full_prompt}
    ]
    if use_api:
        status_placeholder.info("Processing text using Hugging Face API...")
        client = get_hf_client()
        if not client:
            return "Error: Hugging Face client not available."
        model_id = model_id or st.session_state.hf_custom_api_model.strip() or st.session_state.hf_selected_api_model
        status_placeholder.info(f"Using API Model: {model_id}")
        try:
            api_kwargs = {
                "model": model_id,
                "messages": messages,
                "max_tokens": params['max_new_tokens'],
                "temperature": params['temperature'],
                "top_p": params['top_p'],
            }
            # Forward the seed only when the user asked for one.
            if "seed" in params:
                api_kwargs["seed"] = params["seed"]
            response = client.chat_completion(**api_kwargs)
            result_text = response.choices[0].message.content or ""
            logger.info(f"HF API text processing successful for model {model_id}.")
        except Exception as e:
            logger.error(f"HF API text processing failed for model {model_id}: {e}")
            result_text = f"Error during Hugging Face API inference: {str(e)}"
    else:
        status_placeholder.info("Processing text using local model...")
        if not _transformers_available:
            return "Error: Transformers library not available."
        model_path = st.session_state.get('selected_local_model_path')
        if not model_path or model_path not in st.session_state.get('local_models', {}):
            return "Error: No suitable local model selected."
        local_model_data = st.session_state['local_models'][model_path]
        if local_model_data.get('type') != 'causal':
            return f"Error: Loaded model '{os.path.basename(model_path)}' is not a Causal LM."
        status_placeholder.info(f"Using Local Model: {os.path.basename(model_path)}")
        model = local_model_data.get('model')
        tokenizer = local_model_data.get('tokenizer')
        if not model or not tokenizer:
            return f"Error: Model or tokenizer not found for {os.path.basename(model_path)}."
        try:
            try:
                prompt_for_model = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            except Exception:
                # Tokenizers without a chat template get a plain-text layout.
                logger.warning(f"Could not apply chat template for {model_path}. Using basic formatting.")
                prompt_for_model = f"System: {system_prompt}\nUser: {full_prompt}\nAssistant:"
            inputs = tokenizer(prompt_for_model, return_tensors="pt", padding=True, truncation=True, max_length=params['max_new_tokens'] * 2)
            inputs = {k: v.to(model.device) for k, v in inputs.items()}
            generate_params = {
                "max_new_tokens": params['max_new_tokens'],
                "temperature": params['temperature'],
                "top_p": params['top_p'],
                "repetition_penalty": params.get('repetition_penalty', 1.0),
                # Temperatures at or below 0.1 switch to greedy decoding.
                "do_sample": params['temperature'] > 0.1,
                "pad_token_id": tokenizer.eos_token_id
            }
            if "seed" in params:
                torch.manual_seed(int(params["seed"]))
            with torch.no_grad():
                outputs = model.generate(**inputs, **generate_params)
            # generate() returns prompt + continuation; keep the continuation.
            input_length = inputs['input_ids'].shape[1]
            generated_ids = outputs[0][input_length:]
            result_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
            logger.info(f"Local text processing successful for model {model_path}.")
        except Exception as e:
            logger.error(f"Local text processing failed for model {model_path}: {e}")
            result_text = f"Error during local model inference: {str(e)}"
    elapsed = int(time.time() - start_time)
    status_placeholder.success(f"Text processing completed in {elapsed}s.")
    return result_text
411
+
412
def process_text_openai(text: str, prompt: str, model_id: str) -> str:
    """Run `prompt` against `text` through the OpenAI chat completions API.

    Returns the model's reply, or a string starting with "Error" when the
    client is unavailable or the request fails. Generation limits are read
    from session state (gen_max_tokens, gen_temperature, gen_top_p).
    """
    client = st.session_state.get('openai_client') if _openai_available else None
    if not client:
        return "Error: OpenAI client not available or API key missing."

    status_placeholder = st.empty()
    started = time.time()
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Process the following text based on the user's request.",
        },
        {"role": "user", "content": f"{prompt}\n\n---\n\n{text}"},
    ]
    status_placeholder.info(f"Processing text using OpenAI model: {model_id}...")
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=conversation,
            max_tokens=st.session_state.gen_max_tokens,
            temperature=st.session_state.gen_temperature,
            top_p=st.session_state.gen_top_p,
        )
    except Exception as e:
        logger.error(f"OpenAI text processing failed for model {model_id}: {e}")
        result_text = f"Error during OpenAI inference: {str(e)}"
    else:
        try:
            result_text = completion.choices[0].message.content or ""
            logger.info(f"OpenAI text processing successful for model {model_id}.")
        except Exception as e:
            # A malformed response is reported the same way as a failed call.
            logger.error(f"OpenAI text processing failed for model {model_id}: {e}")
            result_text = f"Error during OpenAI inference: {str(e)}"
    elapsed = int(time.time() - started)
    status_placeholder.success(f"Text processing completed in {elapsed}s.")
    return result_text
441
+
442
def _extract_generated_text(response):
    """Pull the caption string out of an image-to-text response, or None.

    Accepts a plain string, an object or mapping exposing `generated_text`,
    or a non-empty list/tuple whose first element is one of those.
    """
    if isinstance(response, str):
        return response
    if isinstance(response, (list, tuple)):
        if not response:
            return None
        response = response[0]
    caption = getattr(response, 'generated_text', None)
    if caption is None and isinstance(response, dict):
        caption = response.get('generated_text')
    return caption if isinstance(caption, str) else None


def process_image_hf(image: Image.Image, prompt: str, use_api: bool, model_id: str = None) -> str:
    """Describe or transcribe `image` with a Hugging Face model.

    Args:
        image: Image to process.
        prompt: Text passed to local vision models; not used on the API path
            or by local OCR models.
        use_api: True to call the image-to-text inference API, False to use
            the local model selected in session state.
        model_id: API model; defaults to "Salesforce/blip-image-captioning-large".

    Returns:
        The generated text, or a string starting with "Error".
    """
    status_placeholder = st.empty()
    start_time = time.time()
    result_text = ""
    if use_api:
        status_placeholder.info("Processing image using Hugging Face API...")
        client = get_hf_client()
        if not client:
            return "Error: HF client not configured."
        buffered = BytesIO()
        # Re-encode as JPEG only when the source image was a JPEG.
        image.save(buffered, format="PNG" if image.format != 'JPEG' else 'JPEG')
        img_bytes = buffered.getvalue()
        model_id = model_id or "Salesforce/blip-image-captioning-large"
        status_placeholder.info(f"Using API Image-to-Text Model: {model_id}")
        try:
            # The image is the first positional argument of image_to_text();
            # there is no `data` keyword.
            response = client.image_to_text(img_bytes, model=model_id)
            caption = _extract_generated_text(response)
            if caption is not None:
                result_text = caption
                logger.info(f"HF API image captioning successful for model {model_id}.")
            else:
                result_text = "Error: Unexpected response format from image-to-text API."
                logger.warning(f"Unexpected API response for image-to-text: {response}")
        except Exception as e:
            logger.error(f"HF API image processing failed: {e}")
            result_text = f"Error during Hugging Face API image inference: {str(e)}"
    else:
        status_placeholder.info("Processing image using local model...")
        if not _transformers_available:
            return "Error: Transformers library needed."
        model_path = st.session_state.get('selected_local_model_path')
        if not model_path or model_path not in st.session_state.get('local_models', {}):
            return "Error: No suitable local model selected."
        local_model_data = st.session_state['local_models'][model_path]
        model_type = local_model_data.get('type')
        processor = local_model_data.get('processor')
        model = local_model_data.get('model')
        if model_type == 'vision':
            if processor and model:
                try:
                    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
                    generated_ids = model.generate(**inputs, max_new_tokens=st.session_state.gen_max_tokens)
                    result_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
                except Exception as e:
                    result_text = f"Error during local vision model inference: {e}"
            else:
                result_text = "Error: Processor or model missing for local vision task."
        elif model_type == 'ocr':
            if processor and model:
                try:
                    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(model.device)
                    generated_ids = model.generate(pixel_values, max_new_tokens=st.session_state.gen_max_tokens)
                    result_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
                except Exception as e:
                    result_text = f"Error during local OCR model inference: {e}"
            else:
                result_text = "Error: Processor or model missing for local OCR task."
        else:
            result_text = f"Error: Loaded model '{os.path.basename(model_path)}' is not a recognized vision/OCR type."
    elapsed = int(time.time() - start_time)
    status_placeholder.success(f"Image processing completed in {elapsed}s.")
    return result_text
505
+
506
def process_image_openai(image: Image.Image, prompt: str, model_id: str = "gpt-4o") -> str:
    """Send `image` and `prompt` to an OpenAI chat model and return its reply.

    The image is PNG-encoded and embedded as a base64 data URL. Returns a
    string starting with "Error" when the client is unavailable or the request
    fails.
    """
    client = st.session_state.get('openai_client') if _openai_available else None
    if not client:
        return "Error: OpenAI client not available or API key missing."

    status_placeholder = st.empty()
    started = time.time()

    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    img_b64 = base64.b64encode(png_buffer.getvalue()).decode()

    user_content = [
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_b64}"}},
    ]
    status_placeholder.info(f"Processing image using OpenAI model: {model_id}...")
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": user_content}],
            max_tokens=st.session_state.gen_max_tokens,
            temperature=st.session_state.gen_temperature,
        )
        result_text = completion.choices[0].message.content or ""
        logger.info(f"OpenAI image processing successful for model {model_id}.")
    except Exception as e:
        logger.error(f"OpenAI image processing failed for model {model_id}: {e}")
        result_text = f"Error during OpenAI image inference: {str(e)}"
    elapsed = int(time.time() - started)
    status_placeholder.success(f"Image processing completed in {elapsed}s.")
    return result_text
536
+
537
async def process_hf_ocr(image: Image.Image, output_file: str, use_api: bool, model_id: str = None) -> str:
    """Extract text from `image` via process_image_hf and save it to `output_file`.

    A non-empty result that starts with neither "Error" nor "[" is written to
    `output_file`. Otherwise an existing `output_file` is deleted. The result
    text is returned either way; a failed write appends a note to it.
    """
    chosen_model = model_id or "microsoft/trocr-large-handwritten"
    result = process_image_hf(image, "Extract text content from this image.", use_api, model_id=chosen_model)

    if result and not result.startswith(("Error", "[")):
        try:
            async with aiofiles.open(output_file, "w", encoding='utf-8') as f:
                await f.write(result)
            logger.info(f"HF OCR result saved to {output_file}")
        except IOError as e:
            logger.error(f"Failed to save HF OCR output to {output_file}: {e}")
            result += f"\n[Error saving file: {e}]"
        return result

    # No usable text: make sure no output file is left on disk.
    if os.path.exists(output_file):
        try:
            os.remove(output_file)
        except OSError:
            pass
    return result
554
+
555
async def process_openai_ocr(image: Image.Image, output_file: str, model_id: str = "gpt-4o") -> str:
    """Extract text from `image` via process_image_openai and save it to `output_file`.

    A non-empty result that does not start with "Error" is written to
    `output_file`. Otherwise an existing `output_file` is deleted. The result
    text is returned either way; a failed write appends a note to it.
    """
    result = process_image_openai(image, "Extract text content from this image.", model_id)

    if result and not result.startswith("Error"):
        try:
            async with aiofiles.open(output_file, "w", encoding='utf-8') as f:
                await f.write(result)
            logger.info(f"OpenAI OCR result saved to {output_file}")
        except IOError as e:
            logger.error(f"Failed to save OpenAI OCR output to {output_file}: {e}")
            result += f"\n[Error saving file: {e}]"
        return result

    # No usable text: make sure no output file is left on disk.
    if os.path.exists(output_file):
        try:
            os.remove(output_file)
        except OSError:
            pass
    return result
572
+
573
def randomize_character_content():
    """Generate random placeholder values for a new character.

    Returns:
        A (name, gender, intro, greeting) tuple. `name` is 'Character_' plus a
        random four-digit number, `gender` is "Male" or "Female", and `intro`
        and `greeting` are templates with the name filled in.
    """
    intro_templates = [
        "{char} is a valiant knight...", "{char} is a mischievous thief...",
        "{char} is a wise scholar...", "{char} is a fiery warrior...", "{char} is a gentle healer..."
    ]
    # Typographic characters are written as escapes (\u2014 em dash, \u2019
    # right single quote) so they cannot be mis-decoded again. Every greeting
    # is wrapped in single quotes.
    greeting_templates = [
        "'I am from the knight's guild...'",
        "'I heard you needed help\u2014name\u2019s {char}...'",
        "'Oh, hello! I\u2019m {char}, didn\u2019t see you there...'",
        "'I\u2019m {char}, and I\u2019m here to fight...'",
        "'I\u2019m {char}, here to heal...'"
    ]
    name = f"Character_{random.randint(1000, 9999)}"
    gender = random.choice(["Male", "Female"])
    intro = random.choice(intro_templates).format(char=name)
    greeting = random.choice(greeting_templates).format(char=name)
    return name, gender, intro, greeting
588
+
589
def save_character(character_data):
    """Add a character to the session list and persist the list to ``characters.json``.

    The in-memory list is only updated after the file has been written, so a
    failed save leaves ``st.session_state['characters']`` unchanged and the
    same character can be saved again later.

    Args:
        character_data: Mapping with at least a ``'name'`` key; must be
            JSON-serializable.

    Returns:
        bool: ``True`` when the character was stored, ``False`` when the name
        already exists or the file could not be written.
    """
    characters = st.session_state.get('characters', [])
    new_name = character_data['name']
    if any(c['name'] == new_name for c in characters):
        st.error(f"Character name '{new_name}' already exists.")
        return False

    updated = [*characters, character_data]
    try:
        # Serialize first: a non-serializable value must not truncate the
        # existing file, which opening it for writing would do.
        payload = json.dumps(updated, indent=2)
        with open("characters.json", "w", encoding='utf-8') as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as e:
        logger.error(f"Failed to save characters.json: {e}")
        st.error(f"Failed to save character file: {e}")
        return False

    st.session_state['characters'] = updated
    logger.info(f"Saved character: {new_name}")
    return True
605
+
606
def load_characters():
    """Populate ``st.session_state['characters']`` from ``characters.json``.

    A missing file yields an empty list. A file whose top-level JSON value is
    not a list is deleted and replaced by an empty list. A file that cannot be
    read or decoded is copied to ``characters_corrupt_<timestamp>.json``,
    removed, and the session starts with an empty list.
    """
    store_path = "characters.json"
    if not os.path.exists(store_path):
        st.session_state['characters'] = []
        return

    try:
        with open(store_path, "r", encoding='utf-8') as fh:
            loaded = json.load(fh)
        if not isinstance(loaded, list):
            st.session_state['characters'] = []
            logger.warning("characters.json is not a list, resetting.")
            os.remove(store_path)
        else:
            st.session_state['characters'] = loaded
            logger.info(f"Loaded {len(loaded)} characters.")
    except (json.JSONDecodeError, IOError) as e:
        logger.error(f"Failed to load or decode characters.json: {e}")
        st.error(f"Error loading character file: {e}. Starting fresh.")
        st.session_state['characters'] = []
        # Keep a copy of the unreadable file for inspection before removing it.
        try:
            corrupt_filename = f"characters_corrupt_{int(time.time())}.json"
            shutil.copy(store_path, corrupt_filename)
            logger.info(f"Backed up corrupted character file to {corrupt_filename}")
            os.remove(store_path)
        except Exception as backup_e:
            logger.error(f"Could not backup corrupted character file: {backup_e}")
631
+
632
def clean_stem(fn: str) -> str:
    """Turn a file path into a caption-style title.

    Drops the directory part and the last extension, replaces ``-`` and ``_``
    with spaces, trims surrounding whitespace and title-cases the result.
    """
    stem, _ext = os.path.splitext(os.path.basename(fn))
    spaced = stem.translate(str.maketrans("-_", "  "))
    return spaced.strip().title()
636
+
637
def _draw_markdown_pages(pdf, paragraphs, page_size, margin):
    """Flow ``paragraphs`` top-to-bottom onto ``page_size`` pages of ``pdf``, then end the page."""
    page_w, page_h = page_size
    avail_w = page_w - 2 * margin
    top = page_h - margin
    pdf.setPageSize(page_size)
    y = top
    page_open = True  # an empty source still yields one (blank) page
    pending = list(reversed(paragraphs))  # used as a stack; next paragraph is last
    while pending:
        para = pending.pop()
        avail_h = y - margin
        _, height = para.wrapOn(pdf, avail_w, avail_h)
        if height <= avail_h:
            para.drawOn(pdf, margin, y - height)
            y -= height + para.getSpaceAfter()
            page_open = True
            continue
        pieces = para.splitOn(pdf, avail_w, avail_h)
        if len(pieces) >= 2:
            # Draw what fits on this page and carry the remainder over.
            head = pieces[0]
            _, head_h = head.wrapOn(pdf, avail_w, avail_h)
            head.drawOn(pdf, margin, y - head_h)
            pending.extend(reversed(pieces[1:]))
        elif y < top:
            # Nothing fits in the space left: retry on a fresh page.
            pending.append(para)
        else:
            # Unsplittable even on an empty page: draw anyway so the loop
            # always makes progress (content may overflow the bottom margin).
            para.drawOn(pdf, margin, y - height)
        pdf.showPage()
        pdf.setPageSize(page_size)
        y = top
        page_open = False
    if page_open:
        pdf.showPage()


def _draw_image_page(pdf, img_obj, caption, page_label, cap_h):
    """Emit one page sized to ``img_obj`` plus a ``cap_h``-high caption strip below it."""
    iw, ih = img_obj.size
    pdf.setPageSize((iw, ih + cap_h))
    pdf.drawImage(ImageReader(img_obj), 0, cap_h, width=iw, height=ih,
                  preserveAspectRatio=True, anchor='c', mask='auto')
    pdf.setFont('Helvetica', 12)
    pdf.setFillColorRGB(0, 0, 0)
    pdf.drawCentredString(iw / 2, cap_h / 2 + 3, caption)
    pdf.setFont('Helvetica', 8)
    pdf.setFillColorRGB(0.5, 0.5, 0.5)
    pdf.drawRightString(iw - 10, 8, page_label)
    pdf.showPage()


def make_image_sized_pdf(sources, is_markdown_flags):
    """Build a PDF from images and markdown files and return its bytes.

    Each image gets its own page sized to the image (pixel dimensions are used
    directly as points) with a caption strip underneath. Each markdown source
    is rendered as plain paragraphs on letter-sized pages, with markdown image
    references stripped.

    The previous version drew every image on a throwaway canvas and only
    appended a ``PageBreak`` to the story, so images never reached the
    output. Everything is now drawn on one canvas backed by the returned
    buffer.

    Args:
        sources: File paths (str) or, for images, file-like objects.
        is_markdown_flags: Parallel booleans; ``True`` marks a markdown path.

    Returns:
        bytes | None: The PDF content, or ``None`` when there was nothing to
        render or generation failed (the reason is shown via Streamlit).
    """
    if not sources:
        st.warning("No sources provided for PDF generation.")
        return None
    buf = BytesIO()
    md_style = ParagraphStyle(
        name='Markdown',
        fontSize=10,
        leading=12,
        spaceAfter=6,
        alignment=TA_JUSTIFY,
        fontName='Helvetica'
    )
    margin = 36
    cap_h = 30
    total = len(sources)
    sources_rendered = 0
    try:
        pdf = canvas.Canvas(buf, pagesize=letter)
        for idx, (src, is_md) in enumerate(zip(sources, is_markdown_flags), start=1):
            status_placeholder = st.empty()
            filename = 'page_' + str(idx)
            status_placeholder.info(f"Adding page {idx}/{total}: {os.path.basename(str(src))}...")
            try:
                if is_md:
                    with open(src, 'r', encoding='utf-8') as f:
                        content = f.read()
                    content = re.sub(r'!\[.*?\]\(.*?\)', '', content)
                    # Build every Paragraph before drawing so a markup parse
                    # error cannot leave a half-drawn page behind.
                    paragraphs = [
                        Paragraph(chunk.strip(), md_style)
                        for chunk in content.split('\n\n')
                        if chunk.strip()
                    ]
                    _draw_markdown_pages(pdf, paragraphs, letter, margin)
                    sources_rendered += 1
                    status_placeholder.success(f"Added markdown page {idx}/{total}: {filename}")
                else:
                    if isinstance(src, str):
                        if not os.path.exists(src):
                            logger.warning(f"Image file not found: {src}. Skipping.")
                            status_placeholder.warning(f"Skipping missing file: {os.path.basename(src)}")
                            continue
                        img_obj = Image.open(src)
                        filename = os.path.basename(src)
                    else:
                        src.seek(0)
                        img_obj = Image.open(src)
                        filename = getattr(src, 'name', f'uploaded_image_{idx}')
                        src.seek(0)
                    with img_obj:
                        iw, ih = img_obj.size
                        if iw <= 0 or ih <= 0:
                            logger.warning(f"Invalid image dimensions ({iw}x{ih}) for {filename}. Skipping.")
                            status_placeholder.warning(f"Skipping invalid image: {filename}")
                            continue
                        _draw_image_page(pdf, img_obj, clean_stem(filename), f"Page {idx}", cap_h)
                    sources_rendered += 1
                    status_placeholder.success(f"Added image page {idx}/{total}: {filename}")
            except Exception as e:
                logger.error(f"Error processing source {src}: {e}")
                status_placeholder.error(f"Error adding page {idx}: {e}")
        if sources_rendered == 0:
            st.error("PDF generation resulted in an empty file.")
            return None
        pdf.save()
        buf.seek(0)
        if buf.getbuffer().nbytes < 100:
            st.error("PDF generation resulted in an empty file.")
            return None
        return buf.getvalue()
    except Exception as e:
        logger.error(f"Fatal error during PDF generation: {e}")
        st.error(f"PDF Generation Failed: {e}")
        return None
715
+
716
def update_gallery(gallery_type='image'):
    """Render the asset gallery for one asset type into its stored container.

    For each file returned by ``get_typed_gallery_files(gallery_type)`` (up to
    ``st.session_state.gallery_size`` items) this shows the file name, a
    collapsible preview, a selection checkbox, a download button and a delete
    button. Checkbox values are written to
    ``st.session_state['asset_checkboxes'][gallery_type]``.

    Args:
        gallery_type: ``'image'``, ``'pdf'`` or ``'md'``; selects the
            container, the file list and the preview renderer.
    """
    mime_map = {'.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.pdf': 'application/pdf', '.md': 'text/markdown'}
    container = st.session_state['asset_gallery_container'][gallery_type]
    with container:
        st.markdown(f"### {gallery_type.capitalize()} Gallery 📸")
        files = get_typed_gallery_files(gallery_type)
        if not files:
            st.info(f"No {gallery_type} assets found yet.")
            return
        st.caption(f"Found {len(files)} assets:")
        for idx, file in enumerate(files[:st.session_state.gallery_size]):
            # NOTE(review): the counter lives in session state, so unless it is
            # reset elsewhere on each run these widget keys change on every
            # rerun, which may discard checkbox/button interactions -- confirm.
            st.session_state['unique_counter'] += 1
            unique_id = st.session_state['unique_counter']
            item_key_base = f"{gallery_type}_gallery_item_{os.path.basename(file)}_{unique_id}"
            basename = os.path.basename(file)
            st.markdown(f"**{basename}**")
            try:
                file_ext = os.path.splitext(file)[1].lower()
                if gallery_type == 'image' and file_ext in ['.png', '.jpg', '.jpeg']:
                    with st.expander("Preview", expanded=False):
                        # Close the file handle once Streamlit has read the image.
                        with Image.open(file) as preview_img:
                            st.image(preview_img, use_container_width=True)
                elif gallery_type == 'pdf' and file_ext == '.pdf':
                    with st.expander("Preview (Page 1)", expanded=False):
                        # Context manager closes the document even if rendering fails.
                        with fitz.open(file) as doc:
                            if len(doc) > 0:
                                pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
                                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                                st.image(img, use_container_width=True)
                            else:
                                st.warning("Empty PDF")
                elif gallery_type == 'md' and file_ext == '.md':
                    with st.expander("Preview (Start)", expanded=False):
                        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
                            content_preview = f.read(200)
                        st.code(content_preview + "...", language='markdown')
                action_cols = st.columns(3)
                with action_cols[0]:
                    checkbox_key = f"cb_{item_key_base}"
                    st.session_state['asset_checkboxes'][gallery_type][file] = st.checkbox(
                        "Select",
                        value=st.session_state['asset_checkboxes'][gallery_type].get(file, False),
                        key=checkbox_key
                    )
                with action_cols[1]:
                    mime_type = mime_map.get(file_ext, "application/octet-stream")
                    dl_key = f"dl_{item_key_base}"
                    try:
                        with open(file, "rb") as fp:
                            st.download_button(
                                label="📥",
                                data=fp,
                                file_name=basename,
                                mime=mime_type,
                                key=dl_key,
                                help="Download this file"
                            )
                    except Exception as dl_e:
                        st.error(f"Download Error: {dl_e}")
                with action_cols[2]:
                    delete_key = f"del_{item_key_base}"
                    if st.button("🗑️", key=delete_key, help=f"Delete {basename}"):
                        try:
                            os.remove(file)
                            # Drop references to the deleted file from session state.
                            st.session_state['asset_checkboxes'][gallery_type].pop(file, None)
                            if file in st.session_state.get('layout_snapshots', []):
                                st.session_state['layout_snapshots'].remove(file)
                            logger.info(f"Deleted {gallery_type} asset: {file}")
                            st.toast(f"Deleted {basename}!", icon="✅")
                            st.rerun()
                        except OSError as e:
                            logger.error(f"Error deleting file {file}: {e}")
                            st.error(f"Could not delete {basename}")
            except Exception as e:
                st.error(f"Error displaying {basename}: {e}")
                logger.error(f"Error displaying asset {file}: {e}")
            st.markdown("---")
793
+
794
# --- UI Elements ---
# Sidebar controls for remote inference: HF token, provider, model choice and
# (when the OpenAI client is available) the OpenAI model.
st.sidebar.subheader("🤖 AI Settings")
with st.sidebar.expander("API Inference Settings", expanded=False):
    st.session_state.hf_custom_key = st.text_input(
        "Custom HF Token",
        value=st.session_state.get('hf_custom_key', ""),
        type="password",
        key="hf_custom_key_input"
    )
    token_status = "Custom Key Set" if st.session_state.hf_custom_key else ("Default HF_TOKEN Set" if HF_TOKEN else "No Token Set")
    st.caption(f"HF Token Status: {token_status}")
    providers_list = ["hf-inference", "cerebras", "together", "sambanova", "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius"]
    st.session_state.hf_provider = st.selectbox(
        "HF Inference Provider",
        options=providers_list,
        index=providers_list.index(st.session_state.get('hf_provider', DEFAULT_PROVIDER)),
        key="hf_provider_select"
    )
    st.session_state.hf_custom_api_model = st.text_input(
        "Custom HF API Model ID",
        value=st.session_state.get('hf_custom_api_model', ""),
        key="hf_custom_model_input"
    )
    st.session_state.hf_selected_api_model = st.selectbox(
        "Featured HF API Model",
        options=FEATURED_MODELS_LIST,
        index=FEATURED_MODELS_LIST.index(st.session_state.get('hf_selected_api_model', FEATURED_MODELS_LIST[0])),
        key="hf_featured_model_select"
    )
    # Resolve the effective model only after the selectbox has stored this
    # run's choice; computing it earlier showed the previous selection and
    # read an attribute that may not exist yet on the first run.
    effective_hf_model = st.session_state.hf_custom_api_model.strip() or st.session_state.hf_selected_api_model
    st.caption(f"Effective HF API Model: {effective_hf_model}")
    if _openai_available:
        st.session_state.openai_selected_model = st.selectbox(
            "OpenAI Model",
            options=OPENAI_MODELS_LIST,
            index=OPENAI_MODELS_LIST.index(st.session_state.get('openai_selected_model', OPENAI_MODELS_LIST[0])),
            key="openai_model_select"
        )
833
# Sidebar picker for the active locally loaded model. The choice is stored in
# st.session_state.selected_local_model_path, with None meaning "no model".
with st.sidebar.expander("Local Model Selection", expanded=True):
    if _transformers_available:
        local_model_options = ["None", *st.session_state.get('local_models', {}).keys()]
        current_selection = st.session_state.get('selected_local_model_path', "None")
        # A remembered path that is no longer loaded falls back to "None".
        if current_selection not in local_model_options:
            current_selection = "None"
        selected_path = st.selectbox(
            "Active Local Model",
            options=local_model_options,
            index=local_model_options.index(current_selection),
            format_func=lambda x: "None" if x == "None" else os.path.basename(x),
            key="local_model_selector"
        )
        st.session_state.selected_local_model_path = None if selected_path == "None" else selected_path
        if not st.session_state.selected_local_model_path:
            st.caption("No local model selected.")
        else:
            model_info = st.session_state.local_models[st.session_state.selected_local_model_path]
            st.caption(f"Type: {model_info.get('type', 'Unknown')}")
            _loaded_model = model_info.get('model')
            st.caption(f"Device: {_loaded_model.device if _loaded_model else 'N/A'}")
    else:
        st.warning("Transformers library not found. Cannot load local models.")
855
+
856
# Sidebar sliders for text-generation parameters; each value is stored in
# st.session_state under the name in the first column.
with st.sidebar.expander("Generation Parameters", expanded=False):
    # (state name, label, min, max, default, widget key, extra slider kwargs)
    for _state_name, _label, _lo, _hi, _default, _widget_key, _extra in (
        ("gen_max_tokens", "Max New Tokens", 1, 4096, 512, "param_max_tokens", {}),
        ("gen_temperature", "Temperature", 0.01, 2.0, 0.7, "param_temp", {"step": 0.01}),
        ("gen_top_p", "Top-P", 0.01, 1.0, 0.95, "param_top_p", {"step": 0.01}),
        ("gen_frequency_penalty", "Repetition Penalty", 0.0, 1.0, 0.0, "param_repetition", {"step": 0.05}),
        ("gen_seed", "Seed", -1, 65535, -1, "param_seed", {"step": 1}),
    ):
        st.session_state[_state_name] = st.slider(
            _label,
            _lo,
            _hi,
            st.session_state.get(_state_name, _default),
            key=_widget_key,
            **_extra
        )
862
+
863
# Gallery size control followed by the three asset galleries.
st.sidebar.subheader("🖼️ Gallery Settings")
# The slider belongs under the sidebar heading above; it was previously
# created with st.slider and therefore rendered in the main page body.
st.sidebar.slider(
    "Max Items Shown",
    min_value=2,
    max_value=50,
    value=st.session_state.get('gallery_size', 10),
    key="gallery_size_slider"
)
st.session_state.gallery_size = st.session_state.gallery_size_slider
st.sidebar.markdown("---")
update_gallery('image')
update_gallery('md')
update_gallery('pdf')
876
+
877
# --- Main Application ---
# Page title, tagline and the top-level tab bar. Emoji in the labels were
# mis-encoded (mojibake) and are restored here; a few had lost bytes and were
# reconstructed from the tab's subject -- confirm against the intended icons.
st.title("Vision & Layout Titans 🚀🖼️📄")
st.markdown("Create PDFs from images and markdown, process with AI, and manage characters.")
tabs = st.tabs([
    "Image/MD->PDF Layout 🖼️➡️📄",
    "Camera Snap 📷",
    "Download PDFs 📥",
    "Build Titan (Local Models) 🌱",
    "PDF Process (AI) 📄",
    "Image Process (AI) 🖼️",
    "Text Process (AI) 📝",
    "Test OCR (AI) 🔍",
    "Test Image Gen (Diffusers) 🎨",
    "Character Editor 🧑‍🎨",
    "Character Gallery 🖼️"
])
893
+
894
+ with tabs[0]:
895
+ st.header("Image/Markdown to PDF Layout Generator")
896
+ st.markdown("Select images and markdown files, reorder them, and generate a PDF.")
897
+ col1, col2 = st.columns(2)
898
+ with col1:
899
+ st.subheader("A. Select Assets")
900
+ selected_images = [f for f in get_typed_gallery_files('image') if st.session_state['asset_checkboxes']['image'].get(f, False)]
901
+ selected_mds = [f for f in get_typed_gallery_files('md') if st.session_state['asset_checkboxes']['md'].get(f, False)]
902
+ st.write(f"Selected Images: {len(selected_images)}")
903
+ st.write(f"Selected Markdown Files: {len(selected_mds)}")
904
+ with col2:
905
+ st.subheader("B. Review and Reorder")
906
+ layout_records = []
907
+ for idx, path in enumerate(selected_images + selected_mds, start=1):
908
+ is_md = path in selected_mds
909
+ try:
910
+ if is_md:
911
+ with open(path, 'r', encoding='utf-8') as f:
912
+ content = f.read(50)
913
+ layout_records.append({
914
+ "filename": os.path.basename(path),
915
+ "source": path,
916
+ "type": "Markdown",
917
+ "preview": content + "...",
918
+ "order": idx
919
+ })
920
+ else:
921
+ with Image.open(path) as im:
922
+ w, h = im.size
923
+ ar = round(w / h, 2) if h > 0 else 0
924
+ orient = "Square" if 0.9 <= ar <= 1.1 else ("Landscape" if ar > 1.1 else "Portrait")
925
+ layout_records.append({
926
+ "filename": os.path.basename(path),
927
+ "source": path,
928
+ "type": "Image",
929
+ "width": w,
930
+ "height": h,
931
+ "aspect_ratio": ar,
932
+ "orientation": orient,
933
+ "order": idx
934
+ })
935
+ except Exception as e:
936
+ logger.warning(f"Could not process {path}: {e}")
937
+ st.warning(f"Skipping invalid file: {os.path.basename(path)}")
938
+ if not layout_records:
939
+ st.infoperiod