Spaces:

prithivMLmods
/

core-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Mar 6

Commit

ecce109

verified ·

1 Parent(s): 4f97d6f

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -38

app.py CHANGED Viewed

@@ -42,23 +42,6 @@ h1 {
 }
 '''
-def progress_bar_html(label: str) -> str:
-    """Return an HTML snippet with a label and an animated, thin light-blue progress bar."""
-    return f"""
-<div style="display: flex; align-items: center;">
-  <span style="margin-right: 8px;">{label}</span>
-  <div style="position: relative; width: 110px; height: 5px; background: #e0e0e0; border-radius: 5px; overflow: hidden;">
-    <div style="width: 100%; height: 100%; background-color: lightblue; animation: progress-bar-animation 1s linear infinite;"></div>
-  </div>
-</div>
-<style>
-@keyframes progress-bar-animation {{
-  0% {{ transform: translateX(-100%); }}
-  100% {{ transform: translateX(100%); }}
-}}
-</style>
-"""
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -105,6 +88,23 @@ def clean_chat_history(chat_history):
             cleaned.append(msg)
     return cleaned
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
@@ -214,15 +214,13 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
-    tts_prefix = "@tts"
-    is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
-    voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
     if text.strip().lower().startswith("@image"):
         # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-        # Yield progress bar for image generation
-        yield progress_bar_html("Generating Image")
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
@@ -236,10 +234,15 @@ def generate(
             use_resolution_binning=True,
             num_images=1,
         )
-        # Yield the generated image, replacing the progress bar
         yield gr.Image(image_paths[0])
         return  # Exit early
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
@@ -252,6 +255,7 @@ def generate(
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
@@ -274,17 +278,18 @@ def generate(
         thread.start()
         buffer = ""
-        # Yield initial progress bar for multimodal generation
-        yield progress_bar_html("Thinking...")
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
-            # Update with partial text and progress bar
-            yield f"<div>{buffer}</div><div>{progress_bar_html('Thinking...')}</div>"
-        # Final output: remove progress bar
-        yield f"<div>{buffer}</div>"
     else:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -305,18 +310,16 @@ def generate(
         t = Thread(target=model.generate, kwargs=generation_kwargs)
         t.start()
-        # Yield initial progress bar for text generation
-        yield progress_bar_html("Thinking...")
         outputs = []
         for new_text in streamer:
             outputs.append(new_text)
-            current_text = "".join(outputs)
-            time.sleep(0.01)
-            # Update message with partial text and progress bar
-            yield f"<div>{current_text}</div><div>{progress_bar_html('Thinking...')}</div>"
         final_response = "".join(outputs)
-        # Final output: only the final response text, progress bar removed.
-        yield f"<div>{final_response}</div>"
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice:

 }
 '''
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
             cleaned.append(msg)
     return cleaned
+# Helper: returns an HTML snippet showing a label beside a thin light-green bar with a translucent white highlight that sweeps across it in a 1s loop.
+def progress_bar_html(label: str) -> str:
+    return f'''
+<div style="display: flex; align-items: center;">
+  <span>{label}</span>
+  <div style="flex-grow: 1; margin-left: 8px; height: 5px; background-color: lightgreen; overflow: hidden; position: relative;">
+    <div style="width: 100%; height: 100%; background: linear-gradient(90deg, rgba(255,255,255,0) 0%, rgba(255,255,255,0.5) 50%, rgba(255,255,255,0) 100%); animation: progressAnim 1s linear infinite;"></div>
+  </div>
+</div>
+<style>
+@keyframes progressAnim {{
+  0% {{ transform: translateX(-100%); }}
+  100% {{ transform: translateX(100%); }}
+}}
+</style>
+'''
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
     text = input_dict["text"]
     files = input_dict.get("files", [])
+    # For image generation triggered by "@image"
     if text.strip().lower().startswith("@image"):
         # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
+        # Yield a progress bar with label "Generating Image"
+        progress_component = gr.HTML(progress_bar_html("Generating Image"))
+        yield progress_component
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
             use_resolution_binning=True,
             num_images=1,
         )
+        # Clear the progress bar (replace with empty HTML) and then yield the image
+        yield gr.HTML.update(value="")
         yield gr.Image(image_paths[0])
         return  # Exit early
+    tts_prefix = "@tts"
+    is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
+    voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
+    # If there are attached image files, use multimodal processing
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
         thread.start()
         buffer = ""
+        # Yield a progress bar with label "Thinking..."
+        progress_component = gr.HTML(progress_bar_html("Thinking..."))
+        yield progress_component
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
+        # Clear the progress bar and yield the final result text.
+        yield gr.HTML.update(value="")
+        yield buffer
     else:
+        # For pure text responses:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         t = Thread(target=model.generate, kwargs=generation_kwargs)
         t.start()
         outputs = []
+        # Yield a progress bar with label "Thinking..."
+        progress_component = gr.HTML(progress_bar_html("Thinking..."))
+        yield progress_component
         for new_text in streamer:
             outputs.append(new_text)
         final_response = "".join(outputs)
+        # Clear the progress bar and yield the final plain text result.
+        yield gr.HTML.update(value="")
+        yield final_response
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice: