import gradio as gr
import subprocess
import time

# import os  # Not strictly needed in *this* version of app.py as no env vars are read


# --- Ollama Helper Functions ---
def check_ollama_running():
    """Checks if the Ollama service is accessible."""
    try:
        subprocess.run(["ollama", "ps"], check=True, capture_output=True, timeout=5)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
        return False
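
# Note: `ollama ps` lists the currently loaded models and typically exits non-zero (or is not
# found at all) when the Ollama daemon is unreachable, so a clean exit above is treated as
# "service is up".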


def get_ollama_models():
    """Gets a list of locally available Ollama models."""
    # No upfront check_ollama_running() call here: this function runs once at startup
    # (to populate AVAILABLE_MODELS), and the streaming handler performs its own
    # availability check first. Inside the container, Ollama should already be running.
    try:
        result = subprocess.run(["ollama", "list"], check=True, capture_output=True, text=True, timeout=10)
        models = []
        lines = result.stdout.strip().split("\n")
        if len(lines) > 1:
            for line in lines[1:]:
                parts = line.split()
                if parts:
                    models.append(parts[0])
        # Ensure models are sorted and unique for a consistent dropdown
        return sorted(list(set(models)))
    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired) as e:
        print(f"Error in get_ollama_models: {e}")  # Surface the failure in the logs
        return []
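
# For reference, `ollama list` typically prints a header row followed by one line per model,
#   NAME        ID            SIZE      MODIFIED
#   qwen3:4b    <id>          <size>    <modified>
# and get_ollama_models() keeps only the first column (the model name). The exact layout may
# vary slightly between Ollama versions.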


# --- Core Logic ---
# Typing speed simulation
CHAR_DELAY = 0.02  # Adjust for desired speed (0.01 is fast, 0.05 is slower)
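
# Note: at 0.02 s per non-whitespace character, a ~1,000-character response adds roughly
# 20 seconds of artificial delay on top of the model's own generation time.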


def reasoning_ollama_stream(model_name, prompt, mode):
    """
    Streams the response from an Ollama model with a simulated typing speed.
    """
    if not model_name:
        yield "Error: No model selected. Please choose a model."
        return

    if not prompt.strip():
        yield "Error: Prompt cannot be empty."
        return

    # This check adds robustness, even when running inside Docker.
    if not check_ollama_running():
        yield "Error: Ollama service does not seem to be running or accessible. Please start Ollama."
        return

    # Runtime check: the Dockerfile is expected to pull the models, but this confirms they are actually present.
    available_models_runtime = get_ollama_models()
    if model_name not in available_models_runtime:
        yield f"Error: Model '{model_name}' selected, but not found by Ollama at runtime. Available: {available_models_runtime}. Please ensure it was pulled."
        return
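
    # Qwen3 honors "soft switch" suffixes in the prompt: appending /think or /no_think toggles
    # the visible reasoning trace for that turn (other models would treat the suffix as plain text).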
    prompt_with_mode = f"{prompt.strip()} /{mode}"

    command = ["ollama", "run", model_name]
    displayed_response = ""

    try:
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
            universal_newlines=True,
        )
        process.stdin.write(prompt_with_mode + "\n")
        process.stdin.close()

        for line_chunk in iter(process.stdout.readline, ""):
            if not line_chunk and process.poll() is not None:  # Check if the process ended
                break
            for char in line_chunk:
                displayed_response += char
                yield displayed_response
                if char.strip():  # Only sleep for non-whitespace characters
                    time.sleep(CHAR_DELAY)

        process.stdout.close()
        return_code = process.wait(timeout=10)

        if return_code != 0:
            error_output = process.stderr.read()
            error_message = f"\n\n--- Ollama Error (code {return_code}) ---\n{error_output.strip()}"
            if displayed_response and not displayed_response.endswith(error_message):
                displayed_response += error_message
            elif not displayed_response:
                displayed_response = error_message.strip()
            yield displayed_response
            return

        if not displayed_response.strip() and return_code == 0:
            yield "Model returned an empty response."
        elif displayed_response:
            yield displayed_response

    except FileNotFoundError:
        yield "Error: 'ollama' command not found. Please ensure Ollama is installed and on your PATH (or that the Dockerfile is correct)."
    except subprocess.TimeoutExpired:  # Raised by process.wait() above
        yield "Error: Ollama process timed out while waiting for completion."
        if displayed_response:
            yield displayed_response
    except Exception as e:
        yield f"An unexpected error occurred: {str(e)}"
        if displayed_response:
            yield displayed_response


# --- Gradio UI ---
# This runs once when the script starts.
# In Docker, this queries the Ollama instance inside the container AFTER the models are pulled by CMD.
AVAILABLE_MODELS = get_ollama_models()
QWEN_MODELS = [m for m in AVAILABLE_MODELS if "qwen" in m.lower()]

INITIAL_MODEL = None
# Prefer qwen3:4b if available, then any other Qwen model, then whatever else is installed.
if "qwen3:4b" in AVAILABLE_MODELS:
    INITIAL_MODEL = "qwen3:4b"
elif QWEN_MODELS:
    INITIAL_MODEL = QWEN_MODELS[0]
elif AVAILABLE_MODELS:
    INITIAL_MODEL = AVAILABLE_MODELS[0]
# If no models are found, INITIAL_MODEL stays None and the dropdown shows "No models found...".


with gr.Blocks(title="Qwen3 x Ollama", theme=gr.themes.Soft()) as demo:
    gr.HTML(
        """
        <h1 style='text-align: center'>
            Qwen3 Reasoning with Ollama
        </h1>
        """
    )
    gr.HTML(
        """
        <h3 style='text-align: center'>
            <a href='https://opencv.org/university/' target='_blank'>OpenCV Courses</a> | <a href='https://github.com/OpenCV-University' target='_blank'>Github</a>
        </h3>
        """
    )
    gr.Markdown(
        """
        - Interact with a Qwen3 model hosted on Ollama.
        - Switch between `/think` and `/no_think` modes to explore the thinking process.
        - The response will stream with a simulated typing effect.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            model_selector = gr.Dropdown(
                label="Select Model",
                choices=AVAILABLE_MODELS if AVAILABLE_MODELS else ["No models found - check Ollama setup"],
                value=INITIAL_MODEL,
                interactive=True,
            )
            prompt_input = gr.Textbox(
                label="Enter your prompt",
                placeholder="e.g., Explain quantum entanglement in simple terms.",
                lines=5,
                elem_id="prompt-input",
            )
            mode_radio = gr.Radio(
                ["think", "no_think"],
                label="Reasoning Mode",
                value="think",
                info="`/think` encourages step-by-step reasoning. `/no_think` aims for a direct answer.",
            )
            with gr.Row():
                submit_button = gr.Button("Generate Response", variant="primary")
                clear_button = gr.ClearButton()

        with gr.Column(scale=2):
            status_output = gr.Textbox(
                label="Status",
                interactive=False,
                lines=1,
                placeholder="Awaiting submission...",
                elem_id="status-output",
            )
            response_output = gr.Textbox(
                label="Model Response", lines=20, interactive=False, show_copy_button=True, elem_id="response-output"
            )

    def handle_submit_wrapper(model, prompt, mode):
        yield {status_output: "Processing... Preparing to stream response.", response_output: ""}

        final_chunk = ""
        for chunk in reasoning_ollama_stream(model, prompt, mode):
            final_chunk = chunk
            yield {status_output: "Streaming response...", response_output: chunk}

        # Match on "--- Ollama Error" (without the trailing dashes), since the streamed
        # marker includes the return code, e.g. "--- Ollama Error (code 1) ---".
        if "Error:" in final_chunk or "--- Ollama Error" in final_chunk:
            yield {status_output: "Completed with issues.", response_output: final_chunk}
        elif "Model returned an empty response." in final_chunk:
            yield {status_output: "Model returned an empty response.", response_output: final_chunk}
        elif not final_chunk.strip():
            yield {status_output: "Completed, but no substantive output received.", response_output: ""}
        else:
            yield {status_output: "Response generated successfully!", response_output: final_chunk}
    submit_button.click(
        fn=handle_submit_wrapper,
        inputs=[model_selector, prompt_input, mode_radio],
        outputs=[status_output, response_output],
    )
    clear_button.add([prompt_input, response_output, status_output])

    # Pick a model for the example rows. Prefer INITIAL_MODEL; if no models were found at
    # startup, fall back to "qwen3:4b" purely as a placeholder (it may not actually be pulled,
    # so a safer approach is to restrict example models to AVAILABLE_MODELS when possible).
    example_model_for_ui = INITIAL_MODEL
    if not example_model_for_ui and AVAILABLE_MODELS:
        example_model_for_ui = AVAILABLE_MODELS[0]
    elif not example_model_for_ui:  # No models at all and INITIAL_MODEL is None
        example_model_for_ui = "qwen3:4b"  # Default example model

    gr.Examples(
        examples=[
            [example_model_for_ui, "What are the main pros and cons of using nuclear energy?", "think"],
            # The second example prefers a model other than qwen3:4b when one is available.
            [
                (
                    example_model_for_ui
                    if example_model_for_ui != "qwen3:4b"
                    else (INITIAL_MODEL if INITIAL_MODEL and INITIAL_MODEL != "qwen3:4b" else "qwen3:1.7b")
                ),
                "Write a short poem about a rainy day.",
                "no_think",
            ],
            [example_model_for_ui, "Plan a 3-day trip to Paris, focusing on historical sites.", "think"],
        ],
        inputs=[model_selector, prompt_input, mode_radio],
        outputs=[status_output, response_output],
        fn=handle_submit_wrapper,
        cache_examples=False,  # Could be True if the inputs were static and fn were pure
    )

    gr.HTML(
        """
        <h3 style='text-align: center'>
            Developed with ❤️ by OpenCV
        </h3>
        """
    )


if __name__ == "__main__":
    print("--- Gradio App Starting ---")
    print(f"Attempting to fetch Ollama models (initial load)... Result: {AVAILABLE_MODELS}")
    print(f"Initial model for UI (if any): {INITIAL_MODEL}")
    print(f"Gradio version: {gr.__version__}")
    print("---------------------------")

    # For local Docker testing, server_name="0.0.0.0" is important.
    # On Hugging Face Spaces, demo.launch() alone is usually enough, since the platform handles proxying.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,  # Set to True if you need a public link for local testing (requires internet)
        # share=os.getenv("GRADIO_SHARE", "False").lower() == "true"  # If using an env var for share
    )
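
# Local smoke test (assumed workflow, outside of Docker/Spaces):
#   ollama serve &            # start the Ollama daemon
#   ollama pull qwen3:4b      # pull at least one model for the dropdown to offer
#   python app.py             # then open http://localhost:7860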