# Captured from a hosted "Spaces" file viewer (status shown at capture: Sleeping).
# File size: 3,928 bytes.
# Commits shown in the viewer's blame column: 22bd0c2, fcabc39, 59b000d, be52360, f7d8d0c.
# (The viewer's per-line commit markers and 1-108 line-number gutter were page
# residue, not part of the program, and are summarized here.)
import gradio as gr
import requests
import os
import json
import re
import html
# Endpoint URL and API key come from the environment; either one is None when
# its variable is unset.
API_URL = os.environ.get("RUNPOD_API_URL")
API_KEY = os.environ.get("RUNPOD_API_KEY")

# Headers attached to every API request: bearer-token auth and a JSON body type.
headers = {}
headers["Authorization"] = "Bearer {}".format(API_KEY)
headers["Content-Type"] = "application/json"
def escape_html(text):
    """Return *text* with HTML-special characters replaced by entities.

    Delegates to ``html.escape`` with its default settings, so ``&``, ``<``,
    ``>`` and both quote characters are converted.
    """
    escaped = html.escape(text)
    return escaped
def format_response_for_display(text):
    """Convert a raw model reply into HTML-safe markup for the chat window.

    Steps, in order:
      1. HTML-escape the whole reply so it cannot inject markup.
      2. Highlight ``<thinking>...</thinking>`` (blue) and
         ``<reflection>...</reflection>`` (green): the tags are shown literally
         in a monospace span and the enclosed text is italicised.
      3. Drop ``<output>``/``</output>`` tags while keeping their content.
      4. Turn newlines into ``<br>``.

    Tag matching is case-insensitive and spans multiple lines.

    Args:
        text: The reply text as returned by the model.

    Returns:
        The formatted HTML string.
    """
    # Escape first. From here on the tags appear as "&lt;tag&gt;", so every
    # pattern below must match that escaped spelling; matching the raw "<tag>"
    # form after escaping can never succeed.
    text = html.escape(text)

    tag_flags = re.IGNORECASE | re.DOTALL

    def highlight(tag, color, escaped):
        # Wrap the (escaped) opening/closing tags in a coloured monospace span
        # and italicise whatever lies between them.
        span_open = f'<span style="font-family: monospace; color: {color};">'
        pattern = rf'(&lt;{tag}&gt;)(.*?)(&lt;/{tag}&gt;)'
        replacement = rf'{span_open}\1</span><i>\2</i>{span_open}\3</span>'
        return re.sub(pattern, replacement, escaped, flags=tag_flags)

    text = highlight("thinking", "blue", text)
    text = highlight("reflection", "green", text)

    # Remove <output> tags but keep their content.
    text = re.sub(r'&lt;output&gt;(.*?)&lt;/output&gt;', r'\1', text, flags=tag_flags)

    # Replace newlines with <br> tags.
    return text.replace('\n', '<br>')
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Send the conversation to the chat-completions endpoint and return the reply.

    Builds a ``messages`` list (optional system prompt, prior turns, then the
    new user message), POSTs it to ``API_URL`` with the module-level
    ``headers``, and returns the first choice's content formatted by
    ``format_response_for_display``.

    Args:
        message: The new user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs for earlier
            turns; may be empty or None.
        system_message: Optional system prompt; omitted from the request when
            None, empty or whitespace-only.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.
        top_p: Value sent as ``top_p``.

    Returns:
        The formatted reply on success, otherwise a human-readable string
        starting with "Error:" or "Unexpected error:". Failures inside the
        request/parse step are reported this way rather than raised.
    """
    # (connect, read) timeout in seconds. Without one, requests waits forever
    # on a stalled endpoint. The read window is generous because generation
    # time is unknown here -- NOTE(review): tune to the deployment.
    request_timeout = (10, 300)

    messages = []
    # A blank system prompt carries no instruction, so do not send it as a turn.
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})

    # NOTE(review): if the UI stores this function's formatted HTML return value
    # in history, the assistant turns replayed here contain markup rather than
    # the raw model text -- confirm against the UI's history handling.
    for human, assistant in history or []:
        # Skip missing halves of a turn rather than sending "content": null.
        if human is not None:
            messages.append({"role": "user", "content": human})
        if assistant is not None:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    data = {
        "model": "forcemultiplier/fmx-reflective-2b",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p
    }

    try:
        print(f"Sending request to API: {API_URL}")
        print(f"Request data: {json.dumps(data, indent=2)}")
        response = requests.post(
            API_URL, headers=headers, json=data, timeout=request_timeout
        )
        print(f"Raw API Response: {response.text}")
        print(f"Response Status Code: {response.status_code}")
        response.raise_for_status()
        response_json = response.json()
        print(f"Formatted API Response: {json.dumps(response_json, indent=2)}")

        # Only a dict with a non-empty "choices" list is a usable reply.
        choices = response_json.get("choices") if isinstance(response_json, dict) else None
        if not choices:
            return f"Error: Unexpected response format. Full response: {response_json}"

        content = choices[0]['message']['content']
        formatted_content = format_response_for_display(content)
        print(f"Formatted content for display: {formatted_content}")  # For debugging
        return formatted_content
    except requests.exceptions.RequestException as e:
        # Covers connection failures, timeouts and non-2xx statuses.
        print(f"Request Exception: {str(e)}")
        return f"Error: {str(e)}"
    except ValueError as e:
        print(f"JSON Parsing Error: {str(e)}")
        return f"Error: Invalid JSON response. {str(e)}"
    except KeyError as e:
        print(f"Key Error: {str(e)}")
        return f"Error: Unexpected response structure. Missing key: {str(e)}"
    except Exception as e:
        # Last-resort boundary so the chat UI shows a message instead of crashing.
        print(f"Unexpected Error: {str(e)}")
        return f"Unexpected error: {str(e)}"
# Extra controls for the chat UI, listed in the same order as the trailing
# parameters of `respond` (system_message, max_tokens, temperature, top_p).
_extra_controls = [
    gr.Textbox(label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

# Chat interface that uses `respond` as its handler.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_controls,
    css=".message-wrap { white-space: pre-wrap; }",
)
if __name__ == "__main__":
    # Print the configured endpoint at startup so a missing RUNPOD_API_URL
    # (shown as "None") is visible before the first request.
    print(f"Starting application with API URL: {API_URL}")
    demo.launch()