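"""Gradio chat app that chains Hugging Face Inference API models:
a prompt-enhancement stage, a multi-model answering stage, and a final
answer-refinement stage."""
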
import gradio as gr
from huggingface_hub import InferenceClient

# Initialize Inference Clients for all models
paligemma224_client = InferenceClient("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
paligemma448_client = InferenceClient("microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned")
paligemma896_client = InferenceClient("ProsusAI/finbert")
paligemma28b_client = InferenceClient("Skywork/Skywork-o1-Open-PRM-Qwen-2.5-1.5B")
llama_client = InferenceClient("llama/3.3-1b")
deepseek_client = InferenceClient("deepseek-ai/deepseek-vl2")
omniparser_client = InferenceClient("microsoft/OmniParser")
pixtral_client = InferenceClient("mistralai/Pixtral-12B-2409")
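# The calls below use InferenceClient.text_generation, which assumes each model
# above exposes a text-generation endpoint on the Hugging Face Inference API;
# models served for other tasks (e.g. classification) will fail at request time.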


def enhance_prompt(prompt: str) -> str:
    # Use the Paligemma clients for prompt enhancement
    prompt_224 = paligemma224_client.text_generation(prompt)
    prompt_448 = paligemma448_client.text_generation(prompt)
    prompt_896 = paligemma896_client.text_generation(prompt)

    # Combine all enhanced prompts into a single one
    enhanced_prompt = f"Enhanced (224): {prompt_224}\nEnhanced (448): {prompt_448}\nEnhanced (896): {prompt_896}"

    # Further refine the combined prompt using the Paligemma 28b client
    ultra_enhanced_prompt = paligemma28b_client.text_generation(enhanced_prompt)

    return ultra_enhanced_prompt


def generate_answer(enhanced_prompt: str) -> str:
    # Generate answers using the three models: llama, deepseek, and omniparser
    llama_answer = llama_client.text_generation(enhanced_prompt)
    deepseek_answer = deepseek_client.text_generation(enhanced_prompt)
    omniparser_answer = omniparser_client.text_generation(enhanced_prompt)

    # Combine answers from all models
    combined_answer = f"Llama: {llama_answer}\nDeepseek: {deepseek_answer}\nOmniparser: {omniparser_answer}"

    return combined_answer


def enhance_answer(answer: str) -> str:
    # Enhance the final answer using the Pixtral model
    enhanced_answer = pixtral_client.text_generation(answer)
    return enhanced_answer


def process(message: str) -> str:
    # Step 1: Enhance the prompt using the Paligemma models
    enhanced_prompt = enhance_prompt(message)
    
    # Step 2: Generate an answer using the three models
    answer = generate_answer(enhanced_prompt)
    
    # Step 3: Enhance the generated answer using Pixtral
    final_answer = enhance_answer(answer)
    
    return final_answer

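# Quick sanity check (hypothetical prompt; requires reachable endpoints and a
# valid Hugging Face token in the environment):
#   print(process("Explain nucleus sampling in one sentence."))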

# Gradio chat callback
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    # Assemble the system message and conversation history.
    # Note: the pipeline currently only forwards the latest user message to process();
    # history, max_tokens, temperature, and top_p are read from the UI but are not
    # yet passed to the inference calls.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    # Get the final enhanced response
    final_answer = process(message)

    # Yield the answer character by character to simulate streaming in the chat UI
    response = ""
    for token in final_answer:
        response += token
        yield response


# Gradio interface setup
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()
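    # When running locally (outside a Hugging Face Space), demo.launch(share=True)
    # can be used instead to expose a temporary public URL.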