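"""Cookspert: a Gradio web app that answers cooking questions with fine-tuned GPT-2 models.

Three recipe-oriented GPT-2 checkpoints are loaded from the Hugging Face Hub; the user
picks a model and a skill level, the question is wrapped in a matching prompt template,
and the generated answer is displayed in the UI.
"""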
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Check if GPU is available for better performance
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load models and tokenizers with FP16 for speed optimization if GPU is available
model_dirs = [
    "muhammadAhmed22/fine_tuned_gpt2",
    "muhammadAhmed22/MiriFurgpt2-recipes",
    "muhammadAhmed22/auhide-chef-gpt-en"
]

models = {}
tokenizers = {}

def load_model(model_dir):
    # Use FP16 on GPU for faster inference and lower memory; fall back to FP32 on CPU
    dtype = torch.float16 if device.type == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=dtype)
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    
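    # GPT-2 tokenizers ship without a pad token, so reuse the EOS token for padding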
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    # Move the model to the selected device (GPU if available, otherwise CPU)
    model = model.to(device)
    return model, tokenizer

# Load all models
for model_dir in model_dirs:
    model_name = model_dir.split("/")[-1]
    try:
        model, tokenizer = load_model(model_dir)
        models[model_name] = model
        tokenizers[model_name] = tokenizer

        # Warm-up generations to reduce latency on the first real request
        dummy_inputs = ["Hello", "What is a recipe?", "Explain cooking basics"]
        for dummy_input in dummy_inputs:
            warmup = tokenizer(dummy_input, return_tensors='pt').to(device)
            with torch.no_grad():
                model.generate(**warmup, max_new_tokens=1, pad_token_id=tokenizer.pad_token_id)

        print(f"Loaded model and tokenizer from {model_dir}.")
    except Exception as e:
        print(f"Failed to load model from {model_dir}: {e}")
        continue
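# Models that fail to load are skipped, so only successfully loaded ones appear in the UI's model choices.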

def get_response(prompt, model_name, user_type):
    if model_name not in models:
        return "Model not loaded correctly."
    
    model = models[model_name]
    tokenizer = tokenizers[model_name]

    # Define different prompt templates based on user type
    user_type_templates = {
        "Professional": f"As a professional chef, {prompt}\nAnswer:",
        "Beginner": f"Explain in simple terms: {prompt}\nAnswer:",
        "Intermediate": f"As an intermediate cook, {prompt}\nAnswer:",
        "Expert": f"As an expert chef, {prompt}\nAnswer:"
    }

    # Get the appropriate prompt based on user type
    prompt_template = user_type_templates.get(user_type, f"{prompt}\nAnswer:")

    encoding = tokenizer(
        prompt_template,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=512  # Cap prompts at 512 tokens, well within GPT-2's 1,024-token context
    ).to(device)

    # Allow up to 200 new tokens so answers have room for a full explanation or recipe
    max_new_tokens = 200

    with torch.no_grad():
        output = model.generate(
            input_ids=encoding['input_ids'],
            attention_mask=encoding['attention_mask'],
            max_new_tokens=max_new_tokens,
            do_sample=True,          # enable sampling so temperature/top_p actually take effect
            num_beams=1,             # no beam search (faster)
            repetition_penalty=1.1,  # discourage repeated phrases
            temperature=0.7,         # moderate randomness
            top_p=0.85,              # nucleus sampling cutoff
            pad_token_id=tokenizer.pad_token_id
        )

    # Decode only the newly generated tokens so the prompt template is not echoed back
    new_tokens = output[0][encoding['input_ids'].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
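# Example call (hypothetical question; assumes the "fine_tuned_gpt2" checkpoint loaded successfully):
#   get_response("How do I thicken a curry without flour?", "fine_tuned_gpt2", "Beginner")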

def process_input(prompt, model_name, user_type):
    if prompt and prompt.strip():
        return get_response(prompt, model_name, user_type)
    else:
        return "Please provide a valid prompt."

# Gradio Interface with Modern Design
with gr.Blocks(css=""" 
body {
    background-color: #f8f8f8;
    font-family: 'Helvetica Neue', Arial, sans-serif;
}
.title {
    font-size: 2.6rem;
    font-weight: 700;
    color: #ff6347;
    text-align: center;
    margin-bottom: 1.5rem;
}
.container {
    max-width: 800px;
    margin: auto;
    padding: 2rem;
    background-color: #ffffff;
    border-radius: 10px;
    box-shadow: 0 12px 24px rgba(0, 0, 0, 0.1);
}
.button {
    background-color: #ff6347;
    color: white;
    padding: 0.8rem 1.8rem;
    font-size: 1.1rem;
    border: none;
    border-radius: 8px;
    cursor: pointer;
    transition: background-color 0.3s ease;
    margin-top: 1.5rem;
    width: 100%;
}
.button:hover {
    background-color: #ff4500;
}
.gradio-interface .gr-textbox {
    margin-bottom: 1.5rem;
    width: 100%;
    border-radius: 8px;
    padding: 1rem;
    border: 1px solid #ddd;
    font-size: 1rem;
    background-color: #f9f9f9;
    color: #333;
}
.gradio-interface .gr-radio, .gradio-interface .gr-dropdown {
    margin-bottom: 1.5rem;
    width: 100%;
    border-radius: 8px;
    padding: 1rem;
    border: 1px solid #ddd;
    background-color: #f9f9f9;
    font-size: 1rem;
    color: #333;
}
.gradio-interface .gr-textbox[readonly] {
    background-color: #f5f5f5;
    color: #333;
    font-size: 1rem;
}
""") as demo:

    gr.Markdown("<div class='title'>Cookspert: Your Personal AI Chef</div>")

    user_types = ["Professional", "Beginner", "Intermediate", "Expert"]

    with gr.Column(scale=1, min_width=350):
        # Prompt Section
        prompt = gr.Textbox(label="Enter Your Cooking Question", placeholder="What would you like to ask?", lines=3)

        # Model Selection Section
        model_name = gr.Radio(label="Choose Model", choices=list(models.keys()), interactive=True)

        # User Type Selection
        user_type = gr.Dropdown(label="Select Your Skill Level", choices=user_types, value="Beginner")

        # Submit Button
        submit_button = gr.Button("Ask Chef GPT", elem_classes="button")

        # Response Section
        response = gr.Textbox(
            label="Response",
            placeholder="Your answer will appear here...",
            lines=15,  # Increased lines for a longer response display
            interactive=False,
            show_copy_button=True,
            max_lines=20  # Allow for more lines if the response is lengthy
        )

    submit_button.click(fn=process_input, inputs=[prompt, model_name, user_type], outputs=response)
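# launch() below binds to 0.0.0.0 so the app is reachable from outside a container or Space;
# share=True additionally requests a temporary public gradio.live link.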

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=True, debug=True)