Mubbashir Ahmed committed
Commit 4f154d7 · 1 Parent(s): 697c8ae

commented mixtral code

Files changed (1)
  1. app.py +25 -24
app.py CHANGED
@@ -1,8 +1,8 @@
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+# import torch
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
@@ -20,13 +20,13 @@ qwen_client = InferenceClient(
 )
 
 # ------------------------
-# Mixtral Local Setup
+# Mixtral Local Setup (DISABLED)
 # ------------------------
-mixtral_model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-mixtral_tokenizer = AutoTokenizer.from_pretrained(mixtral_model_id)
-mixtral_model = AutoModelForCausalLM.from_pretrained(
-    mixtral_model_id, torch_dtype=torch.float16
-).to("cuda")
+# mixtral_model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+# mixtral_tokenizer = AutoTokenizer.from_pretrained(mixtral_model_id)
+# mixtral_model = AutoModelForCausalLM.from_pretrained(
+#     mixtral_model_id, torch_dtype=torch.float16
+# ).to("cuda")
 
 # ------------------------
 # Unified Inference Function with Chat History
@@ -49,21 +49,22 @@ def run_model_with_history(model_name, user_input, chat_history):
         )
         reply = result.choices[0].message.content
 
-    elif model_name == "Mixtral 8x7B":
-        full_prompt = ""
-        for msg in messages:
-            prefix = "User: " if msg["role"] == "user" else "Assistant: "
-            full_prompt += f"{prefix}{msg['content']}\n"
-        inputs = mixtral_tokenizer(full_prompt, return_tensors="pt").to("cuda")
-        outputs = mixtral_model.generate(
-            **inputs,
-            max_new_tokens=512,
-            do_sample=True,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.95
-        )
-        reply = mixtral_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Mixtral section disabled due to space constraints
+    # elif model_name == "Mixtral 8x7B":
+    #     full_prompt = ""
+    #     for msg in messages:
+    #         prefix = "User: " if msg["role"] == "user" else "Assistant: "
+    #         full_prompt += f"{prefix}{msg['content']}\n"
+    #     inputs = mixtral_tokenizer(full_prompt, return_tensors="pt").to("cuda")
+    #     outputs = mixtral_model.generate(
+    #         **inputs,
+    #         max_new_tokens=512,
+    #         do_sample=True,
+    #         temperature=0.7,
+    #         top_k=50,
+    #         top_p=0.95
+    #     )
+    #     reply = mixtral_tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     else:
         reply = "❌ Invalid model selection."
@@ -90,7 +91,7 @@ with gr.Blocks() as demo:
     gr.Markdown("## 🧠 Generative AI Model Evaluation with Context")
 
     model_choice = gr.Dropdown(
-        choices=["LLaMA 4", "Qwen3 14B", "Mixtral 8x7B"],
+        choices=["LLaMA 4", "Qwen3 14B"],  # "Mixtral 8x7B" removed
         label="Select Model",
         value="LLaMA 4"
     )
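
Note: the commit comments out the local Mixtral load ("space constraints") rather than routing it through the hosted Inference API like the LLaMA 4 and Qwen3 clients already used in app.py. The sketch below is not part of this commit; it is a minimal, hypothetical alternative that assumes the Inference API serves mistralai/Mixtral-8x7B-Instruct-v0.1 and that HF_TOKEN is authorized to call it.

# Hypothetical alternative (not in this commit): call Mixtral via the hosted
# Inference API instead of loading it locally, mirroring the existing
# llama/qwen InferenceClient setup in app.py.
import os
from huggingface_hub import InferenceClient

HF_TOKEN = os.environ.get("HF_TOKEN")

# Assumes the hosted endpoint actually serves this model and the token has access.
mixtral_client = InferenceClient(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    token=HF_TOKEN,
)

def mixtral_reply(messages, max_tokens=512):
    """Send the accumulated chat history and return the assistant's reply."""
    # messages follows the same [{"role": ..., "content": ...}] list that
    # run_model_with_history already builds for the other models.
    result = mixtral_client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=0.7,
        top_p=0.95,
    )
    return result.choices[0].message.content

With this approach the "Mixtral 8x7B" branch and dropdown entry could be restored without pulling the model weights into the Space, since generation happens on the remote endpoint.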