Spaces:
Sleeping
Sleeping
upgraded roles of chatbots
Browse files
app.py
CHANGED
@@ -1,64 +1,169 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
"""
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
messages = [{"role": "system", "content": system_message}]
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
messages.append({"role": "assistant", "content": val[1]})
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
messages.append({"role": "user", "content": message})
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
stream=True,
|
34 |
-
temperature=temperature,
|
35 |
-
top_p=top_p,
|
36 |
-
):
|
37 |
-
token = message.choices[0].delta.content
|
38 |
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
""
|
44 |
-
|
45 |
-
""
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
gr.
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
label="
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
|
|
|
|
|
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
-
demo.launch()
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
from huggingface_hub import InferenceClient
|
4 |
|
5 |
+
# ---------------- Role presets ----------------
|
6 |
+
# Maps each selectable role name to the system prompt used for that role.
# Insertion order matters: the UI builds its role dropdown from these keys.
ROLE_PRESETS = {
    "Friendly Chatbot":
        "You are a friendly, concise assistant. Be helpful and keep answers short unless asked.",
    # The only two-line prompt: the pieces are joined by implicit concatenation.
    "Dataset Auditor":
        "You are a dataset QA assistant. Identify duplicates, missing labels, class imbalance, and suspicious samples.\n"
        "Propose concrete fixes and a short remediation checklist.",
    "SQL Explainer":
        "Translate SQL into plain language for non-technical users. Explain step-by-step, then give a short summary.",
    "Code Reviewer":
        "Review code succinctly: correctness, readability, performance, edge cases, and security. Provide minimal diffs.",
    "Data Pipeline Doctor":
        "Debug data pipelines. Use: hypothesis ➜ quick checks ➜ likely fix order. Ask for logs only when needed.",
    "Data Engineering Advisor":
        "Advise on building robust, scalable data pipelines. Suggest architecture patterns, tools, and optimizations.",
    "ML Dataset Preparer":
        "Guide on preparing datasets for machine learning: cleaning, splitting, augmentation, and annotation strategies.",
    "Data Quality Analyst":
        "Evaluate data quality: detect anomalies, missing values, schema mismatches, and provide a remediation plan.",
}
|
31 |
|
32 |
+
# ---------------- Examples per role ----------------
|
33 |
+
# Sample prompts offered per role; keys mirror the role names in ROLE_PRESETS.
# Each role carries two examples that the UI lists for insertion into the input.
ROLE_EXAMPLES = {
    "Friendly Chatbot": [
        "Explain embeddings in one paragraph.",
        "What's a neat productivity trick for engineers?",
    ],
    "Dataset Auditor": [
        "10k images (cats/dogs/birds). First-pass audit plan?",
        "Class imbalance & duplicates: quick remediation checklist.",
    ],
    "SQL Explainer": [
        "Explain for a PM:\nSELECT u.name, COUNT(*) c FROM orders o JOIN users u ON u.id=o.user_id GROUP BY u.name HAVING COUNT(*)>5;",
        "What does this CTE do and why use it?",
    ],
    "Code Reviewer": [
        "Review Python for edge cases:\n\ndef top_k(nums, k):\n return sorted(nums)[-k:]",
        "Find race conditions in this multi-threaded write snippet.",
    ],
    "Data Pipeline Doctor": [
        "ETL fails randomly after 10k rows out of 2M. Where to look first?",
        "Parquet load spikes memory; pragmatic fixes?",
    ],
    "Data Engineering Advisor": [
        "Design a batch + streaming pipeline for clickstream analytics.",
        "What’s the best way to partition a large parquet dataset for Athena?",
    ],
    "ML Dataset Preparer": [
        "Steps to prepare a face recognition dataset from raw videos.",
        "Best augmentation methods for small medical image datasets?",
    ],
    "Data Quality Analyst": [
        "Detect anomalies in tabular data with numerical and categorical columns.",
        "Checklist for ensuring schema consistency across multiple CSV files.",
    ],
}
|
67 |
|
68 |
+
# ---------------- Models ----------------
|
69 |
+
# Model IDs offered in the model dropdown; the first entry is the UI's
# initial selection (the UI reads DEFAULT_MODELS[0]).
DEFAULT_MODELS = [
    "HuggingFaceH4/zephyr-7b-beta",         # initial selection
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "Qwen/Qwen2.5-7B-Instruct",
    "google/gemma-2-9b-it",
]
|
|
|
76 |
|
77 |
+
# ---------------- Client factory ----------------
|
78 |
+
def get_client(model_name: str) -> "InferenceClient":
    """Build an ``InferenceClient`` bound to ``model_name``.

    The access token is read from the ``HF_TOKEN`` environment variable and
    is optional.

    Args:
        model_name: Model ID passed to ``InferenceClient`` as ``model``.

    Returns:
        A new ``InferenceClient``; one is constructed on every call.
    """
    # An HF_TOKEN that is set but empty is treated the same as unset, so an
    # empty string is never forwarded as if it were a credential.
    token = os.getenv("HF_TOKEN") or None
    return InferenceClient(model=model_name, token=token)
|
|
|
81 |
|
82 |
+
# ---------------- Chat backend (streaming) ----------------
|
83 |
+
def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two history layouts are accepted:

    * pairs ``(user_text, assistant_text)`` -- empty/None sides are skipped;
    * dicts carrying ``"role"`` and ``"content"`` keys -- only user/assistant
      entries with non-empty string content are kept.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Unpacking a dict as a pair would yield its *keys*, so dict-style
            # entries must be read by key instead.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
            continue

        user_text, assistant_text = turn
        if user_text:
            converted.append({"role": "user", "content": user_text})
        if assistant_text:
            converted.append({"role": "assistant", "content": assistant_text})
    return converted


def stream_reply(message, history, role, system_message, model_name, max_tokens, temperature, top_p):
    """Stream the assistant's reply to ``message``, yielding the text so far.

    Args:
        message: The new user message.
        history: Earlier turns, either as (user, assistant) pairs or as
            role/content dicts.
        role: Role preset name; its ROLE_PRESETS prompt is the fallback when
            ``system_message`` is blank.
        system_message: Explicit system prompt; wins over the preset when it
            is non-blank after stripping.
        model_name: Model ID handed to ``get_client``.
        max_tokens: Generation limit forwarded to ``chat_completion``.
        temperature: Sampling temperature forwarded to ``chat_completion``.
        top_p: Nucleus-sampling value forwarded to ``chat_completion``.

    Yields:
        The accumulated reply after each non-empty streamed delta. If the
        inference call raises, a final string containing the error text
        (preceded by any reply text already streamed) is yielded instead of
        propagating the exception.
    """
    sys_msg = (system_message or "").strip() or ROLE_PRESETS.get(role, "")

    # Only send a system turn when there is actual content for it.
    messages = [{"role": "system", "content": sys_msg}] if sys_msg else []
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    client = get_client(model_name)
    partial = ""
    try:
        for event in client.chat_completion(
            messages=messages,
            stream=True,
            max_tokens=int(max_tokens),  # UI sliders may hand over a float
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip chunks without choices rather than failing on choices[0].
            if not event.choices:
                continue
            delta = event.choices[0].delta.content or ""
            if delta:
                partial += delta
                yield partial
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any backend failure
        # should surface in the chat window rather than crash the handler.
        error_text = f"⚠️ Inference error: {e}"
        # Keep whatever was already streamed so the user does not lose it.
        yield f"{partial}\n\n{error_text}" if partial else error_text
|
109 |
|
110 |
+
# ---------------- UI ----------------
|
111 |
+
# Layout, top to bottom: heading, role/model selectors, editable system
# prompt, collapsible example picker, generation sliders, chat interface.
# Components are created in the same order as they appear on the page.
with gr.Blocks(title="HF Chat • Data Roles") as demo:
    gr.Markdown("## 🤗 Hugging Face Chat (Data-focused Roles + Examples)")

    with gr.Row():
        role_dd = gr.Dropdown(
            choices=list(ROLE_PRESETS),
            value="Friendly Chatbot",
            label="Role preset",
            interactive=True,
        )
        model_dd = gr.Dropdown(
            choices=DEFAULT_MODELS,
            value=DEFAULT_MODELS[0],
            label="Model (type any HF model ID)",
            allow_custom_value=True,
            interactive=True,
        )

    system_tb = gr.Textbox(
        value=ROLE_PRESETS["Friendly Chatbot"],
        label="System message (auto-filled by role; you can edit)",
        lines=4,
    )

    with gr.Accordion("Examples for selected role", open=False):
        ex_radio = gr.Radio(
            choices=ROLE_EXAMPLES["Friendly Chatbot"],
            label="Pick an example to insert into the input",
            interactive=True,
        )
        insert_btn = gr.Button("Insert example into input")

    # The extra inputs are passed to stream_reply positionally after
    # (message, history), so their order here must match its signature.
    chat = gr.ChatInterface(
        fn=stream_reply,
        additional_inputs=[
            role_dd,
            system_tb,
            model_dd,
            gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.0, 1.0, value=0.95, step=0.05, label="Top-p"),
        ],
    )

    def _on_role_change(role):
        """Return the preset system prompt for ``role`` ("" if unknown)."""
        return ROLE_PRESETS.get(role, "")

    role_dd.change(fn=_on_role_change, inputs=role_dd, outputs=system_tb)

    def _examples_for_role(role):
        """Swap the radio choices to ``role``'s examples, selecting the first."""
        items = ROLE_EXAMPLES.get(role, [])
        if items:
            selected = items[0]
        else:
            selected = None
        return gr.update(choices=items, value=selected)

    role_dd.change(fn=_examples_for_role, inputs=role_dd, outputs=ex_radio)

    def _insert_example(example_text):
        """Put the chosen example into the chat textbox ("" if none chosen)."""
        text = example_text if example_text else ""
        return gr.update(value=text)

    insert_btn.click(fn=_insert_example, inputs=ex_radio, outputs=chat.textbox)

if __name__ == "__main__":
    demo.launch()
|