Spaces:
Sleeping
Sleeping
File size: 4,286 Bytes
c758ec0 b899370 a3716db c758ec0 b95201b 979996d b95201b c758ec0 b95201b c758ec0 b899370 b95201b b899370 b95201b b899370 c758ec0 b95201b c758ec0 a7682fa c758ec0 a7682fa 979996d a7682fa b95201b a7682fa b95201b a7682fa b899370 c758ec0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# import gradio as gr
# from huggingface_hub import InferenceClient
# # Step 1: Read your background info
# with open("BACKGROUND.md", "r", encoding="utf-8") as f:
# background_text = f.read()
# # Step 2: Set up your InferenceClient (same as before)
# client = InferenceClient("google/gemma-2-2b-jpn-it")
# # HuggingFaceH4/zephyr-7b-beta
# def respond(
# message,
# history: list[dict],
# system_message: str,
# max_tokens: int,
# temperature: float,
# top_p: float,
# ):
# if history is None:
# history = []
# # Include background text as part of the system message for context
# combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"
# # Start building the conversation history
# messages = [{"role": "system", "content": combined_system_message}]
# # Add conversation history
# for interaction in history:
# if "user" in interaction:
# messages.append({"role": "user", "content": interaction["user"]})
# if "assistant" in interaction:
# messages.append({"role": "assistant", "content": interaction["assistant"]})
# # Add the latest user message
# messages.append({"role": "user", "content": message})
# # Generate response
# response = ""
# for msg in client.chat_completion(
# messages,
# max_tokens=max_tokens,
# stream=True,
# temperature=temperature,
# top_p=top_p,
# ):
# token = msg.choices[0].delta.content
# response += token
# yield response
# print("----- SYSTEM MESSAGE -----")
# print(messages[0]["content"])
# print("----- FULL MESSAGES LIST -----")
# for m in messages:
# print(m)
# print("-------------------------")
# # Step 3: Build a Gradio Blocks interface with two Tabs
# with gr.Blocks() as demo:
# # Tab 1: GPT Chat Agent
# with gr.Tab("GPT Chat Agent"):
# gr.Markdown("## Welcome to Varun's GPT Agent")
# gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
# chat = gr.ChatInterface(
# fn=respond,
# additional_inputs=[
# gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
# gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
# gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
# gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
# ],
# type="messages", # Specify message type
# )
# # # Tab 2: Background Document
# # with gr.Tab("Varun's Background"):
# # gr.Markdown("# About Varun")
# # gr.Markdown(background_text)
# # Step 4: Launch
# if __name__ == "__main__":
# demo.launch()
import gradio as gr
from huggingface_hub import InferenceClient
# Module-level inference client, created once at import time and reused by
# respond() for every streamed text-generation request. The model it targets
# is fixed here to "google/gemma-2-2b-jpn-it".
client = InferenceClient("google/gemma-2-2b-jpn-it")
def _history_to_turns(history):
    """Normalize chat history into a list of ``(role, text)`` pairs.

    Two layouts are accepted: a list of ``{"role": ..., "content": ...}``
    dicts (what ``type="messages"`` produces) and a list of
    ``[user_text, assistant_text]`` pairs. Entries with an unknown role or
    with non-string / empty content are skipped rather than raising.
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                turns.append((role, content))
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, content in zip(("user", "assistant"), entry):
                if isinstance(content, str) and content:
                    turns.append((role, content))
    return turns


def _build_prompt(system_message, history, message):
    """Assemble the full text prompt: header, prior turns, new user message."""
    labels = {"user": "User", "assistant": "Assistant"}
    # The header is kept exactly as before so the system-message prefix of
    # the prompt is unchanged; "# Background..." is still a placeholder.
    parts = [f"{system_message}\n\n# Background...\n\n"]
    parts.extend(
        f"{labels[role]}: {content}\n"
        for role, content in _history_to_turns(history)
    )
    # End on an open "Assistant:" cue so the model continues with its reply.
    parts.append(f"User: {message}\nAssistant:")
    return "".join(parts)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the model's reply to ``message`` for the chat UI.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation, or ``None`` / empty for
            the first turn.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Yields:
        The reply accumulated so far, once per streamed chunk, so the caller
        can re-render the growing message.
    """
    # Previously the prompt held only the system message, so the model never
    # saw the user's question or the conversation so far.
    prompt = _build_prompt(system_message, history, message)

    response = ""
    for chunk in client.text_generation(
        prompt=prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # Each chunk is a piece of newly generated text; skip empty ones so
        # they neither break concatenation nor trigger a redundant update.
        if not chunk:
            continue
        response += chunk
        yield response

    # (Optional) log the final prompt once streaming has finished.
    print("PROMPT:", prompt)
# Assemble the UI: a single tab hosting the chat interface. The extra input
# widgets are listed in the same order as respond()'s trailing parameters
# (system_message, max_tokens, temperature, top_p).
with gr.Blocks() as demo, gr.Tab("Gemma Chat Agent"):
    chat = gr.ChatInterface(
        type="messages",
        fn=respond,
        additional_inputs=[
            gr.Textbox(label="System message", value="You are a friendly Chatbot."),
            gr.Slider(label="Max new tokens", value=512, minimum=1, maximum=2048, step=1),
            gr.Slider(label="Temperature", value=0.7, minimum=0.1, maximum=4.0, step=0.1),
            gr.Slider(label="Top-p", value=0.95, minimum=0.1, maximum=1.0, step=0.05),
        ],
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|