# import gradio as gr
# from huggingface_hub import InferenceClient

# # Step 1: Read your background info
# with open("BACKGROUND.md", "r", encoding="utf-8") as f:
#     background_text = f.read()

# # Step 2: Set up your InferenceClient (same as before)
# client = InferenceClient("google/gemma-2-2b-jpn-it")
# # HuggingFaceH4/zephyr-7b-beta
# def respond(
#     message,
#     history: list[dict],
#     system_message: str,
#     max_tokens: int,
#     temperature: float,
#     top_p: float,
# ):
#     if history is None:
#         history = []

#     # Include background text as part of the system message for context
#     combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"

#     # Start building the conversation history
#     messages = [{"role": "system", "content": combined_system_message}]
    
#     # Add conversation history
#     for interaction in history:
#         if "user" in interaction:
#             messages.append({"role": "user", "content": interaction["user"]})
#         if "assistant" in interaction:
#             messages.append({"role": "assistant", "content": interaction["assistant"]})

#     # Add the latest user message
#     messages.append({"role": "user", "content": message})

#     # Generate response
#     response = ""
#     for msg in client.chat_completion(
#         messages,
#         max_tokens=max_tokens,
#         stream=True,
#         temperature=temperature,
#         top_p=top_p,
#     ):
        
#         token = msg.choices[0].delta.content
#         if token:  # delta.content can be None on some stream chunks
#             response += token
#         yield response
#     print("----- SYSTEM MESSAGE -----")
#     print(messages[0]["content"])
#     print("----- FULL MESSAGES LIST -----")
#     for m in messages:
#         print(m)
#     print("-------------------------")
    
# # Step 3: Build a Gradio Blocks interface with two Tabs
# with gr.Blocks() as demo:
#     # Tab 1: GPT Chat Agent
#     with gr.Tab("GPT Chat Agent"):
#         gr.Markdown("## Welcome to Varun's GPT Agent")
#         gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
#         chat = gr.ChatInterface(
#             fn=respond,
#             additional_inputs=[
#                 gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#                 gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
#             ],
#             type="messages",  # Specify message type
#         )

#     # # Tab 2: Background Document
#     # with gr.Tab("Varun's Background"):
#     #     gr.Markdown("# About Varun")
#     #     gr.Markdown(background_text)

# # Step 4: Launch
# if __name__ == "__main__":
#     demo.launch()
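
# The commented-out version above drove client.chat_completion; the active
# version below builds the prompt by hand and streams via
# client.text_generation instead (presumably a better fit for this model).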

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("google/gemma-2-2b-jpn-it")

# Read the background document once at startup, as the commented-out
# version above does, so it can be prepended to every prompt.
with open("BACKGROUND.md", "r", encoding="utf-8") as f:
    background_text = f.read()

def respond(message, history, system_message, max_tokens, temperature, top_p):
    if history is None:
        history = []

    # Flatten the system message, background, and prior turns into one
    # text prompt. With type="messages", Gradio passes history entries
    # as dicts with "role" and "content" keys.
    prompt = f"{system_message}\n\n### Background Information ###\n{background_text}\n\n"
    for turn in history:
        prompt += f"{turn['role']}: {turn['content']}\n"
    prompt += f"user: {message}\nassistant: "

    response = ""
    for chunk in client.text_generation(
        prompt=prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # 'chunk' is a string of newly generated text.
        response += chunk
        yield response

    # (Optional) log the final prompt
    print("PROMPT:", prompt)


with gr.Blocks() as demo:
    with gr.Tab("Gemma Chat Agent"):
        chat = gr.ChatInterface(
            fn=respond,
            additional_inputs=[
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
            ],
            type="messages",
        )

if __name__ == "__main__":
    demo.launch()
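    # launch(share=True) would additionally create a temporary public link
    # when running locally; plain launch() is all a hosted Space needs.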