Spaces:

vmagotr1
/

Varun-Journey

Sleeping

App Files Files Community

vmagotr1 committed on Dec 30, 2024

Commit

c758ec0

verified ·

1 Parent(s): a3716db

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -33

app.py CHANGED Viewed

@@ -1,3 +1,86 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -5,9 +88,9 @@ from huggingface_hub import InferenceClient
 with open("BACKGROUND.md", "r", encoding="utf-8") as f:
     background_text = f.read()
-# Step 2: Set up your InferenceClient (same as before)
 client = InferenceClient("google/gemma-2-2b-jpn-it")
-# HuggingFaceH4/zephyr-7b-beta
 def respond(
     message,
     history: list[dict],
@@ -16,49 +99,54 @@ def respond(
     temperature: float,
     top_p: float,
 ):
     if history is None:
         history = []
-    # Include background text as part of the system message for context
-    combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"
-    # Start building the conversation history
-    messages = [{"role": "system", "content": combined_system_message}]
-    # Add conversation history
     for interaction in history:
         if "user" in interaction:
-            messages.append({"role": "user", "content": interaction["user"]})
         if "assistant" in interaction:
-            messages.append({"role": "assistant", "content": interaction["assistant"]})
-    # Add the latest user message
-    messages.append({"role": "user", "content": message})
-    # Generate response
     response = ""
-    for msg in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = msg.choices[0].delta.content
-        response += token
         yield response
-    print("----- SYSTEM MESSAGE -----")
-    print(messages[0]["content"])
-    print("----- FULL MESSAGES LIST -----")
-    for m in messages:
-        print(m)
-    print("-------------------------")
 # Step 3: Build a Gradio Blocks interface with two Tabs
 with gr.Blocks() as demo:
-    # Tab 1: GPT Chat Agent
-    with gr.Tab("GPT Chat Agent"):
         gr.Markdown("## Welcome to Varun's GPT Agent")
         gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
         chat = gr.ChatInterface(
@@ -69,10 +157,10 @@ with gr.Blocks() as demo:
                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
             ],
-            type="messages",  # Specify message type
         )
-    # # Tab 2: Background Document
     # with gr.Tab("Varun's Background"):
     #     gr.Markdown("# About Varun")
     #     gr.Markdown(background_text)
@@ -80,3 +168,4 @@ with gr.Blocks() as demo:
 # Step 4: Launch
 if __name__ == "__main__":
     demo.launch()

+# import gradio as gr
+# from huggingface_hub import InferenceClient
+# # Step 1: Read your background info
+# with open("BACKGROUND.md", "r", encoding="utf-8") as f:
+#     background_text = f.read()
+# # Step 2: Set up your InferenceClient (same as before)
+# client = InferenceClient("google/gemma-2-2b-jpn-it")
+# # HuggingFaceH4/zephyr-7b-beta
+# def respond(
+#     message,
+#     history: list[dict],
+#     system_message: str,
+#     max_tokens: int,
+#     temperature: float,
+#     top_p: float,
+# ):
+#     if history is None:
+#         history = []
+#     # Include background text as part of the system message for context
+#     combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"
+#     # Start building the conversation history
+#     messages = [{"role": "system", "content": combined_system_message}]
+#     # Add conversation history
+#     for interaction in history:
+#         if "user" in interaction:
+#             messages.append({"role": "user", "content": interaction["user"]})
+#         if "assistant" in interaction:
+#             messages.append({"role": "assistant", "content": interaction["assistant"]})
+#     # Add the latest user message
+#     messages.append({"role": "user", "content": message})
+#     # Generate response
+#     response = ""
+#     for msg in client.chat_completion(
+#         messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = msg.choices[0].delta.content
+#         response += token
+#         yield response
+#     print("----- SYSTEM MESSAGE -----")
+#     print(messages[0]["content"])
+#     print("----- FULL MESSAGES LIST -----")
+#     for m in messages:
+#         print(m)
+#     print("-------------------------")
+# # Step 3: Build a Gradio Blocks interface with two Tabs
+# with gr.Blocks() as demo:
+#     # Tab 1: GPT Chat Agent
+#     with gr.Tab("GPT Chat Agent"):
+#         gr.Markdown("## Welcome to Varun's GPT Agent")
+#         gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
+#         chat = gr.ChatInterface(
+#             fn=respond,
+#             additional_inputs=[
+#                 gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+#                 gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+#             ],
+#             type="messages",  # Specify message type
+#         )
+#     # # Tab 2: Background Document
+#     # with gr.Tab("Varun's Background"):
+#     #     gr.Markdown("# About Varun")
+#     #     gr.Markdown(background_text)
+# # Step 4: Launch
+# if __name__ == "__main__":
+#     demo.launch()
 import gradio as gr
 from huggingface_hub import InferenceClient
 with open("BACKGROUND.md", "r", encoding="utf-8") as f:
     background_text = f.read()
+# Step 2: Set up your InferenceClient (using text-generation instead of chat)
 client = InferenceClient("google/gemma-2-2b-jpn-it")
 def respond(
     message,
     history: list[dict],
     temperature: float,
     top_p: float,
 ):
+    """
+    Merges 'system_message', 'background_text', and conversation 'history'
+    into a single text prompt, then calls client.text_generation(...)
+    for a response.
+    """
     if history is None:
         history = []
+    # Combine system instructions + background + prior conversation + new user message
+    prompt = f"{system_message}\n\n### Background Information ###\n{background_text}\n\n"
     for interaction in history:
         if "user" in interaction:
+            prompt += f"User: {interaction['user']}\n"
         if "assistant" in interaction:
+            prompt += f"Assistant: {interaction['assistant']}\n"
+    # Add the latest user query
+    prompt += f"User: {message}\nAssistant:"  # We'll generate the Assistant's text after this
+    # Generate response using text_generation in streaming mode
     response = ""
+    # The text returned will include the entire prompt + new text,
+    # so we’ll need to subtract out the prompt length to isolate the new portion.
+    prompt_length = len(prompt)
+    for chunk in client.text_generation(
+        prompt=prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True,  # streaming each chunk
     ):
+        # Each chunk is a dict like {"generated_text": "full text so far..."}
+        full_text = chunk["generated_text"]
+        # The newly generated portion is what's after the original prompt
+        new_text = full_text[prompt_length:]
+        response += new_text
+        prompt_length = len(full_text)  # update for next chunk
         yield response
+    # For debugging: show what we actually sent
+    print("----- FULL PROMPT -----")
+    print(prompt)
+    print("----- END PROMPT -----")
 # Step 3: Build a Gradio Blocks interface with two Tabs
 with gr.Blocks() as demo:
+    with gr.Tab("Gemma Chat Agent"):
         gr.Markdown("## Welcome to Varun's GPT Agent")
         gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
         chat = gr.ChatInterface(
                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
             ],
+            type="messages",  # Gradio will keep track of (user, assistant) messages in history
         )
+    # Optional: If you want a separate tab to display background_text
     # with gr.Tab("Varun's Background"):
     #     gr.Markdown("# About Varun")
     #     gr.Markdown(background_text)
 # Step 4: Launch
 if __name__ == "__main__":
     demo.launch()