Spaces:

jonaschua
/

gemma

Runtime error

App Files Community

jonaschua committed on Mar 13

Commit

11c1c09

verified ·

1 Parent(s): 4962fc9

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -20

app.py CHANGED Viewed

@@ -1,21 +1,45 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
@@ -27,24 +51,23 @@ def respond(
     response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),

 import gradio as gr
 from huggingface_hub import InferenceClient
+import spaces
+import torch
+import os
+from huggingface_hub import login
+from PIL import Image
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
+# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+duration=None
+login(token = os.getenv('deepseekv2'))
+ckpt = "google/gemma-3-4b-it"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    ckpt, device_map="auto", torch_dtype=torch.bfloat16,
+)
+processor = AutoProcessor.from_pretrained(ckpt)
+# image = Image.open(requests.get(url, stream=True).raw)
+# prompt = "<start_of_image> in this image, there is"
+# model_inputs = processor(text=prompt, images=image, return_tensors="pt")
+# input_len = model_inputs["input_ids"].shape[-1]
+# with torch.inference_mode():
+#     generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)
+#     generation = generation[0][input_len:]
+@spaces.GPU(duration=duration)
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p,):
+    # messages = [{"role": "system", "content": system_message}]
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "image", "url": "https://huggingface.co/spaces/big-vision/paligemma-hf/resolve/main/examples/password.jpg"},
+            {"type": "text", "text": "What is the password?"}
+        ]}]
     for val in history:
         if val[0]:
     response = ""
+    # for message in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p,):
+    #     token = message.choices[0].delta.content
+    #     response += token
+    #     yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
     respond,
+    textbox=gr.MultimodalTextbox()
+    multimodal=True,
+    stop_btn="Stop generation",
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),