zjrwtxtechstudio committed on
Commit
b1a2f92
·
1 Parent(s): 6002f2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -215
app.py CHANGED
@@ -1,223 +1,105 @@
1
- import os
2
- import time
3
- from typing import List, Tuple, Optional
4
-
5
  import google.generativeai as genai
 
 
6
  import gradio as gr
7
- from PIL import Image
8
-
9
- print("google-generativeai:", genai.__version__)
10
-
11
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
12
-
13
- TITLE = """<h1 align="center">免费用Gemini语言+图片多模态对话💬</h1>"""
14
- SUBTITLE = """<h2 align="center">公众号:正经人王同学</h2>"""
15
-
16
-
17
- AVATAR_IMAGES = (
18
- None,
19
- "https://media.roboflow.com/spaces/gemini-icon.png"
20
- )
21
-
22
- IMAGE_WIDTH = 512
23
-
24
-
25
- def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
26
- if not stop_sequences:
27
- return None
28
- return [sequence.strip() for sequence in stop_sequences.split(",")]
29
-
30
-
31
- def preprocess_image(image: Image.Image) -> Optional[Image.Image]:
32
- image_height = int(image.height * IMAGE_WIDTH / image.width)
33
- return image.resize((IMAGE_WIDTH, image_height))
34
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- def user(text_prompt: str, chatbot: List[Tuple[str, str]]):
37
- return "", chatbot + [[text_prompt, None]]
38
 
 
 
 
39
 
40
- def bot(
41
- google_key: str,
42
- image_prompt: Optional[Image.Image],
43
- temperature: float,
44
- max_output_tokens: int,
45
- stop_sequences: str,
46
- top_k: int,
47
- top_p: float,
48
- chatbot: List[Tuple[str, str]]
49
- ):
50
- google_key = google_key if google_key else GOOGLE_API_KEY
51
- if not google_key:
52
- raise ValueError(
53
- "GOOGLE_API_KEY is not set. "
54
- "Please follow the instructions in the README to set it up.")
55
 
56
- text_prompt = chatbot[-1][0]
57
- genai.configure(api_key=google_key)
58
- generation_config = genai.types.GenerationConfig(
59
- temperature=temperature,
60
- max_output_tokens=max_output_tokens,
61
- stop_sequences=preprocess_stop_sequences(stop_sequences=stop_sequences),
62
- top_k=top_k,
63
- top_p=top_p)
64
 
65
- if image_prompt is None:
66
- model = genai.GenerativeModel('gemini-pro')
67
- response = model.generate_content(
68
- text_prompt,
69
- stream=True,
70
- generation_config=generation_config)
71
- response.resolve()
72
- else:
73
- image_prompt = preprocess_image(image_prompt)
74
- model = genai.GenerativeModel('gemini-pro-vision')
75
- response = model.generate_content(
76
- contents=[text_prompt, image_prompt],
77
- stream=True,
78
- generation_config=generation_config)
79
- response.resolve()
80
-
81
- # streaming effect
82
- chatbot[-1][1] = ""
83
- for chunk in response:
84
- for i in range(0, len(chunk.text), 10):
85
- section = chunk.text[i:i + 10]
86
- chatbot[-1][1] += section
87
- time.sleep(0.01)
88
- yield chatbot
89
-
90
-
91
- google_key_component = gr.Textbox(
92
- label="GOOGLE API KEY",
93
- value="",
94
- type="password",
95
- placeholder="...",
96
- info="You have to provide your own GOOGLE_API_KEY for this app to function properly",
97
- visible=GOOGLE_API_KEY is None
98
- )
99
-
100
- image_prompt_component = gr.Image(type="pil", label="Image", scale=1)
101
- chatbot_component = gr.Chatbot(
102
- label='Gemini',
103
- bubble_full_width=False,
104
- avatar_images=AVATAR_IMAGES,
105
- scale=2
106
- )
107
- text_prompt_component = gr.Textbox(
108
- placeholder="想说些什么呢?",
109
- label="公众号:正经人王同学"
110
- )
111
- run_button_component = gr.Button()
112
- temperature_component = gr.Slider(
113
- minimum=0,
114
- maximum=1.0,
115
- value=0.4,
116
- step=0.05,
117
- label="Temperature",
118
- info=(
119
- "Temperature controls the degree of randomness in token selection. Lower "
120
- "temperatures are good for prompts that expect a true or correct response, "
121
- "while higher temperatures can lead to more diverse or unexpected results. "
122
- ))
123
- max_output_tokens_component = gr.Slider(
124
- minimum=1,
125
- maximum=2048,
126
- value=1024,
127
- step=1,
128
- label="Token limit",
129
- info=(
130
- "Token limit determines the maximum amount of text output from one prompt. A "
131
- "token is approximately four characters. The default value is 2048."
132
- ))
133
- stop_sequences_component = gr.Textbox(
134
- label="Add stop sequence",
135
- value="",
136
- type="text",
137
- placeholder="STOP, END",
138
- info=(
139
- "A stop sequence is a series of characters (including spaces) that stops "
140
- "response generation if the model encounters it. The sequence is not included "
141
- "as part of the response. You can add up to five stop sequences."
142
- ))
143
- top_k_component = gr.Slider(
144
- minimum=1,
145
- maximum=40,
146
- value=32,
147
- step=1,
148
- label="Top-K",
149
- info=(
150
- "Top-k changes how the model selects tokens for output. A top-k of 1 means the "
151
- "selected token is the most probable among all tokens in the model’s "
152
- "vocabulary (also called greedy decoding), while a top-k of 3 means that the "
153
- "next token is selected from among the 3 most probable tokens (using "
154
- "temperature)."
155
- ))
156
- top_p_component = gr.Slider(
157
- minimum=0,
158
- maximum=1,
159
- value=1,
160
- step=0.01,
161
- label="Top-P",
162
- info=(
163
- "Top-p changes how the model selects tokens for output. Tokens are selected "
164
- "from most probable to least until the sum of their probabilities equals the "
165
- "top-p value. For example, if tokens A, B, and C have a probability of .3, .2, "
166
- "and .1 and the top-p value is .5, then the model will select either A or B as "
167
- "the next token (using temperature). "
168
- ))
169
-
170
- user_inputs = [
171
- text_prompt_component,
172
- chatbot_component
173
- ]
174
-
175
- bot_inputs = [
176
- google_key_component,
177
- image_prompt_component,
178
- temperature_component,
179
- max_output_tokens_component,
180
- stop_sequences_component,
181
- top_k_component,
182
- top_p_component,
183
- chatbot_component
184
- ]
185
 
186
- with gr.Blocks() as demo:
187
- gr.HTML(TITLE)
188
- gr.HTML(SUBTITLE)
189
-
190
- with gr.Column():
191
- google_key_component.render()
192
- with gr.Row():
193
- image_prompt_component.render()
194
- chatbot_component.render()
195
- text_prompt_component.render()
196
- run_button_component.render()
197
- with gr.Accordion("Parameters", open=False):
198
- temperature_component.render()
199
- max_output_tokens_component.render()
200
- stop_sequences_component.render()
201
- with gr.Accordion("Advanced", open=False):
202
- top_k_component.render()
203
- top_p_component.render()
204
-
205
- run_button_component.click(
206
- fn=user,
207
- inputs=user_inputs,
208
- outputs=[text_prompt_component, chatbot_component],
209
- queue=False
210
- ).then(
211
- fn=bot, inputs=bot_inputs, outputs=[chatbot_component],
212
- )
213
-
214
- text_prompt_component.submit(
215
- fn=user,
216
- inputs=user_inputs,
217
- outputs=[text_prompt_component, chatbot_component],
218
- queue=False
219
- ).then(
220
- fn=bot, inputs=bot_inputs, outputs=[chatbot_component],
221
- )
222
-
223
- demo.queue(max_size=99).launch(debug=False, show_error=True)
 
1
# --- Application setup -------------------------------------------------------

# Standard library
import os

# Third-party: Google Generative AI SDK, imaging, and the Gradio UI toolkit
import google.generativeai as genai
import PIL.Image
import gradio as gr
from gradio.data_classes import FileData
from gradio_multimodalchatbot import MultimodalChatbot

# For better security, the API key is read from the environment rather than
# being hard-coded in the source.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# To sanity-check your API key when running locally, you can issue:
#   curl -H 'Content-Type: application/json' \
#        -d '{ "prompt": { "text": "Write a very short story about a magic backpack"} }' \
#        "https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText?key=<enter-your-key-here>"

# Model handles: text-only Gemini and the vision-capable variant.
model = genai.GenerativeModel('gemini-pro')
modelvis = genai.GenerativeModel('gemini-pro-vision')
25
+
26
def gemini(input, file, chatbot=None):
    """
    Handle one chat turn against the Gemini text or vision model.

    Parameters:
        input (str): The user's message text. (Name shadows the builtin but
            is kept for interface compatibility with existing wiring.)
        file: Optional Gradio file object; when present, its image is sent to
            the vision model together with the text.
        chatbot (list | None): Prior [user_msg, bot_msg] pairs in the
            MultimodalChatbot format; None (or empty) means a fresh chat.

    Returns:
        tuple: (updated chatbot history, "" to clear the textbox,
        None to reset the upload button).

    Raises:
        gr.Error: If the underlying API call fails for any reason.
    """
    # Bug fix: the old signature used a mutable default (chatbot=[]), which
    # is shared across calls and silently accumulates history.
    if chatbot is None:
        chatbot = []

    # Rebuild the full conversation in the genai "role/parts" format.
    messages = []
    for user, bot in chatbot:
        messages.extend([
            {'role': 'user', 'parts': [user.text]},
            {'role': 'model', 'parts': [bot.text]},
        ])
    messages.append({'role': 'user', 'parts': [input]})

    try:
        if file is not None:
            # Vision path. NOTE(review): only the current text + image are
            # sent; prior history is not forwarded to the vision model.
            with PIL.Image.open(file.name) as img:
                message = [{'role': 'user', 'parts': [input, img]}]
                response = modelvis.generate_content(message)
            gemini_video_resp = response.text
            messages.append({'role': 'model', 'parts': [gemini_video_resp]})

            # Record the turn in the MultimodalChatbot message format.
            user_msg = {"text": input, "files": [{"file": FileData(path=file.name)}]}
            bot_msg = {"text": gemini_video_resp, "files": []}
            chatbot.append([user_msg, bot_msg])
        else:
            # Text-only path: the whole history is sent for context.
            response = model.generate_content(messages)
            gemini_resp = response.text

            user_msg = {"text": input, "files": []}
            bot_msg = {"text": gemini_resp, "files": []}
            chatbot.append([user_msg, bot_msg])
    except Exception as e:
        # Surface the failure in the UI as a readable message; chain the
        # original exception so server logs keep the full traceback cause.
        print(f"An error occurred: {e}")
        raise gr.Error(str(e)) from e

    return chatbot, "", None
79
+
80
# --- Gradio Blocks interface -------------------------------------------------
with gr.Blocks() as demo:
    # Centered page header
    gr.HTML("<center><h1>免费用Gemini语言+图片多模态对话💬公众号:正经人王同学</h1></center>")

    # Conversation display area
    chat_window = MultimodalChatbot(value=[], height=800)

    with gr.Row():
        # User text entry, wider than the upload button for visibility
        prompt_box = gr.Textbox(scale=4, placeholder='请上传图片或直接开始对话吧||公众号:正经人王同学')
        # Image upload control
        upload_btn = gr.UploadButton("上传图片", file_types=["image"], scale=1)

    # Submitting the textbox sends text (plus any uploaded image) to the model
    prompt_box.submit(gemini, [prompt_box, upload_btn, chat_window], [chat_window, prompt_box, upload_btn])

    # Chained then() calls update the upload button's label while uploading
    upload_btn.upload(lambda: gr.UploadButton("上传图片中..."), [], upload_btn) \
        .then(lambda: gr.UploadButton("图片已上传"), [], upload_btn) \
        .then(lambda: gr.UploadButton("上传图片"), [], upload_btn)

# Launch with request queuing so multiple users can be served
demo.queue().launch()