Spaces:

m1n9k7
/

gemini-multi-modal

Sleeping

App Files Files Community

TheM1N9 committed on Sep 13, 2024

Commit

de15782

1 Parent(s): 775a8e1

added type annotations and docstrings

Browse files

Files changed (2) hide show

.gitignore +56 -0
app.py +81 -26

.gitignore ADDED Viewed

	@@ -0,0 +1,56 @@

+.env
+uploads
+chroma
+instance
+.venv
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+venv/
+.python-version
+*.log

app.py CHANGED Viewed

@@ -1,41 +1,68 @@
 import google.generativeai as genai
 from dotenv import load_dotenv
 import os
 import gradio as gr
 from PIL import Image
 import numpy as np
 load_dotenv()
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 genai.configure(api_key=GOOGLE_API_KEY)
-def save_image(file_input, image_name):
     # Convert the input to a PIL image
-    image_pil = Image.fromarray(np.uint8(file_input))
     # Define the directory where the image will be saved
     save_directory = "images"
     # Check if the directory exists, create it if not
     if not os.path.exists(save_directory):
         os.makedirs(save_directory, exist_ok=True)
     # Define the full path to save the image
-    image_path = os.path.join(save_directory, image_name)
     # Save the image
     image_pil.save(image_path)
     return image_path
-def generate_response(text_input, file_inputs=None, chat_history=None):
     # Upload the files (images) and print a confirmation.
-    image_paths = []
     if file_inputs is not None:
         for idx, file_input in enumerate(file_inputs):
-            image_name = f"image_{idx + 1}.jpg"
-            image_path = save_image(file_input, image_name)
             image_paths.append(image_path)
     # Choose a Gemini API model.
@@ -49,30 +76,37 @@ def generate_response(text_input, file_inputs=None, chat_history=None):
     chat_history_content = []
     for user_message, bot_response in chat_history:
         chat_history_content.append({"role": "user", "parts": [{"text": user_message}]})
-        chat_history_content.append({"role": "model", "parts": [{"text": bot_response}]})
-    chat = model.start_chat(history=chat_history_content)
     # Open images and pass them with text_input if available
-    images = [Image.open(image_path) for image_path in image_paths] if image_paths else None
     # Prompt the model with text and the uploaded images if available
     if images:
-        response = chat.send_message([*images, text_input])
     else:
-        response = chat.send_message(text_input)
     # Append the new message to chat history in Gradio format (user, bot)
     chat_history.append((text_input, response.text))
-    return "", chat_history
 # Create a Gradio interface with Blocks
 with gr.Blocks(title="Gemini vision") as demo:
     gr.Markdown("# Chat Bot M1N9")
     # Define the Chatbot component
-    chatbot = gr.Chatbot([], elem_id="chatbot", height=700, show_share_button=True, show_copy_button=True)
     # Define the Textbox and Image components
     msg = gr.Textbox(show_copy_button=True, placeholder="Type your message here...")
@@ -83,28 +117,49 @@ with gr.Blocks(title="Gemini vision") as demo:
         img2 = gr.Image()
         img3 = gr.Image()
         img4 = gr.Image()
     btn = gr.Button("Submit")
     # Define the ClearButton component
     clear = gr.ClearButton([msg, img1, img2, img3, img4, chatbot])
     # Set the submit function for the Textbox and Image
-    def submit_message(msg, img1, img2, img3, img4, chat_history):
         # Collect all images into a list
         image_list = [img1, img2, img3, img4]
         # Filter out None values in case fewer than 4 images are uploaded
         image_list = [img for img in image_list if img is not None]
         # Call the generate_response with the list of images
         response, chat_history = generate_response(msg, image_list, chat_history)
         # Return the updated chat history and clear input fields
-        return "", None, None, None, None, chat_history
     # Bind the submit function to both the submit action of Textbox and the button click
-    msg.submit(submit_message, [msg, img1, img2, img3, img4, chatbot], [msg, img1, img2, img3, img4, chatbot])
-    btn.click(submit_message, [msg, img1, img2, img3, img4, chatbot], [msg, img1, img2, img3, img4, chatbot])
 # Launch the Gradio interface
 demo.launch(debug=True, share=True)

+from typing import Any, List, Optional, Tuple, Literal
 import google.generativeai as genai
 from dotenv import load_dotenv
 import os
+from google.generativeai.types.generation_types import GenerateContentResponse
 import gradio as gr
 from PIL import Image
 import numpy as np
 load_dotenv()
+GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "Enter correct API key")
 genai.configure(api_key=GOOGLE_API_KEY)
+def save_image(file_input: str, image_name: str) -> str:
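+    """Saves an image to the local ``images`` directory.
+    Args:
+        file_input (str): image data from Gradio; annotated as str, but it is passed to
+            np.uint8 and Image.fromarray, so it is presumably pixel-array data (confirm)
+        image_name (str): file name to save the image under
+    Returns:
+        str: path of the saved image
+    """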
     # Convert the input to a PIL image
+    image_pil: Image.Image = Image.fromarray(np.uint8(file_input))
     # Define the directory where the image will be saved
     save_directory = "images"
     # Check if the directory exists, create it if not
     if not os.path.exists(save_directory):
         os.makedirs(save_directory, exist_ok=True)
     # Define the full path to save the image
+    image_path: str = os.path.join(save_directory, image_name)
     # Save the image
     image_pil.save(image_path)
     return image_path
+def generate_response(
+    text_input: str,
+    file_inputs: Optional[List[str]] = None,
+    chat_history: Optional[List[Tuple[str, str]]] = None,
+) -> Tuple[str, Any | List[Any]]:
+    """Generates response using gemini-1.5-flash model.
+    Args:
+        text_input (str): user input
+        file_inputs (List[str], optional): uploaded images from Gradio; each one is saved
+            to disk with save_image and reopened before being sent. Defaults to None.
+        chat_history (List[Tuple[str, str]], optional): (user, bot) message pairs. Defaults
+            to None, but it is iterated and appended to, so callers must pass a list.
+    Returns:
+        Tuple[str, Any | List[Any]]: the response text and the updated chat history
+    """
     # Upload the files (images) and print a confirmation.
+    image_paths: List[str] = []
     if file_inputs is not None:
         for idx, file_input in enumerate(file_inputs):
+            image_name: str = f"image_{idx + 1}.jpg"
+            image_path: str = save_image(file_input, image_name)
             image_paths.append(image_path)
     # Choose a Gemini API model.
     chat_history_content = []
     for user_message, bot_response in chat_history:
         chat_history_content.append({"role": "user", "parts": [{"text": user_message}]})
+        chat_history_content.append(
+            {"role": "model", "parts": [{"text": bot_response}]}
+        )
+    chat: genai.ChatSession = model.start_chat(history=chat_history_content)
     # Open images and pass them with text_input if available
+    images = (
+        [Image.open(image_path) for image_path in image_paths] if image_paths else None
+    )
     # Prompt the model with text and the uploaded images if available
     if images:
+        response: GenerateContentResponse = chat.send_message([*images, text_input])
     else:
+        response: GenerateContentResponse = chat.send_message(text_input)
     # Append the new message to chat history in Gradio format (user, bot)
     chat_history.append((text_input, response.text))
+    return response.text, chat_history
 # Create a Gradio interface with Blocks
 with gr.Blocks(title="Gemini vision") as demo:
     gr.Markdown("# Chat Bot M1N9")
     # Define the Chatbot component
+    chatbot = gr.Chatbot(
+        [], elem_id="chatbot", height=700, show_share_button=True, show_copy_button=True
+    )
     # Define the Textbox and Image components
     msg = gr.Textbox(show_copy_button=True, placeholder="Type your message here...")
         img2 = gr.Image()
         img3 = gr.Image()
         img4 = gr.Image()
     btn = gr.Button("Submit")
     # Define the ClearButton component
     clear = gr.ClearButton([msg, img1, img2, img3, img4, chatbot])
     # Set the submit function for the Textbox and Image
+    def submit_message(msg: str, img1, img2, img3, img4, chat_history):
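+        """Passes the message and any provided images to generate_response and returns the updated UI values.
+        Args:
+            msg (str): user input
+            img1: value of the first image input, or None if not provided
+            img2: value of the second image input, or None if not provided
+            img3: value of the third image input, or None if not provided
+            img4: value of the fourth image input, or None if not provided
+            chat_history: chat history of the user
+        Returns:
+            tuple: an empty string for the textbox, the four image values unchanged,
+                and the updated chat history
+        """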
         # Collect all images into a list
         image_list = [img1, img2, img3, img4]
         # Filter out None values in case fewer than 4 images are uploaded
         image_list = [img for img in image_list if img is not None]
         # Call the generate_response with the list of images
         response, chat_history = generate_response(msg, image_list, chat_history)
         # Return the updated chat history and clear input fields
+        return "", img1, img2, img3, img4, chat_history
     # Bind the submit function to both the submit action of Textbox and the button click
+    msg.submit(
+        submit_message,
+        [msg, img1, img2, img3, img4, chatbot],
+        [msg, img1, img2, img3, img4, chatbot],
+    )
+    btn.click(
+        submit_message,
+        [msg, img1, img2, img3, img4, chatbot],
+        [msg, img1, img2, img3, img4, chatbot],
+    )
 # Launch the Gradio interface
 demo.launch(debug=True, share=True)