zjrwtxtechstudio committed on
Commit
b1a2f92
·
1 Parent(s): 6002f2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -215
app.py CHANGED
@@ -1,223 +1,105 @@
1
- import os
2
- import time
3
- from typing import List, Tuple, Optional
4
-
5
  import google.generativeai as genai
 
 
6
  import gradio as gr
7
- from PIL import Image
8
-
9
- print("google-generativeai:", genai.__version__)
10
-
11
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
12
-
13
- TITLE = """<h1 align="center">免费用Gemini语言+图片多模态对话💬</h1>"""
14
- SUBTITLE = """<h2 align="center">公众号:正经人王同学</h2>"""
15
-
16
-
17
- AVATAR_IMAGES = (
18
- None,
19
- "https://media.roboflow.com/spaces/gemini-icon.png"
20
- )
21
-
22
- IMAGE_WIDTH = 512
23
-
24
-
25
- def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
26
- if not stop_sequences:
27
- return None
28
- return [sequence.strip() for sequence in stop_sequences.split(",")]
29
-
30
-
31
- def preprocess_image(image: Image.Image) -> Optional[Image.Image]:
32
- image_height = int(image.height * IMAGE_WIDTH / image.width)
33
- return image.resize((IMAGE_WIDTH, image_height))
34
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- def user(text_prompt: str, chatbot: List[Tuple[str, str]]):
37
- return "", chatbot + [[text_prompt, None]]
38
 
 
 
 
39
 
40
- def bot(
41
- google_key: str,
42
- image_prompt: Optional[Image.Image],
43
- temperature: float,
44
- max_output_tokens: int,
45
- stop_sequences: str,
46
- top_k: int,
47
- top_p: float,
48
- chatbot: List[Tuple[str, str]]
49
- ):
50
- google_key = google_key if google_key else GOOGLE_API_KEY
51
- if not google_key:
52
- raise ValueError(
53
- "GOOGLE_API_KEY is not set. "
54
- "Please follow the instructions in the README to set it up.")
55
 
56
- text_prompt = chatbot[-1][0]
57
- genai.configure(api_key=google_key)
58
- generation_config = genai.types.GenerationConfig(
59
- temperature=temperature,
60
- max_output_tokens=max_output_tokens,
61
- stop_sequences=preprocess_stop_sequences(stop_sequences=stop_sequences),
62
- top_k=top_k,
63
- top_p=top_p)
64
 
65
- if image_prompt is None:
66
- model = genai.GenerativeModel('gemini-pro')
67
- response = model.generate_content(
68
- text_prompt,
69
- stream=True,
70
- generation_config=generation_config)
71
- response.resolve()
72
- else:
73
- image_prompt = preprocess_image(image_prompt)
74
- model = genai.GenerativeModel('gemini-pro-vision')
75
- response = model.generate_content(
76
- contents=[text_prompt, image_prompt],
77
- stream=True,
78
- generation_config=generation_config)
79
- response.resolve()
80
-
81
- # streaming effect
82
- chatbot[-1][1] = ""
83
- for chunk in response:
84
- for i in range(0, len(chunk.text), 10):
85
- section = chunk.text[i:i + 10]
86
- chatbot[-1][1] += section
87
- time.sleep(0.01)
88
- yield chatbot
89
-
90
-
91
- google_key_component = gr.Textbox(
92
- label="GOOGLE API KEY",
93
- value="",
94
- type="password",
95
- placeholder="...",
96
- info="You have to provide your own GOOGLE_API_KEY for this app to function properly",
97
- visible=GOOGLE_API_KEY is None
98
- )
99
-
100
- image_prompt_component = gr.Image(type="pil", label="Image", scale=1)
101
- chatbot_component = gr.Chatbot(
102
- label='Gemini',
103
- bubble_full_width=False,
104
- avatar_images=AVATAR_IMAGES,
105
- scale=2
106
- )
107
- text_prompt_component = gr.Textbox(
108
- placeholder="想说些什么呢?",
109
- label="公众号:正经人王同学"
110
- )
111
- run_button_component = gr.Button()
112
- temperature_component = gr.Slider(
113
- minimum=0,
114
- maximum=1.0,
115
- value=0.4,
116
- step=0.05,
117
- label="Temperature",
118
- info=(
119
- "Temperature controls the degree of randomness in token selection. Lower "
120
- "temperatures are good for prompts that expect a true or correct response, "
121
- "while higher temperatures can lead to more diverse or unexpected results. "
122
- ))
123
- max_output_tokens_component = gr.Slider(
124
- minimum=1,
125
- maximum=2048,
126
- value=1024,
127
- step=1,
128
- label="Token limit",
129
- info=(
130
- "Token limit determines the maximum amount of text output from one prompt. A "
131
- "token is approximately four characters. The default value is 2048."
132
- ))
133
- stop_sequences_component = gr.Textbox(
134
- label="Add stop sequence",
135
- value="",
136
- type="text",
137
- placeholder="STOP, END",
138
- info=(
139
- "A stop sequence is a series of characters (including spaces) that stops "
140
- "response generation if the model encounters it. The sequence is not included "
141
- "as part of the response. You can add up to five stop sequences."
142
- ))
143
- top_k_component = gr.Slider(
144
- minimum=1,
145
- maximum=40,
146
- value=32,
147
- step=1,
148
- label="Top-K",
149
- info=(
150
- "Top-k changes how the model selects tokens for output. A top-k of 1 means the "
151
- "selected token is the most probable among all tokens in the model’s "
152
- "vocabulary (also called greedy decoding), while a top-k of 3 means that the "
153
- "next token is selected from among the 3 most probable tokens (using "
154
- "temperature)."
155
- ))
156
- top_p_component = gr.Slider(
157
- minimum=0,
158
- maximum=1,
159
- value=1,
160
- step=0.01,
161
- label="Top-P",
162
- info=(
163
- "Top-p changes how the model selects tokens for output. Tokens are selected "
164
- "from most probable to least until the sum of their probabilities equals the "
165
- "top-p value. For example, if tokens A, B, and C have a probability of .3, .2, "
166
- "and .1 and the top-p value is .5, then the model will select either A or B as "
167
- "the next token (using temperature). "
168
- ))
169
-
170
- user_inputs = [
171
- text_prompt_component,
172
- chatbot_component
173
- ]
174
-
175
- bot_inputs = [
176
- google_key_component,
177
- image_prompt_component,
178
- temperature_component,
179
- max_output_tokens_component,
180
- stop_sequences_component,
181
- top_k_component,
182
- top_p_component,
183
- chatbot_component
184
- ]
185
 
186
- with gr.Blocks() as demo:
187
- gr.HTML(TITLE)
188
- gr.HTML(SUBTITLE)
189
-
190
- with gr.Column():
191
- google_key_component.render()
192
- with gr.Row():
193
- image_prompt_component.render()
194
- chatbot_component.render()
195
- text_prompt_component.render()
196
- run_button_component.render()
197
- with gr.Accordion("Parameters", open=False):
198
- temperature_component.render()
199
- max_output_tokens_component.render()
200
- stop_sequences_component.render()
201
- with gr.Accordion("Advanced", open=False):
202
- top_k_component.render()
203
- top_p_component.render()
204
-
205
- run_button_component.click(
206
- fn=user,
207
- inputs=user_inputs,
208
- outputs=[text_prompt_component, chatbot_component],
209
- queue=False
210
- ).then(
211
- fn=bot, inputs=bot_inputs, outputs=[chatbot_component],
212
- )
213
-
214
- text_prompt_component.submit(
215
- fn=user,
216
- inputs=user_inputs,
217
- outputs=[text_prompt_component, chatbot_component],
218
- queue=False
219
- ).then(
220
- fn=bot, inputs=bot_inputs, outputs=[chatbot_component],
221
- )
222
-
223
- demo.queue(max_size=99).launch(debug=False, show_error=True)
 
1
# --- Application setup -------------------------------------------------------

# Standard library
import os

# Third-party: Google Generative AI SDK, imaging, and the Gradio UI toolkit
import google.generativeai as genai
import PIL.Image
import gradio as gr
from gradio.data_classes import FileData
from gradio_multimodalchatbot import MultimodalChatbot

# For better security, the API key is read from the environment rather than
# being hard-coded in the source.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# To sanity-check your API key when running locally, you can issue:
#   curl -H 'Content-Type: application/json' \
#        -d '{ "prompt": { "text": "Write a very short story about a magic backpack"} }' \
#        "https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText?key=<enter-your-key-here>"

# Model handles: text-only Gemini and the vision-capable variant.
model = genai.GenerativeModel('gemini-pro')
modelvis = genai.GenerativeModel('gemini-pro-vision')
25
+
26
def gemini(input, file, chatbot=None):
    """
    Handle one chat turn against the Gemini text or vision model.

    Parameters:
        input (str): The user's message text. (Name shadows the builtin but
            is kept for interface compatibility with existing wiring.)
        file: Optional Gradio file object; when present, its image is sent to
            the vision model together with the text.
        chatbot (list | None): Prior [user_msg, bot_msg] pairs in the
            MultimodalChatbot format; None (or empty) means a fresh chat.

    Returns:
        tuple: (updated chatbot history, "" to clear the textbox,
        None to reset the upload button).

    Raises:
        gr.Error: If the underlying API call fails for any reason.
    """
    # Bug fix: the old signature used a mutable default (chatbot=[]), which
    # is shared across calls and silently accumulates history.
    if chatbot is None:
        chatbot = []

    # Rebuild the full conversation in the genai "role/parts" format.
    messages = []
    for user, bot in chatbot:
        messages.extend([
            {'role': 'user', 'parts': [user.text]},
            {'role': 'model', 'parts': [bot.text]},
        ])
    messages.append({'role': 'user', 'parts': [input]})

    try:
        if file is not None:
            # Vision path. NOTE(review): only the current text + image are
            # sent; prior history is not forwarded to the vision model.
            with PIL.Image.open(file.name) as img:
                message = [{'role': 'user', 'parts': [input, img]}]
                response = modelvis.generate_content(message)
            gemini_video_resp = response.text
            messages.append({'role': 'model', 'parts': [gemini_video_resp]})

            # Record the turn in the MultimodalChatbot message format.
            user_msg = {"text": input, "files": [{"file": FileData(path=file.name)}]}
            bot_msg = {"text": gemini_video_resp, "files": []}
            chatbot.append([user_msg, bot_msg])
        else:
            # Text-only path: the whole history is sent for context.
            response = model.generate_content(messages)
            gemini_resp = response.text

            user_msg = {"text": input, "files": []}
            bot_msg = {"text": gemini_resp, "files": []}
            chatbot.append([user_msg, bot_msg])
    except Exception as e:
        # Surface the failure in the UI as a readable message; chain the
        # original exception so server logs keep the full traceback cause.
        print(f"An error occurred: {e}")
        raise gr.Error(str(e)) from e

    return chatbot, "", None
79
+
80
# --- Gradio Blocks interface -------------------------------------------------
with gr.Blocks() as demo:
    # Centered page header
    gr.HTML("<center><h1>免费用Gemini语言+图片多模态对话💬公众号:正经人王同学</h1></center>")

    # Conversation display area
    chat_window = MultimodalChatbot(value=[], height=800)

    with gr.Row():
        # User text entry, wider than the upload button for visibility
        prompt_box = gr.Textbox(scale=4, placeholder='请上传图片或直接开始对话吧||公众号:正经人王同学')
        # Image upload control
        upload_btn = gr.UploadButton("上传图片", file_types=["image"], scale=1)

    # Submitting the textbox sends text (plus any uploaded image) to the model
    prompt_box.submit(gemini, [prompt_box, upload_btn, chat_window], [chat_window, prompt_box, upload_btn])

    # Chained then() calls update the upload button's label while uploading
    upload_btn.upload(lambda: gr.UploadButton("上传图片中..."), [], upload_btn) \
        .then(lambda: gr.UploadButton("图片已上传"), [], upload_btn) \
        .then(lambda: gr.UploadButton("上传图片"), [], upload_btn)

# Launch with request queuing so multiple users can be served
demo.queue().launch()