sanbo committed on
Commit c6414ce · 1 Parent(s): f38f24d

update sth. at 2024-11-15 17:08:48

Files changed (1): app.py +126 -198
app.py CHANGED
@@ -1,220 +1,148 @@
  import gradio as gr
  from huggingface_hub import InferenceClient
- import json
- import uuid
  from PIL import Image
- from bs4 import BeautifulSoup
  import requests
- import random
- from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
- from threading import Thread
- import re
- import time
- import torch
- import cv2
- from gradio_client import Client, file
-
- def image_gen(prompt):
-     client = Client("KingNish/Image-Gen-Pro")
-     return client.predict("Image Generation", None, prompt, api_name="/image_gen_pro")
-
- model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
-
- processor = LlavaProcessor.from_pretrained(model_id)
-
- model = LlavaForConditionalGeneration.from_pretrained(model_id)
- model.to("cpu")
-
- def llava(message, history):
      if message["files"]:
-         image = message["files"][0]
      else:
          for hist in history:
-             if type(hist[0]) == tuple:
-                 image = hist[0][0]
-
      txt = message["text"]
-
-     gr.Info("Analyzing image")
      image = Image.open(image).convert("RGB")
      prompt = f"<|im_start|>user <image>\n{txt}<|im_end|><|im_start|>assistant"
-
      inputs = processor(prompt, image, return_tensors="pt")
      return inputs

- def extract_text_from_webpage(html_content):
-     soup = BeautifulSoup(html_content, 'html.parser')
-     for tag in soup(["script", "style", "header", "footer"]):
-         tag.extract()
-     return soup.get_text(strip=True)
-
- def search(query):
-     term = query
-     start = 0
-     all_results = []
-     max_chars_per_page = 8000
-     with requests.Session() as session:
-         resp = session.get(
-             url="https://www.google.com/search",
-             headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
-             params={"q": term, "num": 3, "udm": 14},
-             timeout=5,
-             verify=None,
-         )
-         resp.raise_for_status()
-         soup = BeautifulSoup(resp.text, "html.parser")
-         result_block = soup.find_all("div", attrs={"class": "g"})
-         for result in result_block:
-             link = result.find("a", href=True)
-             link = link["href"]
-             try:
-                 webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}, timeout=5, verify=False)
-                 webpage.raise_for_status()
-                 visible_text = extract_text_from_webpage(webpage.text)
-                 if len(visible_text) > max_chars_per_page:
-                     visible_text = visible_text[:max_chars_per_page]
-                 all_results.append({"link": link, "text": visible_text})
-             except requests.exceptions.RequestException:
-                 all_results.append({"link": link, "text": None})
-     return all_results
-
- # Initialize inference clients for different models
- client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
- client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
- client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
- client_yi = InferenceClient("01-ai/Yi-1.5-34B-Chat")
-
- # Define the main chat function
- def respond(message, history):
-     func_caller = []
-
-     user_prompt = message
-     # Handle image processing
-     if message["files"]:
-         inputs = llava(message, history)
-         streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
-         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
-
-         thread = Thread(target=model.generate, kwargs=generation_kwargs)
-         thread.start()
-
-         buffer = ""
-         for new_text in streamer:
-             buffer += new_text
-             yield buffer
-     else:
-         functions_metadata = [
-             {"type": "function", "function": {"name": "web_search", "description": "Search query on google", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "web search query"}}, "required": ["query"]}}},
-             {"type": "function", "function": {"name": "general_query", "description": "Reply general query of USER", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "A detailed prompt"}}, "required": ["prompt"]}}},
-             {"type": "function", "function": {"name": "image_generation", "description": "Generate image for user", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "image generation prompt"}}, "required": ["query"]}}},
-             {"type": "function", "function": {"name": "image_qna", "description": "Answer question asked by user related to image", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Question by user"}}, "required": ["query"]}}},
-         ]
-
-         for msg in history:
-             func_caller.append({"role": "user", "content": f"{str(msg[0])}"})
-             func_caller.append({"role": "assistant", "content": f"{str(msg[1])}"})
-
-         message_text = message["text"]
-         func_caller.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message_text}'})
-
-         response = client_gemma.chat_completion(func_caller, max_tokens=200)
-         response = str(response)
-         try:
-             response = response[int(response.find("{")):int(response.rindex("</"))]
-         except:
-             response = response[int(response.find("{")):(int(response.rfind("}"))+1)]
-         response = response.replace("\\n", "")
-         response = response.replace("\\'", "'")
-         response = response.replace('\\"', '"')
-         response = response.replace('\\', '')
-         print(f"\n{response}")
-
-         try:
-             json_data = json.loads(str(response))
-             if json_data["name"] == "web_search":
-                 query = json_data["arguments"]["query"]
-                 gr.Info("Searching Web")
-                 web_results = search(query)
-                 gr.Info("Extracting relevant Info")
-                 web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
-                 messages = f"<|im_start|>system\nYou are OpenCHAT mini a helpful assistant made by KingNish. You are provided with WEB results from which you can find informations to answer users query in Structured and More better way. You do not say Unnecesarry things Only say thing which is important and relevant. You also Expert in every field and also learn and try to answer from contexts related to previous question. Try your best to give best response possible to user. You also try to show emotions using Emojis and reply like human, use short forms, friendly tone and emotions.<|im_end|>"
-                 for msg in history:
-                     messages += f"\n<|im_start|>user\n{str(msg[0])}<|im_end|>"
-                     messages += f"\n<|im_start|>assistant\n{str(msg[1])}<|im_end|>"
-                 messages += f"\n<|im_start|>user\n{message_text}<|im_end|>\n<|im_start|>web_result\n{web2}<|im_end|>\n<|im_start|>assistant\n"
-                 stream = client_mixtral.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
-                 output = ""
-                 for response in stream:
-                     if not response.token.text == "<|im_end|>":
-                         output += response.token.text
-                         yield output
-             elif json_data["name"] == "image_generation":
-                 query = json_data["arguments"]["query"]
-                 gr.Info("Generating Image, Please wait 10 sec...")
-                 yield "Generating Image, Please wait 10 sec..."
-                 try:
-                     image = image_gen(f"{str(query)}")
-                     yield gr.Image(image[1])
-                 except:
-                     client_sd3 = InferenceClient("stabilityai/stable-diffusion-3-medium-diffusers")
-                     seed = random.randint(0, 999999)
-                     image = client_sd3.text_to_image(query, negative_prompt=f"{seed}")
-                     yield gr.Image(image)
-             elif json_data["name"] == "image_qna":
-                 inputs = llava(message, history)
-                 streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
-                 generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
-
-                 thread = Thread(target=model.generate, kwargs=generation_kwargs)
-                 thread.start()
-
-                 buffer = ""
-                 for new_text in streamer:
-                     buffer += new_text
-                     yield buffer
-             else:
-                 messages = f"<|im_start|>system\nYou are OpenCHAT mini a helpful assistant made by KingNish. You answers users query like human friend. You are also Expert in every field and also learn and try to answer from contexts related to previous question. Try your best to give best response possible to user. You also try to show emotions using Emojis and reply like human, use short forms, friendly tone and emotions.<|im_end|>"
-                 for msg in history:
-                     messages += f"\n<|im_start|>user\n{str(msg[0])}<|im_end|>"
-                     messages += f"\n<|im_start|>assistant\n{str(msg[1])}<|im_end|>"
-                 messages += f"\n<|im_start|>user\n{message_text}<|im_end|>\n<|im_start|>assistant\n"
-                 stream = client_yi.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
-                 output = ""
-                 for response in stream:
-                     if not response.token.text == "<|endoftext|>":
-                         output += response.token.text
-                         yield output
-         except:
-             messages = f"<|start_header_id|>system\nYou are OpenCHAT mini a helpful assistant made by KingNish. You answers users query like human friend. You are also Expert in every field and also learn and try to answer from contexts related to previous question. Try your best to give best response possible to user. You also try to show emotions using Emojis and reply like human, use short forms, friendly tone and emotions.<|end_header_id|>"
-             for msg in history:
-                 messages += f"\n<|start_header_id|>user\n{str(msg[0])}<|end_header_id|>"
-                 messages += f"\n<|start_header_id|>assistant\n{str(msg[1])}<|end_header_id|>"
-             messages += f"\n<|start_header_id|>user\n{message_text}<|end_header_id|>\n<|start_header_id|>assistant\n"
-             stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
-             output = ""
-             for response in stream:
-                 if not response.token.text == "<|eot_id|>":
-                     output += response.token.text
-                     yield output
-
- # Create the Gradio interface
- demo = gr.ChatInterface(
-     fn=respond,
-     chatbot=gr.Chatbot(show_copy_button=True, likeable=True, layout="panel"),
-     description="# OpenGPT 4o mini\n ### You can engage in chat, generate images, perform web searches, and Q&A with images.",
-     textbox=gr.MultimodalTextbox(),
-     multimodal=True,
-     concurrency_limit=200,
-     examples=[
-         {"text": "Hy, who are you?"},
-         {"text": "What's the current price of Bitcoin"},
-         {"text": "Search and Tell me what's trending on Youtube."},
-         {"text": "Create A Beautiful image of Effiel Tower at Night"},
-         {"text": "Write me a Python function to calculate the first 10 digits of the fibonacci sequence."},
-         {"text": "What's the colour of car in given image", "files": ["./car1.png"]},
-         {"text": "Read what's written on paper", "files": ["./paper_with_text.png"]},
-     ],
-     cache_examples=False,
- )
- demo.launch()
  import gradio as gr
  from huggingface_hub import InferenceClient
  from PIL import Image
  import requests
+ import json
+ import uuid
+ from gradio_client import Client  # image_gen below needs this; it was dropped from the old import list
+
+ # ===================== Core logic =====================
+
+ # Initialize the model inference clients
+ client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
+ client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+ client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+ client_yi = InferenceClient("01-ai/Yi-1.5-34B-Chat")
+
+ # ---------- Service status checks ----------
+
+ def check_service_status():
+     """
+     Check the availability of each backing service and return a status dict.
+     """
+     def is_available(client):
+         # InferenceClient has no is_available(); probe the model status instead.
+         try:
+             return client.get_model_status().loaded
+         except Exception:
+             return False
+
+     services = {
+         "Gemma": is_available(client_gemma),
+         "Mixtral": is_available(client_mixtral),
+         "Llama": is_available(client_llama),
+         "Yi": is_available(client_yi),
+     }
+     return services
+
+ def get_service_status_markdown():
+     """
+     Format the service statuses as Markdown text for display in the UI.
+     """
+     statuses = check_service_status()
+     return "\n".join(f"{service}: {'🟢 available' if available else '🔴 unavailable'}" for service, available in statuses.items())
+
+ # ---------- Image generation ----------
+
+ def image_gen(prompt):
+     """
+     Call the image-generation model and return the generated image.
+     """
+     client = Client("KingNish/Image-Gen-Pro")
+     image = client.predict("Image Generation", None, prompt, api_name="/image_gen_pro")
+     return image
+
+ # ---------- Text and image Q&A ----------
+
+ def process_llava_input(message, history, processor):
+     """
+     Prepare LLaVA image-Q&A inputs: pull the text and image out of the
+     message and history, then build the model inputs.
+     """
+     image = None
      if message["files"]:
+         image = message["files"][0]  # an image file uploaded with the message
      else:
          for hist in history:
+             if isinstance(hist[0], tuple):
+                 image = hist[0][0]  # fall back to the most recent image in the history
+
      txt = message["text"]

      image = Image.open(image).convert("RGB")
      prompt = f"<|im_start|>user <image>\n{txt}<|im_end|><|im_start|>assistant"

      inputs = processor(prompt, image, return_tensors="pt")
      return inputs

+ def llava_answer(inputs, model):
+     """
+     Run the LLaVA model on the prepared inputs and return its answer.
+     """
+     # The model-generation logic goes here (implementation details omitted).
+     answer = model.generate_answer(inputs)
+     return answer
+
+ # ---------- Web search ----------
+
+ def search(query):
+     """
+     Run a web search and return the results.
+     """
+     search_results = []
+     with requests.Session() as session:
+         resp = session.get("https://www.google.com/search", params={"q": query, "num": 3})
+         # TODO: extract result titles and links from the response with BeautifulSoup
+         # search_results = [(title, link), ...]
+     return search_results
+
+ # ---------- Response generation ----------
+
+ def respond(message, history, client):
+     """
+     Generate a reply to the message with the given model client and return
+     the updated chat history.
+     """
+     # The caller picks which model answers by passing the matching client.
+     result = client.chat_completion([{"role": "user", "content": message}], max_tokens=512)
+     reply = result.choices[0].message.content
+     return (history or []) + [(message, reply)]
+
+ # ===================== Gradio UI =====================
+
+ def build_interface():
+     """
+     Build the Gradio layout: a service status bar plus tabs for text chat,
+     image generation, and image Q&A.
+     """
+     with gr.Blocks() as demo:
+         # Service status bar
+         gr.Markdown("# Service Status")
+         gr.Markdown(get_service_status_markdown())
+
+         # Main multimodal tabs
+         with gr.Tab("Text Chat"):
+             chat_textbox = gr.Textbox(label="Your question", placeholder="Type some text...")
+             chat_output = gr.Chatbot()
+             chat_button = gr.Button("Send")
+
+         with gr.Tab("Image Generation"):
+             image_prompt = gr.Textbox(label="Image prompt", placeholder="Describe the image to generate")
+             image_output = gr.Image()
+             image_button = gr.Button("Generate image")
+
+         with gr.Tab("Image Q&A"):
+             image_upload = gr.Image(label="Upload an image")
+             image_question = gr.Textbox(label="Question", placeholder="Ask something about the image")
+             answer_output = gr.Textbox(label="Answer")
+             answer_button = gr.Button("Answer")
+
+         # Click handlers for each button
+         chat_button.click(lambda msg, hist: respond(msg, hist, client_gemma), inputs=[chat_textbox, chat_output], outputs=chat_output)
+         image_button.click(image_gen, inputs=image_prompt, outputs=image_output)
+         # NOTE: assumes `processor` and `model` (the local LLaVA pipeline this
+         # commit removed) are defined before this handler runs.
+         answer_button.click(lambda img, question: llava_answer(process_llava_input({"files": [img], "text": question}, [], processor), model), inputs=[image_upload, image_question], outputs=answer_output)
+
+         gr.Markdown("### Notes")
+         gr.Markdown("This assistant supports text chat, image generation, and image Q&A; pick the tab that matches the task.")
+
+     return demo
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     demo = build_interface()
+     demo.launch()
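
The new search() leaves result extraction as a TODO. A minimal sketch of how that TODO could be filled, reusing the headers and the "div.g" selector from the removed implementation above (the h3 title selector is an assumption about Google's current markup, and bs4 would need to be restored as a dependency):

    from bs4 import BeautifulSoup  # assumes bs4 is added back to requirements
    import requests

    def search(query):
        """Return (title, link) pairs for the top results, filling in the TODO."""
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}
        search_results = []
        with requests.Session() as session:
            resp = session.get(
                "https://www.google.com/search",
                params={"q": query, "num": 3},
                headers=headers,
                timeout=5,
            )
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "html.parser")
            for result in soup.find_all("div", attrs={"class": "g"}):
                link = result.find("a", href=True)
                title = result.find("h3")  # assumption: titles render as <h3>
                if link and title:
                    search_results.append((title.get_text(strip=True), link["href"]))
        return search_results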
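
Likewise, llava_answer() is a stub ("implementation details omitted") and generate_answer() is not a real transformers method. One way to back it is to reinstate the local LLaVA pipeline this commit removed; a sketch under that assumption (model_id and the generate call come from the removed code, while the non-streaming decode of only the new tokens is an editorial choice):

    from transformers import LlavaProcessor, LlavaForConditionalGeneration

    model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
    processor = LlavaProcessor.from_pretrained(model_id)
    model = LlavaForConditionalGeneration.from_pretrained(model_id).to("cpu")

    def llava_answer(inputs, model):
        """Non-streaming answer generation for the prepared LLaVA inputs."""
        output_ids = model.generate(**inputs, max_new_tokens=1024)
        # Decode only the newly generated tokens, not the echoed prompt.
        new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
        return processor.decode(new_tokens, skip_special_tokens=True)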