# Dependencies: gradio, gradio_client, langchain, openai, numpy, ffmpeg, moviepy, ollama, requests (fire is optional; its import is commented out below)
# API Reference: https://www.gradio.app/docs/,
# https://github.com/zhayujie/chatgpt-on-wechat, https://docs.link-ai.tech/platform/api, https://docs.link-ai.tech/api#/
# Description: This file contains the code to run the gradio app for the movie generator.
#
#
#
# Reference: https://zhuanlan.zhihu.com/p/684798694
#
#
####################################################################################################
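# Installation (a sketch based on the dependency list above; exact versions are not
# pinned here, and ffmpeg is a system package rather than a pip package):
#   pip install gradio gradio_client langchain openai numpy moviepy ollama requests
#   # plus a system-level ffmpeg install, e.g. apt-get install ffmpeg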
import gradio as gr
#import fire
from gradio_client import Client, file
import numpy as np
from langchain.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage
from openai import OpenAI
import os
import moviepy.editor as mp
from moviepy.editor import *
# from movie_generator.agi.suno.suno import Suno
import requests
import ollama
from ollama import chat
from ollama import ChatResponse
# ollama.pull("deepseek-r1:1.5b")
# print( 'ollama result:',ollama.list())
# response: ChatResponse = chat(model='deepseek-r1:1.5b', messages=[
# {
# 'role': 'user',
# 'content': 'Why is the sky blue?',
# },
# ])
# print(response['message']['content'])
# # or access fields directly from the response object
# print(response.message.content)
def call_LLM(inputs, prompts='你是一个时尚服装行业的专家, 请回答下面问题:', model_version='Qwen'):
    """Prepend `prompts` to `inputs`, query the selected LLM, and return the reply text.

    The default prompt means: "You are an expert in the fashion/apparel industry,
    please answer the question below:".
    """
    inputs = prompts + ' ' + inputs
    if model_version == "Qwen":
        model_id = 'Qwen/Qwen2.5-3B-Instruct-GGUF'
        # OpenAI-compatible client for the ModelScope inference endpoint; the API key
        # is read from the environment (the variable name here is illustrative) instead
        # of being hard-coded in source.
        client = OpenAI(
            base_url='https://ms-fc-2ea3820b-8c19.api-inference.modelscope.cn/v1',
            api_key=os.environ.get('MODELSCOPE_API_KEY', '')
        )
        response = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": inputs}],
            stream=True
        )
        # Collect the streamed chunks, skipping empty/None deltas before joining.
        res = []
        for chunk in response:
            # print(chunk.choices[0].delta.content, end='', flush=True)
            if chunk.choices and chunk.choices[0].delta.content:
                res.append(chunk.choices[0].delta.content)
        return "".join(res)
elif model_version in ['deepseek-r1:1.5b', 'llama3.2:latest']:
# model= 'deepseek-r1:1.5b'
# model = 'llama3.2:latest'
        # `inputs` already has the prompt prepended above, so pass it through unchanged.
        response: ChatResponse = chat(model=model_version, messages=[
            {
                'role': 'user',
                'content': inputs,
            },
        ])
        return response['message']['content']
else:
return "LLM version is not supported yet."
class GradioApp:
    def __init__(self, config=None):
        # `config` is reserved for settings such as model version and prompts.
        self.config = config
        # Example image directories; the relative path assumes the app is launched from
        # the repository's working directory.
        # self.image_dir = "/mnt/d/workspace/projects/Project_TextImage_Generator/examples"
        self.image_dir = "../examples"
        self.model_dir = os.path.join(self.image_dir, "models")
        self.clothes_dir = os.path.join(self.image_dir, "clothes")
        self.reference_dir = os.path.join(self.image_dir, "references")
        self.model_files = [os.path.join(self.model_dir, f) for f in os.listdir(self.model_dir)]
        self.clothes_files = [os.path.join(self.clothes_dir, f) for f in os.listdir(self.clothes_dir)]
        self.reference_files = [os.path.join(self.reference_dir, f) for f in os.listdir(self.reference_dir)]
    def test_image_func(self, input_image, filter_mode='sepia'):
        def filter_image(input_image, filter_mode='sepia'):
            def sepia(input_img):
                # Classic sepia-tone colour matrix applied to the RGB channels.
                sepia_filter = np.array([
                    [0.393, 0.769, 0.189],
                    [0.349, 0.686, 0.168],
                    [0.272, 0.534, 0.131]
                ])
                sepia_img = input_img.dot(sepia_filter.T)
                sepia_img /= sepia_img.max()  # normalise back into [0, 1]
                return sepia_img
            def grayscale(input_img):
                # Average the channels and normalise by the image maximum.
                input_img = np.mean(input_img, axis=2) / np.max(input_img)
                return input_img
            if filter_mode == 'sepia':
                return sepia(input_image)
            elif filter_mode == 'grayscale':
                return grayscale(input_image)
            else:
                return input_image
        res = f"Got image from image input: {input_image}"
        filtered_image = filter_image(input_image, filter_mode)
        return res, filtered_image
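    # Example (a sketch; assumes `img` is an RGB NumPy array such as the one Gradio's
    # gr.Image component passes in with type="numpy"):
    #   text, filtered = GradioApp().test_image_func(img, filter_mode='sepia')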
    def dress_up_func(self, model_images, cloths_images, prompts, similarity):
        # Request the GPT/image-generation response here; placeholder output for now.
        return "dress_up_func output", [(model_images, "模特"), (cloths_images, "衣服")] * 5
    def update_model_func(self, model_images, cloths_images, prompts, similarity):
        # Request the GPT/image-generation response here; placeholder output for now.
        return "update_model_func output", [(model_images, "模特"), (cloths_images, "衣服")] * 5
    def image_module(self, mode='dress_up', title='image_module', desc=''):
        if mode == 'dress_up':
            # Virtual try-on mode.
            func = self.dress_up_func
        elif mode == 'update_model':
            # Swap-the-model mode.
            func = self.update_model_func
        else:
            func = self.dress_up_func
        examples = []
        for c, m in zip(self.clothes_files, self.model_files):
            examples.append([c, m, 'sepia', 0.6])
comp = gr.Interface(
fn= func,
inputs=[gr.Image(label='衣服', scale=1, height=300),
gr.Image(label='模特',scale=1, height=300),
gr.Dropdown(['sepia', 'grayscale']),
                gr.Slider(0, 10, value=5, label="相似度控制", info="similarity between 0 and 10")],
outputs=[gr.Textbox(label="文本输出"),
gr.Gallery(label='图片展示',height='auto',columns=3)
],
title=title,
description=desc,
theme="huggingface",
examples=examples,
)
return comp
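    # The gr.Interface returned here can also be launched on its own for quick testing,
    # e.g. (a sketch; it assumes the example directories set up in __init__ exist):
    #   GradioApp().image_module('dress_up', title="服装搭配").launch()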
def image_module_v2(self, mode='dress_up', title='image_module', desc=''):
def upload_file(files, current_files):
file_paths = current_files + [file.name for file in files]
return file_paths
def gen_images(clothes_img, model_img):
new_images = []
#call LLM/SD here
new_images.append(clothes_img)
new_images.append(model_img)
return new_images
def clear_images():
return []
def slider_func(val):
print("slider value: ", val)
        if mode == 'dress_up':
            # Virtual try-on mode.
            func = self.dress_up_func
        elif mode == 'update_model':
            # Swap-the-model mode.
            func = self.update_model_func
        else:
            func = self.dress_up_func
        # NOTE: `func` is selected here but not yet wired into gen_images above.
with gr.Blocks() as demo:
# first row
with gr.Row():
# first col -> input column
with gr.Column():
model_image=gr.Image(label="模特图片",type='pil', height=None, width=None)
clothes_image=gr.Image(label="衣服图片",type='pil', height=None, width=None)
upload_button = gr.UploadButton("选择图片上传 (Upload Photos)", file_types=["image"], file_count="multiple")
generate_img_button = gr.Button("生成图片")
                    slider = gr.Slider(0, 10, value=5, label="相似度控制", info="similarity between 0 and 10")
clear_button = gr.Button("清空图片 (Clear Photos)")
# analyze_button = gr.Button("显示图片信息 (Show Image Info)")
input_image_gallery = gr.Gallery(type='pil', label='输入图片列表 (Photos)', height=250, columns=4, visible=True)
# second col-> output column
with gr.Column():
image_gallery = gr.Gallery(type='pil', label='图片列表 (Photos)', height=250, columns=4, visible=True)
# user_images = gr.State([])
# upload_button.upload(upload_file, inputs=[upload_button, user_images], outputs=image_gallery)
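            # A sketch of how the upload button could feed the input gallery, following
            # the commented-out gr.State approach above (untested, so kept as a comment):
            #   user_images = gr.State([])
            #   upload_button.upload(upload_file, inputs=[upload_button, user_images], outputs=input_image_gallery)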
            slider.input(fn=slider_func, inputs=slider)
generate_img_button.click(gen_images,inputs=[clothes_image, model_image], outputs= image_gallery)
clear_button.click(fn=clear_images, inputs=None, outputs=image_gallery)
# analyze_button.click(get_image_info, inputs=image_gallery, outputs=analysis_output)
return demo
    def gen_text(self, inputs, LLM_version='Qwen'):
        # Prepend a fixed prompt to constrain the output. It means: "You are an expert
        # in the fashion/apparel industry; answer the question below and only list the
        # answers, without extra words."
        prompts = "你是一个时尚服装行业的专家, 请回答下面问题,只罗列答案不要返回多余的词:"
        # model = 'deepseek-r1:1.5b'
        # return call_LLM(inputs, prompts, model_version='llama3.2:latest')
        return call_LLM(inputs, prompts, model_version=LLM_version)
def text_module(self, title='文本生成', desc="AI生成关键词"):
comp = gr.Interface(
fn= self.gen_text,
inputs=[gr.Textbox(label="文本输入"), gr.Dropdown(['deepseek-r1:1.5b', 'llama3.2:latest','Qwen'], label='模型选择')],
outputs=[gr.Textbox(label="结果输出")],
title=title,
description=desc,
theme="huggingface",
examples=[
["列出2024年最受欢迎的10个衣服品牌","llama3.2:latest"],
["哪些款式的女装比较潮流, 请列出10个女装品类","Qwen"],
["随机生成10个衣服类目并列出来","Qwen"]],
cache_examples=True,
)
return comp
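    # Note: cache_examples=True makes Gradio run gen_text on every example when the app
    # starts, so the selected backends (Ollama / the Qwen endpoint) must be reachable at
    # launch time or example caching will fail.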
    def generate_interface(self):
        tab_interface_ls = {}
        # Module 1: AI keyword generation.
        tab_interface_ls['AI生词'] = self.text_module()
        # Module 2: outfit try-on.
        tab_interface_ls['服装搭配'] = self.image_module('dress_up', title="服装搭配")
        # Module 3: swap the model.
        tab_interface_ls['更换模特'] = self.image_module('update_model', title="更换模特")
comp = gr.TabbedInterface(
list(tab_interface_ls.values()), list(tab_interface_ls.keys())
)
return comp
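    # For quick debugging, a single tab can also be launched directly instead of the
    # full tabbed interface (a sketch):
    #   GradioApp().text_module().launch()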
def main():
    print("Running Gradio APP")
    component = GradioApp()
    component.generate_interface().launch(share=True)
if __name__ == "__main__":
main()
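# To start the app locally (a sketch; `share=True` above additionally requests a
# temporary public gradio.live link):
#   python appv2.py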
# class GradioUnitTest():
# def __init__(self):
#         # API key read from the environment instead of being hard-coded in source.
#         api_key = os.environ.get("OPENAI_API_KEY", "")
# os.environ["OPENAI_API_KEY"] = api_key
# self.llm_model = ChatOpenAI(temperature=0.5, model="gpt-3.5-turbo")
# # self.llm_model= None
# self.client = OpenAI(api_key=api_key)
# cur_path =os.getcwd()
# root_path = '/'.join(cur_path.split("/")[:-2])
# suno_result_path = os.path.join(root_path,'examples','suno_musics')
# # self.suno = Suno(result_path=suno_result_path)
# self.suno= None
# pass
# def test_text(self, input_text, mode = 'count'):
# def process_test( _text, mode = 'count'):
# def count_words(text):
# words = text.split(" ")
# res_dict = {}
# for word in words:
# if word in res_dict:
# res_dict[word] += 1
# else:
# res_dict[word] = 1
# res = "\n".join([f"word: {key}, count: {value}" for key, value in res_dict.items()])
# return res
# def reverse_text(text):
# return text[::-1]
# if mode == 'count':
# return count_words(_text)
# return reverse_text(_text)
# res = f"Got text from textbox: {input_text}"
# return res, process_test(input_text, mode)
# # return res, count_words(input_text)
# def test_image(self, input_image, filter_mode='sepia'):
# def filter_image(input_image, filter_mode='sepia'):
# def sepia(input_img):
# sepia_filter = np.array([
# [0.393, 0.769, 0.189],
# [0.349, 0.686, 0.168],
# [0.272, 0.534, 0.131]
# ])
# sepia_img = input_img.dot(sepia_filter.T)
# sepia_img /= sepia_img.max()
# return sepia_img
# def grayscale(input_img):
# input_img = np.mean(input_img, axis=2) / np.max(input_img)
# return input_img
# if filter_mode == 'sepia':
# return sepia(input_image)
# elif filter_mode == 'grayscale':
# return grayscale(input_image)
# else:
# return input_image
# res = f"Got image from image input: {input_image}"
# filtered_image = filter_image(input_image, filter_mode)
# return res, filtered_image
# def test_audio(self, input_audio, filter_mode='echo', prompt='', checkbox_ls=[]):
# def process_audio(input_audio, filter_mode='echo'):
# print("input_audio shape: ", input_audio[1].shape, input_audio)
# def echo(input_audio):
# aud = np.concatenate([input_audio[1], input_audio[1]], axis=0)
# return (input_audio[0], aud)
# def reverse(input_audio):
# return (input_audio[0], input_audio[1][::-1])
# if filter_mode == 'echo':
# res_audio = echo(input_audio)
# elif filter_mode == 'reverse':
# res_audio = reverse(input_audio)
# else:
# res_audio = input_audio
# return res_audio
# print("checkbox_ls: ", checkbox_ls)
# res = f"Got audio from audio input: {input_audio}"
# wait_audio = 'wait_audio' in checkbox_ls
# make_instrumental = 'make_instrumental' in checkbox_ls
# if checkbox_ls != []:
# print('checlbox_ls: ', checkbox_ls)
# generated_audio_path=''
# if prompt != '':
# music_paths = self.test_music_generation(prompt, make_instrumental, wait_audio)
# generated_audio_path = '\n'.join(music_paths)
# res = f"Got audio from suno: {generated_audio_path}"
# processed_audio = process_audio(input_audio, filter_mode)
# return res, processed_audio, generated_audio_path
# def test_video(self, input_video, filter_mode='flip'):
# def process_video(input_video, filter_mode='flip'):
# print("input_video data: ", input_video)
# def clip(input_video):
# clip1 = VideoFileClip(input_video)
# clip2 = VideoFileClip(input_video).subclip(2,3)
# clip3 = VideoFileClip(input_video)
# final_clip = concatenate_videoclips([clip1,clip2,clip3])
# output_video = "final_clip.mp4"
# final_clip.write_videofile(output_video)
# return output_video
# def flip(input_video):
# return np.flip(input_video, axis=1)
# def rotate(input_video):
# return np.rot90(input_video)
# if filter_mode == 'clip':
# return clip(input_video)
# elif filter_mode == 'flip':
# return flip(input_video)
# elif filter_mode == 'rotate':
# return rotate(input_video)
# else:
# return input_video
# res = f"Got video from video input: {input_video}"
# processed_video = process_video(input_video, filter_mode)
# return res, processed_video
# def test_chatbot(self, input_text, history):
# history_langchain_format =[]
# for human, ai in history:
# history_langchain_format.append(HumanMessage(human))
# history_langchain_format.append(AIMessage(ai))
#         history_langchain_format.append(HumanMessage(content=input_text))
# llm_response = self.llm_model(history_langchain_format)
# return llm_response.content
# def predict(self, message, history):
# history_openai_format = []
# for human, assistant in history:
# history_openai_format.append({"role": "user", "content": human })
# history_openai_format.append({"role": "assistant", "content":assistant})
# history_openai_format.append({"role": "user", "content": message})
# response = self.client.chat.completions.create(model='gpt-3.5-turbo',
# messages= history_openai_format,
# temperature=1.0,
# stream=True)
# partial_message = ""
# for chunk in response:
# if chunk.choices[0].delta.content is not None:
# partial_message = partial_message + chunk.choices[0].delta.content
# yield partial_message
# def predict_v2(self, message, history):
# url = "https://api.link-ai.chat/v1/chat/completions"
# headers = {
#             # Bearer token read from the environment (the variable name is illustrative).
#             'Authorization': 'Bearer ' + os.environ.get('LINK_AI_API_KEY', ''),
# 'Content-Type': 'application/json'
# }
# history_openai_format = []
# for human, assistant in history:
# history_openai_format.append({"role": "user", "content": human })
# history_openai_format.append({"role": "assistant", "content":assistant})
# history_openai_format.append({"role": "user", "content": message})
# data = {
# "app_code": "default",
# "messages": history_openai_format,
# }
# response = requests.post(url, headers=headers, json=data).json()
# partial_message = ""
# for chunk in response['choices']:
# if chunk['message']["content"] is not None:
# partial_message = partial_message + chunk['message']["content"]
# yield partial_message
# def predict_v3(self, message, history):
# url = "https://api.link-ai.chat/v1/chat/completions"
# headers = {
#             # Bearer token read from the environment (the variable name is illustrative).
#             'Authorization': 'Bearer ' + os.environ.get('LINK_AI_API_KEY', ''),
# 'Content-Type': 'application/json'
# }
# history_openai_format = []
# for human, assistant in history:
# history_openai_format.append({"role": "user", "content": human })
# history_openai_format.append({"role": "assistant", "content":assistant})
# history_openai_format.append({"role": "user", "content": message})
# data = {
# "app_code": "default",
# "messages": history_openai_format,
# }
# response = requests.post(url, headers=headers, json=data).json()
# partial_message = ""
# for chunk in response['choices']:
# if chunk['message']["content"] is not None:
# partial_message = partial_message + chunk['message']["content"]
# yield partial_message
# def test_music_generation(self, prompt, make_instrumental=False, wait_audio=False):
# request = {
# "prompt": prompt,
# "make_instrumental": make_instrumental,
# "wait_audio": wait_audio
# }
# # music_ls = self.suno.generate_music(request)
# music_ls = []
# return music_ls
# def run_test(self, mode='text'):
# tab_interface_ls = {}
# if mode == 'text' or mode == 'mix':
# comp = gr.Interface(
# fn= self.test_text,
# inputs=['textbox', gr.Dropdown(['count', 'reverse'])],
# outputs=["textbox", "textbox"],
# title="test text module",
# description="test text.",
# theme="huggingface",
# examples=[
# ["A group of friends go on a road trip to find a hidden treasure."],
# ["A scientist discovers a way to travel through time."],
# ["A group of survivors try to escape a zombie apocalypse."],
# ],
# )
# tab_interface_ls['Text Ops'] = comp
# if mode == 'text':
# return comp
# if mode == 'image' or mode == 'mix':
# # https://www.gradio.app/guides/the-interface-class
# comp = gr.Interface(
# fn= self.test_image,
# inputs=['image', gr.Dropdown(['sepia', 'grayscale'])],
# outputs=["textbox",'image'],
# title="test image preprocess Module",
# description="test text.",
# theme="huggingface",
# examples=[
# ["/mnt/c/Users/wwk/Pictures/OIP.jpg", "sepia"],
# ],
# )
# tab_interface_ls['Image Ops'] = comp
# if mode == 'image':
# return comp
# if mode == 'audio' or mode == 'mix':
# comp = gr.Interface(
# fn= self.test_audio,
# inputs=['audio', gr.Dropdown(['echo', 'reverse']), 'textbox', gr.CheckboxGroup([ 'make_instrumental' ,'wait_audio'], label="Suno options", info="make_instrumental<bool>, wait_audio:<bool>") ],
# outputs=["textbox", 'audio'],
# title="test audio preprocess Module",
# description="test audio.",
# theme="huggingface",
# examples=[
# ["/mnt/d/workspace/projects/movie_generator/examples/audio/两只老虎,两只老虎-神秘-欢快-v2.mp3", "echo"],
# ["/mnt/d/workspace/projects/movie_generator/examples/audio/两只老虎,两只老虎-神秘-欢快-v2.mp3", "reverse"],
# ],
# )
# tab_interface_ls['Audio Ops'] = comp
# if mode == 'audio':
# return comp
# if mode == 'video' or mode == 'mix':
# comp = gr.Interface(
# fn= self.test_video,
# inputs= [ 'video', gr.Dropdown(['clip', 'rotate'])],
# outputs=["textbox", 'video'],
# title="test video preprocess Module",
# description="test video.",
# theme="huggingface",
# examples=[
# ["/mnt/d/workspace/projects/movie_generator/examples/video/2月12日.mp4", "clip"],
# ],
# )
# tab_interface_ls['Video Ops'] = comp
# if mode == 'video':
# return comp
# if mode == 'chat' or mode == 'mix':
# # https://www.gradio.app/guides/creating-a-custom-chatbot-with-blocks
# # comp = gr.ChatInterface(self.test_chatbot)
# comp = gr.ChatInterface(self.predict_v2)
# tab_interface_ls['ChatBot'] = comp
# if mode == 'chat':
# return comp
# if mode == 'mix':
# # mix mode, use radio button to select the mode
# comp = gr.TabbedInterface(
# list(tab_interface_ls.values()), list(tab_interface_ls.keys())
# )
# return comp
# else:
# def flip_text(x):
# return x[::-1]
# def flip_image(x):
# return np.fliplr(x)
# with gr.Blocks() as comp:
# gr.Markdown("Flip text or image files using this demo.")
# with gr.Tab("Flip Text"):
# text_input = gr.Textbox()
# text_output = gr.Textbox()
# text_button = gr.Button("Flip")
# with gr.Tab("Flip Image"):
# with gr.Row():
# image_input = gr.Image()
# image_output = gr.Image()
# image_button = gr.Button("Flip")
# with gr.Accordion("Open for More!", open=False):
# gr.Markdown("Look at me...")
# temp_slider = gr.Slider(
# minimum=0.0,
# maximum=1.0,
# value=0.1,
# step=0.1,
# interactive=True,
# label="Slide me",
# )
# temp_slider.change(lambda x: x, [temp_slider])
# text_button.click(flip_text, inputs=text_input, outputs=text_output)
# image_button.click(flip_image, inputs=image_input, outputs=image_output)
# return comp