# Hugging Face Space: Awk123/workspace
# Space status: Runtime error
# File size: 25,561 Bytes
# Revision: e51d1a5

# Dependencies: gradio, fire, langchain, openai, numpy, ffmpeg, moviepy
# API Reference: https://www.gradio.app/docs/,
# https://github.com/zhayujie/chatgpt-on-wechat, https://docs.link-ai.tech/platform/api,  https://docs.link-ai.tech/api#/
# Description: This file contains the code to run the gradio app for the movie generator.
# 
#
#
# Reference link: https://zhuanlan.zhihu.com/p/684798694
#
#
####################################################################################################

import gradio as gr
#import fire
from gradio_client import Client, file
import numpy as np
from langchain.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage

from openai import OpenAI
import os
import moviepy.editor as mppyth
from moviepy.editor import *
# from movie_generator.agi.suno.suno import Suno
import requests


import ollama
from ollama import chat
from ollama import ChatResponse

# ollama.pull("deepseek-r1:1.5b")
# print( 'ollama result:',ollama.list())
# response: ChatResponse = chat(model='deepseek-r1:1.5b', messages=[
#   {
#     'role': 'user',
#     'content': 'Why is the sky blue?',
#   },
# ])
# print(response['message']['content'])
# # or access fields directly from the response object
# print(response.message.content)

def call_LLM(inputs, prompts= '你是一个时尚服装行业的专家， 请回答下面问题：', model_version = 'Qwen'):
    """Send a prefixed question to the selected LLM backend and return its reply.

    Args:
        inputs: The user's question text.
        prompts: Instruction text placed in front of ``inputs`` (separated by
            one space) before the request is sent.
        model_version: ``'Qwen'`` uses the OpenAI-compatible endpoint configured
            below; ``'deepseek-r1:1.5b'`` and ``'llama3.2:latest'`` are sent to
            the ``ollama`` ``chat`` function imported by this module.

    Returns:
        The reply text, or the fixed string ``"LLM version is not supported
        yet."`` when ``model_version`` is not one of the values above.
    """
    # Build the final prompt exactly once so that every backend receives the
    # same text (the ollama branch used to prepend ``prompts`` a second time).
    full_prompt = prompts + ' ' + inputs

    if model_version == "Qwen":
        from openai import OpenAI

        model_id = 'Qwen/Qwen2.5-3B-Instruct-GGUF'

        # NOTE(review): the fallback key is committed to source control; it
        # should be rotated and supplied only through the environment.
        client = OpenAI(
            base_url='https://ms-fc-2ea3820b-8c19.api-inference.modelscope.cn/v1',
            api_key=os.environ.get(
                'MODELSCOPE_API_KEY', 'e37bfdad-0f6a-46c2-a7bf-f9dc365967e3'
            ),
        )

        response = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": full_prompt}],
            stream=True,
        )

        pieces = []
        for chunk in response:
            # Streamed chunks may carry no choices or a delta without text;
            # joining a None would raise TypeError, so skip those.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)
        return "".join(pieces)

    if model_version in ['deepseek-r1:1.5b', 'llama3.2:latest']:
        response: ChatResponse = chat(model=model_version, messages=[
            {
                'role': 'user',
                'content': full_prompt,
            },
        ])
        return response['message']['content']

    return "LLM version is not supported yet."
import os
class GradioApp:
    """Builds the Gradio UI: a text-generation tab and two image tabs.

    Example images are looked up under ``../examples`` (relative to the
    current working directory) in the ``models``, ``clothes`` and
    ``references`` sub-folders.
    """

    def __init__(self,config=None):
        """Store ``config`` and collect the example image paths.

        Args:
            config: Optional configuration object; it is only stored on the
                instance here.
        """
        self.config = config
        # self.image_dir = "/mnt/d/workspace/projects/Project_TextImage_Generator/examples"
        self.image_dir = "../examples"
        self.model_dir = os.path.join(self.image_dir, "models")
        self.clothes_dir = os.path.join(self.image_dir, "clothes")
        self.reference_dir = os.path.join(self.image_dir, "references")
        # A missing example folder yields an empty list instead of crashing
        # the whole app at construction time.
        self.model_files = self._list_files(self.model_dir)
        self.clothes_files = self._list_files(self.clothes_dir)
        self.reference_files = self._list_files(self.reference_dir)

    @staticmethod
    def _list_files(directory):
        """Return the sorted paths of the regular files directly inside ``directory``.

        Returns an empty list when the directory cannot be listed. Sorting
        keeps the clothes/model example pairing stable between runs, since
        ``os.listdir`` returns entries in arbitrary order.
        """
        try:
            names = sorted(os.listdir(directory))
        except OSError:
            return []
        paths = [os.path.join(directory, name) for name in names]
        return [path for path in paths if os.path.isfile(path)]

    def test_image_func(self, input_image, filter_mode='sepia'):
        """Apply a simple colour filter to an image array.

        Args:
            input_image: Image as a numpy array; the filters index/multiply
                along the third axis (assumes height x width x 3 - TODO
                confirm against the caller). ``None`` is passed through.
            filter_mode: ``'sepia'`` or ``'grayscale'``; any other value
                returns the image unchanged.

        Returns:
            A ``(message, image)`` tuple.
        """
        def sepia(input_img):
            sepia_filter = np.array([
                [0.393, 0.769, 0.189],
                [0.349, 0.686, 0.168],
                [0.272, 0.534, 0.131]
            ])
            sepia_img = input_img.dot(sepia_filter.T)
            peak = sepia_img.max()
            # An all-black image has a peak of 0; skip the normalisation
            # rather than dividing by zero and filling the image with NaN.
            if peak != 0:
                sepia_img /= peak
            return sepia_img

        def grayscale(input_img):
            gray = np.mean(input_img, axis=2)
            peak = np.max(input_img)
            return gray / peak if peak != 0 else gray

        res = f"Got image from image input: {input_image}"
        if input_image is None:
            # Nothing was supplied, so there is nothing to filter.
            return res, None
        if filter_mode == 'sepia':
            return res, sepia(input_image)
        if filter_mode == 'grayscale':
            return res, grayscale(input_image)
        return res, input_image

    def dress_up_func(self, model_images, cloths_images, prompts, similarity):
        """Placeholder try-on callback: echoes both input images five times.

        Returns:
            A fixed message and a list of ``(image, caption)`` gallery items.
        """
        # TODO: request the GPT response here.
        return "dress_up_func output",[(model_images, "模特"), (cloths_images, "衣服")]*5

    def update_model_func(self, model_images, cloths_images, prompts, similarity):
        """Placeholder model-swap callback: echoes both input images five times.

        Returns:
            A fixed message and a list of ``(image, caption)`` gallery items.
        """
        # TODO: request the GPT response here.
        return "update_model_func output", [(model_images, "模特"), (cloths_images, "衣服")]*5

    def image_module(self, mode='dress_up', title='image_module', desc=''):
        """Build the ``gr.Interface`` for one of the image tabs.

        Args:
            mode: ``'update_model'`` selects ``update_model_func``; every
                other value selects ``dress_up_func``.
            title: Title shown on the interface.
            desc: Description shown on the interface.

        Returns:
            The constructed ``gr.Interface``.
        """
        if mode == 'update_model':
            # Swap the model.
            func = self.update_model_func
        else:
            # Try clothes on a model (also the fallback for unknown modes).
            func = self.dress_up_func

        # Inputs and example columns follow the callback's positional order
        # (model image first, clothes image second). Declaring them the other
        # way round made the gallery caption each picture with the wrong label.
        examples = [
            [m, c, 'sepia', 0.6]
            for c, m in zip(self.clothes_files, self.model_files)
        ]
        comp = gr.Interface(
                fn= func,
                inputs=[gr.Image(label='模特',scale=1, height=300),
                        gr.Image(label='衣服', scale=1, height=300),
                        gr.Dropdown(['sepia', 'grayscale']),
                        gr.Slider(0, 10, value=5, label="相似度控制", info="similarity between 0 and 10")],
                outputs=[gr.Textbox(label="文本输出"),
                         gr.Gallery(label='图片展示',height='auto',columns=3)
                         ],
                title=title,
                description=desc,
                theme="huggingface",
                # Pass None rather than an empty list when no example images
                # were found.
                examples=examples or None,
            )
        return comp

    def image_module_v2(self, mode='dress_up', title='image_module', desc=''):
        """Build a ``gr.Blocks`` variant of the image tab.

        The ``mode``, ``title`` and ``desc`` arguments are accepted for parity
        with ``image_module`` but are not used by this layout yet.

        Returns:
            The constructed ``gr.Blocks``.
        """
        def upload_file(files, current_files):
            # Kept for the (currently disabled) upload wiring below.
            file_paths = current_files + [uploaded.name for uploaded in files]
            return file_paths

        def gen_images(clothes_img, model_img):
            # TODO: call the LLM / Stable Diffusion backend here.
            # Empty image slots arrive as None; leave them out of the gallery.
            return [img for img in (clothes_img, model_img) if img is not None]

        def clear_images():
            return []

        def slider_func(val):
            print("slider value: ", val)

        with gr.Blocks() as demo:
            # first row
            with gr.Row():
                # first col -> input column
                with gr.Column():
                    model_image=gr.Image(label="模特图片",type='pil', height=None, width=None)
                    clothes_image=gr.Image(label="衣服图片",type='pil', height=None, width=None)
                    upload_button = gr.UploadButton("选择图片上传 (Upload Photos)", file_types=["image"], file_count="multiple")
                    generate_img_button = gr.Button("生成图片")
                    slider = gr.Slider(0, 10, value=5, label="相似度控制", info="similarity between 0 and 10")
                    clear_button = gr.Button("清空图片 (Clear Photos)")

                    # analyze_button = gr.Button("显示图片信息 (Show Image Info)")
                    input_image_gallery = gr.Gallery(type='pil', label='输入图片列表 (Photos)', height=250, columns=4, visible=True)
                # second col-> output column
                with gr.Column():
                    image_gallery = gr.Gallery(type='pil', label='图片列表 (Photos)', height=250, columns=4, visible=True)
            # user_images = gr.State([])
            # upload_button.upload(upload_file, inputs=[upload_button, user_images], outputs=image_gallery)
            # slider_func takes the slider value, so the slider must be
            # declared as the event's input.
            slider.input(fn=slider_func, inputs=slider)
            generate_img_button.click(gen_images,inputs=[clothes_image, model_image], outputs= image_gallery)
            clear_button.click(fn=clear_images, inputs=None, outputs=image_gallery)
            # analyze_button.click(get_image_info, inputs=image_gallery, outputs=analysis_output)
        return demo

    def gen_text(self,inputs, LLM_version='Qwen'):
        """Ask the selected LLM the question in ``inputs`` with a fixed prefix.

        Args:
            inputs: Question text from the UI.
            LLM_version: Backend name forwarded to ``call_LLM`` as
                ``model_version``.

        Returns:
            Whatever ``call_LLM`` returns.
        """
        # Leading prompt that constrains the style of the answer.
        prompts = "你是一个时尚服装行业的专家， 请回答下面问题,只罗列答案不要返回多余的词："
        # model= 'deepseek-r1:1.5b'
        # return call_LLM(inputs,prompts, model_version='llama3.2:latest')
        return call_LLM(inputs,prompts, model_version=LLM_version)

    def text_module(self, title='文本生成', desc="AI生成关键词"):
        """Build the ``gr.Interface`` for the text-generation tab.

        Args:
            title: Title shown on the interface.
            desc: Description shown on the interface.

        Returns:
            The constructed ``gr.Interface`` wired to ``gen_text``.
        """
        comp = gr.Interface(
                fn= self.gen_text,
                inputs=[gr.Textbox(label="文本输入"), gr.Dropdown(['deepseek-r1:1.5b', 'llama3.2:latest','Qwen'], label='模型选择')],
                outputs=[gr.Textbox(label="结果输出")],
                title=title,
                description=desc,
                theme="huggingface",
                examples=[
                    ["列出2024年最受欢迎的10个衣服品牌","llama3.2:latest"],
                      ["哪些款式的女装比较潮流， 请列出10个女装品类","Qwen"],
                      ["随机生成10个衣服类目并列出来","Qwen"]],
                # NOTE(review): caching examples presumably makes Gradio run
                # gen_text for every example up front, which needs both LLM
                # backends to be reachable - confirm this is intended.
                cache_examples=True,
            )
        return comp

    def generate_interface(self,):
        """Assemble the three modules into one ``gr.TabbedInterface``.

        Returns:
            The tabbed interface; tabs appear in insertion order.
        """
        tab_interface_ls = {}
        # module 1: keyword generation
        tab_interface_ls['AI生词'] = self.text_module()

        # module 2: try clothes on a model
        tab_interface_ls['服装搭配'] = self.image_module('dress_up', title="服装搭配")

        # module 3: swap the model
        tab_interface_ls['更换模特'] = self.image_module('update_model', title="更换模特")

        comp = gr.TabbedInterface(
                list(tab_interface_ls.values()), list(tab_interface_ls.keys())
            )
        return comp

def main():
    """Create the app, build its tabbed interface and launch it with sharing on."""
    print("Runing Gradio APP")
    app = GradioApp()
    interface = app.generate_interface()
    interface.launch(share=True)


# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()




# class GradioUnitTest():
#     def __init__(self):
#         api_key =  "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"
#         # "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"
#         os.environ["OPENAI_API_KEY"] = api_key
#         self.llm_model = ChatOpenAI(temperature=0.5, model="gpt-3.5-turbo")
#         # self.llm_model= None
#         self.client = OpenAI(api_key=api_key)
#         cur_path =os.getcwd()
#         root_path = '/'.join(cur_path.split("/")[:-2])

#         suno_result_path = os.path.join(root_path,'examples','suno_musics')
#         # self.suno = Suno(result_path=suno_result_path)
#         self.suno= None
#         pass
    
#     def test_text(self, input_text, mode = 'count'):
#         def process_test( _text, mode = 'count'):
#             def count_words(text):
#                 words = text.split(" ")
#                 res_dict = {}
#                 for word in words:
#                     if word in res_dict:
#                         res_dict[word] += 1
#                     else:
#                         res_dict[word] = 1
#                 res = "\n".join([f"word: {key}, count: {value}" for key, value in res_dict.items()])
#                 return res
            
#             def reverse_text(text):
#                 return text[::-1]
            
#             if mode == 'count':
#                 return count_words(_text)
#             return reverse_text(_text)

#         res = f"Got text from textbox: {input_text}"
#         return res, process_test(input_text, mode)
#         # return res, count_words(input_text)
    
#     def test_image(self, input_image, filter_mode='sepia'):
#         def filter_image(input_image, filter_mode='sepia'):
#             def sepia(input_img):
#                 sepia_filter = np.array([
#                     [0.393, 0.769, 0.189], 
#                     [0.349, 0.686, 0.168], 
#                     [0.272, 0.534, 0.131]
#                 ])
#                 sepia_img = input_img.dot(sepia_filter.T)
#                 sepia_img /= sepia_img.max()
#                 return sepia_img
#             def grayscale(input_img):
#                 input_img = np.mean(input_img, axis=2) / np.max(input_img)
#                 return input_img
            
#             if filter_mode == 'sepia':
#                 return sepia(input_image)
#             elif filter_mode == 'grayscale':
#                 return grayscale(input_image)
#             else:
#                 return input_image
#         res = f"Got image from image input: {input_image}"
#         filtered_image = filter_image(input_image, filter_mode)
#         return res, filtered_image
    
#     def test_audio(self, input_audio, filter_mode='echo', prompt='', checkbox_ls=[]):
#         def process_audio(input_audio, filter_mode='echo'):
#             print("input_audio shape: ", input_audio[1].shape, input_audio)
#             def echo(input_audio):
#                 aud = np.concatenate([input_audio[1], input_audio[1]], axis=0)
#                 return (input_audio[0], aud)
#             def reverse(input_audio):
#                 return (input_audio[0], input_audio[1][::-1]) 
            
#             if filter_mode == 'echo':
#                 res_audio = echo(input_audio)
#             elif filter_mode == 'reverse':
#                 res_audio = reverse(input_audio)
#             else:
#                 res_audio = input_audio
#             return res_audio
#         print("checkbox_ls: ", checkbox_ls)
#         res = f"Got audio from audio input: {input_audio}"
#         wait_audio = 'wait_audio' in checkbox_ls
#         make_instrumental = 'make_instrumental' in checkbox_ls
#         if checkbox_ls != []:
#             print('checlbox_ls: ', checkbox_ls)
#         generated_audio_path=''
#         if prompt != '':
#             music_paths = self.test_music_generation(prompt, make_instrumental, wait_audio)
#             generated_audio_path = '\n'.join(music_paths)
#             res = f"Got audio from suno: {generated_audio_path}"
#         processed_audio = process_audio(input_audio, filter_mode)
#         return res, processed_audio, generated_audio_path

#     def test_video(self, input_video, filter_mode='flip'):
#         def process_video(input_video, filter_mode='flip'):
#             print("input_video data: ", input_video)

#             def clip(input_video):
#                 clip1 = VideoFileClip(input_video)
#                 clip2 = VideoFileClip(input_video).subclip(2,3)
#                 clip3 = VideoFileClip(input_video)
#                 final_clip = concatenate_videoclips([clip1,clip2,clip3])
#                 output_video = "final_clip.mp4"
#                 final_clip.write_videofile(output_video)
#                 return output_video
#             def flip(input_video):
#                 return np.flip(input_video, axis=1)
#             def rotate(input_video):
#                 return np.rot90(input_video)
#             if filter_mode == 'clip':
#                 return clip(input_video)
#             elif filter_mode == 'flip':
#                 return flip(input_video)
#             elif filter_mode == 'rotate':
#                 return rotate(input_video)
#             else:
#                 return input_video
#         res = f"Got video from video input: {input_video}"
#         processed_video = process_video(input_video, filter_mode)
#         return res, processed_video

#     def test_chatbot(self, input_text, history):
#         history_langchain_format =[]
#         for human, ai in history:
#             history_langchain_format.append(HumanMessage(human))
#             history_langchain_format.append(AIMessage(ai))
#         history_langchain_format.append(content=input_text)
#         llm_response = self.llm_model(history_langchain_format)
#         return llm_response.content

#     def predict(self, message, history):
#         history_openai_format = []
#         for human, assistant in history:
#             history_openai_format.append({"role": "user", "content": human })
#             history_openai_format.append({"role": "assistant", "content":assistant})
#         history_openai_format.append({"role": "user", "content": message})
    
#         response = self.client.chat.completions.create(model='gpt-3.5-turbo',
#         messages= history_openai_format,
#         temperature=1.0,
#         stream=True)

#         partial_message = ""
#         for chunk in response:
#             if chunk.choices[0].delta.content is not None:
#                 partial_message = partial_message + chunk.choices[0].delta.content
#                 yield partial_message
    
#     def predict_v2(self, message, history):
        
#         url = "https://api.link-ai.chat/v1/chat/completions"
#         headers = {
#             'Authorization': 'Bearer Link_USN4Vru40ciqYkdpeWywmOOIOPHGLYm8EuAGm0xE0b',
#             'Content-Type': 'application/json'
#         }
#         history_openai_format = []
#         for human, assistant in history:
#             history_openai_format.append({"role": "user", "content": human })
#             history_openai_format.append({"role": "assistant", "content":assistant})
#         history_openai_format.append({"role": "user", "content": message})
    

#         data = {
#             "app_code": "default",
#             "messages": history_openai_format,
#         }

#         response = requests.post(url, headers=headers, json=data).json()
#         partial_message = ""
#         for chunk in response['choices']:
#             if chunk['message']["content"] is not None:
#                 partial_message = partial_message + chunk['message']["content"]
#                 yield partial_message
    

#     def predict_v3(self, message, history):
        
#         url = "https://api.link-ai.chat/v1/chat/completions"
#         headers = {
#             'Authorization': 'Bearer Link_USN4Vru40ciqYkdpeWywmOOIOPHGLYm8EuAGm0xE0b',
#             'Content-Type': 'application/json'
#         }
#         history_openai_format = []
#         for human, assistant in history:
#             history_openai_format.append({"role": "user", "content": human })
#             history_openai_format.append({"role": "assistant", "content":assistant})
#         history_openai_format.append({"role": "user", "content": message})
    

#         data = {
#             "app_code": "default",
#             "messages": history_openai_format,
#         }

#         response = requests.post(url, headers=headers, json=data).json()
#         partial_message = ""
#         for chunk in response['choices']:
#             if chunk['message']["content"] is not None:
#                 partial_message = partial_message + chunk['message']["content"]
#                 yield partial_message

#     def test_music_generation(self, prompt, make_instrumental=False, wait_audio=False):
#         request = {
#             "prompt": prompt,
#             "make_instrumental": make_instrumental,
#             "wait_audio": wait_audio
#             }
#         # music_ls = self.suno.generate_music(request)
#         music_ls = []
#         return music_ls

#     def run_test(self, mode='text'):
#         tab_interface_ls = {}
#         if mode == 'text' or mode == 'mix':
#             comp = gr.Interface(
#                 fn= self.test_text,
#                 inputs=['textbox', gr.Dropdown(['count', 'reverse'])],
#                 outputs=["textbox", "textbox"],
#                 title="test text module",
#                 description="test text.",
#                 theme="huggingface",
#                 examples=[
#                     ["A group of friends go on a road trip to find a hidden treasure."],
#                     ["A scientist discovers a way to travel through time."],
#                     ["A group of survivors try to escape a zombie apocalypse."],
#                 ],
#             )
#             tab_interface_ls['Text Ops'] = comp
#             if mode == 'text':
#                 return comp
#         if mode == 'image' or mode == 'mix':
#             # https://www.gradio.app/guides/the-interface-class
#             comp = gr.Interface(
#                 fn= self.test_image,
#                 inputs=['image', gr.Dropdown(['sepia', 'grayscale'])],
#                 outputs=["textbox",'image'],
#                 title="test image preprocess Module",
#                 description="test text.",
#                 theme="huggingface",
#                 examples=[
#                     ["/mnt/c/Users/wwk/Pictures/OIP.jpg", "sepia"],
#                 ],
#             )
#             tab_interface_ls['Image Ops'] = comp
#             if mode == 'image':
#                 return comp

#         if mode == 'audio' or mode == 'mix':
#             comp = gr.Interface(
#                 fn= self.test_audio,
#                 inputs=['audio', gr.Dropdown(['echo', 'reverse']), 'textbox', gr.CheckboxGroup([ 'make_instrumental' ,'wait_audio'],  label="Suno options", info="make_instrumental<bool>, wait_audio:<bool>") ],
#                 outputs=["textbox", 'audio'],
#                 title="test audio preprocess Module",
#                 description="test audio.",
#                 theme="huggingface",
#                 examples=[
#                     ["/mnt/d/workspace/projects/movie_generator/examples/audio/两只老虎，两只老虎-神秘-欢快-v2.mp3", "echo"],
#                     ["/mnt/d/workspace/projects/movie_generator/examples/audio/两只老虎，两只老虎-神秘-欢快-v2.mp3", "reverse"],
#                 ],
#             )
#             tab_interface_ls['Audio Ops'] = comp
#             if mode == 'audio':
#                 return comp
            
#         if mode == 'video' or mode == 'mix':
#             comp = gr.Interface(
#                 fn= self.test_video,
#                 inputs= [ 'video', gr.Dropdown(['clip', 'rotate'])],
#                 outputs=["textbox", 'video'],
#                 title="test video preprocess Module",
#                 description="test video.",
#                 theme="huggingface",
#                 examples=[
#                     ["/mnt/d/workspace/projects/movie_generator/examples/video/2月12日.mp4", "clip"],
#                 ],
#                 )
#             tab_interface_ls['Video Ops'] = comp
#             if mode == 'video':
#                 return comp
            
#         if mode == 'chat' or mode == 'mix':
#             # https://www.gradio.app/guides/creating-a-custom-chatbot-with-blocks
#             # comp = gr.ChatInterface(self.test_chatbot)
#             comp = gr.ChatInterface(self.predict_v2)
#             tab_interface_ls['ChatBot'] = comp
#             if mode == 'chat':
#                 return comp    
#         if mode == 'mix':
#             # mix mode, use radio button to select the mode
#             comp = gr.TabbedInterface(
#                 list(tab_interface_ls.values()), list(tab_interface_ls.keys())
#             )
#             return comp
#         else:
#             def flip_text(x):
#                 return x[::-1]
#             def flip_image(x):
#                 return np.fliplr(x)
#             with gr.Blocks() as comp:
#                 gr.Markdown("Flip text or image files using this demo.")
#                 with gr.Tab("Flip Text"):
#                     text_input = gr.Textbox()
#                     text_output = gr.Textbox()
#                     text_button = gr.Button("Flip")
#                 with gr.Tab("Flip Image"):
#                     with gr.Row():
#                         image_input = gr.Image()
#                         image_output = gr.Image()
#                     image_button = gr.Button("Flip")

#                 with gr.Accordion("Open for More!", open=False):
#                     gr.Markdown("Look at me...")
#                     temp_slider = gr.Slider(
#                         minimum=0.0,
#                         maximum=1.0,
#                         value=0.1,
#                         step=0.1,
#                         interactive=True,
#                         label="Slide me",
#                     )
#                     temp_slider.change(lambda x: x, [temp_slider])

#                 text_button.click(flip_text, inputs=text_input, outputs=text_output)
#                 image_button.click(flip_image, inputs=image_input, outputs=image_output)
#         return comp