File size: 4,243 Bytes
eaf5e44
0f74001
eaf5e44
 
 
 
 
 
 
 
 
 
0f74001
eaf5e44
 
 
 
0f74001
7b5e727
eaf5e44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aaffa2
eaf5e44
 
580bff2
 
 
 
 
eaf5e44
ccd534b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580bff2
ccd534b
 
 
6aaffa2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import gradio as gr
from langchain.agents import  initialize_agent,AgentType
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
import torch
from transformers import BlipProcessor,BlipForConditionalGeneration
import requests
from PIL import Image
from langchain.tools import BaseTool
from langchain.chains import LLMChain
from langchain import PromptTemplate, FewShotPromptTemplate

# --- Azure OpenAI + BLIP model setup (module-level side effects) ---

# Azure OpenAI credentials are read from the environment; any of these may be
# None if unset, in which case AzureChatOpenAI fails at request time.
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE=os.getenv("OPENAI_API_BASE")
DEP_NAME=os.getenv("deployment_name")
# Azure-hosted chat model used by the agent and by toChinese().
llm=AzureChatOpenAI(deployment_name=DEP_NAME,openai_api_base=OPENAI_API_BASE,openai_api_key=OPENAI_API_KEY,openai_api_version="2023-03-15-preview",model_name="gpt-3.5-turbo")


# BLIP image-captioning checkpoint; inference runs on GPU when available.
image_to_text_model="Salesforce/blip-image-captioning-large"
device= 'cuda' if torch.cuda.is_available() else 'cpu'

# Both loads happen at import time and may download weights on first run.
processor=BlipProcessor.from_pretrained(image_to_text_model)

model=BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)

def descImage(image_url):
    """Generate an English caption for an image.

    Parameters
    ----------
    image_url : str
        Local file path of the image (despite the name, this is opened with
        ``Image.open``, so it must be a path, not an HTTP URL).

    Returns
    -------
    str
        The BLIP-generated caption with special tokens stripped.
    """
    image_obj = Image.open(image_url).convert('RGB')  # normalize to 3-channel RGB
    inputs = processor(image_obj, return_tensors='pt').to(device)
    # Inference only: no_grad avoids building an autograd graph, saving memory.
    with torch.no_grad():
        outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)

def toChinese(en: str):
    """Translate the English text *en* into Chinese using the Azure chat model."""
    translate_prompt = PromptTemplate(
        input_variables=["en"],
        template="翻译下面语句到中文\n{en}",
    )
    translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
    return translate_chain.run(en)

class DescTool(BaseTool):
    """LangChain tool exposing the BLIP image-captioning pipeline to the agent."""

    name="Describe Image Tool"
    description="use this tool to describe an image"

    def _run(self, url: str):
        # Synchronous path: delegate straight to the module-level captioner.
        return descImage(url)

    def _arun(self, query: str):
        # Async execution is not supported by this tool.
        raise NotImplementedError('未实现')

# Register the captioning tool and a sliding-window chat memory that keeps
# the last 5 exchanges as message objects.
tools=[DescTool()]
memory=ConversationBufferWindowMemory(
                          memory_key='chat_history',
                          k=5,
                          return_messages=True
                      )

# Conversational ReAct agent: at most 3 tool-use iterations, after which
# 'generate' forces a final LLM answer instead of erroring out.
agent=initialize_agent(
                      agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
                      tools=tools,
                      llm=llm,
                      verbose=False,
                      max_iterations=3,
                      early_stopping_method='generate',
                      memory=memory
                      )
def reset_user_input():
    """Return a Gradio update that clears the user-input textbox."""
    cleared = gr.update(value='')
    return cleared
def reset_state():
    """Reset the chatbot display and the history state to fresh empty lists."""
    return list(), list()

def predict(file, input):
    """Answer a user's question about an uploaded image.

    Parameters
    ----------
    file : str
        Local path of the uploaded image (from the Gradio Image component,
        ``type="filepath"``).
    input : str
        The user's question about the image.

    Returns
    -------
    str
        The agent's answer translated to Chinese.
    """
    # Prompt = question + image path on the next line, so the agent can hand
    # the path to the Describe Image Tool.
    query = f"{input}\n{file}"
    result = agent(query)
    return toChinese(result['output'])

# with gr.Blocks(css=".chat-blocks{height:calc(100vh - 332px);} .mychat{flex:1} .mychat .block{min-height:100%} .mychat .block .wrap{max-height: calc(100vh - 330px);} .myinput{flex:initial !important;min-height:180px}") as demo:
#     title = '图像识别'
#     demo.title=title
#     with gr.Column(elem_classes="chat-blocks"):
#         with gr.Row(elem_classes="mychat"):
#             file = gr.Image(type="filepath")
#             chatbot = gr.Chatbot(label="图像识别", show_label=False)
#         with gr.Column(elem_classes="myinput"):
#             user_input = gr.Textbox(show_label=False, placeholder="请描述您的问题", lines=1).style(
#                 container=False)
#             submitBtn = gr.Button("提交", variant="primary", elem_classes="btn1")
#             emptyBtn = gr.Button("清除历史").style(container=False)
#
#     history = gr.State([])
#     submitBtn.click(predict, [file,user_input, chatbot,history], [chatbot, history],
#                         show_progress=True)
#     submitBtn.click(reset_user_input, [], [user_input])
#     emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
# --- Gradio UI: image picker + question box; the answer box is read-only ---
with gr.Blocks() as demo:
    # type="filepath" makes the component hand predict() a local file path.
    image_url = gr.Image(type="filepath",label="请选择一张图片")
    input = gr.Textbox(label='请描述您的问题', placeholder="", lines=1)
    output = gr.Textbox(label='答案', placeholder="", lines=2,interactive=False)
    submit = gr.Button('提问',variant="primary")
    submit.click(predict,inputs=[image_url,input],outputs=output)
demo.launch()