zh21608 committed
Commit e4f9749
1 Parent(s): e04297d

Create app.py

Files changed (1)
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
+ from langchain.agents import load_tools
+ from langchain.agents import initialize_agent
+ from langchain.agents import AgentType
+ from langchain.llms import OpenAI
+ from langchain.chat_models import AzureChatOpenAI
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+ import os
+ from langchain.tools import BaseTool
+ import torch
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ import requests
+ from PIL import Image
+ import gradio as gr
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+
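+ # Azure OpenAI credentials and deployment name come from the Space's environment variables.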
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
+ DEP_NAME = os.getenv("deployment_name")
+ llm = AzureChatOpenAI(
+     deployment_name=DEP_NAME,
+     openai_api_base=OPENAI_API_BASE,
+     openai_api_key=OPENAI_API_KEY,
+     openai_api_version="2023-03-15-preview",
+     model_name="gpt-3.5-turbo",
+ )
+
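+ # BLIP image-captioning model used by the image-description tool.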
+ image_to_text_model = "Salesforce/blip-image-captioning-large"
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+ model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
+ processor = BlipProcessor.from_pretrained(image_to_text_model)
+
+ def describeImage(image_url):
+     # For a remote URL, use:
+     # image_object = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
+     image_object = Image.open(image_url).convert('RGB')
+     inputs = processor(image_object, return_tensors="pt").to(device)
+     outputs = model.generate(**inputs)
+     return processor.decode(outputs[0], skip_special_tokens=True)
+
+
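+ # LangChain tool wrapper so the agent can invoke the captioner.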
+ class DescribeImageTool(BaseTool):
+     name = "Describe Image Tool"
+     description = 'use this tool to describe an image.'
+
+     def _run(self, url: str):
+         return describeImage(url)
+
+     def _arun(self, query: str):
+         raise NotImplementedError("Async operation not supported yet")
+
+ tools = [DescribeImageTool()]
+
+ def enToChinese(english):
+     pp = "Please translate the following sentence from English to Chinese: {english}"
+     prompt = PromptTemplate(input_variables=["english"], template=pp)
+     llchain = LLMChain(llm=llm, prompt=prompt)
+     return llchain.run(english)
+
+
+ def chToEnglish(chinese):
+     pp = "Please translate the following sentence from Chinese to English: {chinese}"
+     prompt = PromptTemplate(input_variables=["chinese"], template=pp)
+     llchain = LLMChain(llm=llm, prompt=prompt)
+     return llchain.run(chinese)
+
+
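+ # Conversational ReAct agent with a 5-turn window memory; the BLIP tool
+ # answers requests to describe an image.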
+ agent = initialize_agent(
+     agent='chat-conversational-react-description',
+     tools=tools,
+     llm=llm,
+     verbose=True,
+     max_iterations=3,
+     early_stopping_method='generate',
+     memory=ConversationBufferWindowMemory(
+         memory_key='chat_history',
+         k=5,
+         return_messages=True
+     )
+ )
+
+ def image_to_txt(image_url, user_input):
+     # Translate the Chinese question to English, run the agent on it together
+     # with the image path, then translate the agent's answer back to Chinese.
+     user_input = chToEnglish(user_input)
+     return enToChinese(agent(f"{user_input}:\n{image_url}")['output'])
+
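+ # Minimal Gradio UI: an image picker, a question box, and an answer box.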
+ with gr.Blocks() as demo:
+     image_url = gr.Image(type="filepath", label="Please select an image")
+     user_input = gr.Textbox(show_label=False, placeholder="Please enter your question...", lines=1)
+     output = gr.Textbox(show_label=False, placeholder="", lines=1)
+     submit_btn = gr.Button('Submit', variant="primary")
+     submit_btn.click(image_to_txt, inputs=[image_url, user_input], outputs=output)
+
+ demo.launch()