import os
import ssl
import json
import datetime
import argparse
from threading import Lock
from typing import Optional, Tuple

import requests
import gradio as gr

from langchain import ConversationChain
from langchain.agents import initialize_agent, Tool
from langchain.tools.bing_search.tool import BingSearchRun, BingSearchAPIWrapper
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.llms import OpenAI, AzureOpenAI
from langchain.chains import PALChain
from langchain.utilities import ImunAPIWrapper, ImunMultiAPIWrapper
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

TOOLS_LIST = ['pal-math', 'imun']
TOOLS_DEFAULT_LIST = ['pal-math', 'imun']
BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. "
MAX_TOKENS = 512

IMAGE_COUNT = 0

ARGS = None
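
# Disable TLS certificate verification for all HTTPS requests made by this
# process. This keeps the demo working against endpoints with self-signed
# certificates, at the cost of security.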
ssl._create_default_https_context = ssl._create_unverified_context
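

# Caption an image file with the Azure Computer Vision image-analysis REST API.
# (Despite the name, this helper calls the hosted API, not a local ONNX model.)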
def get_caption_onnx_api(imgf):
    # `header_key` was left undefined here; we assume the Computer Vision
    # subscription key comes from an environment variable (the variable name
    # below is a guess).
    header_key = os.environ["IMUN_SUBSCRIPTION_KEY"]

    headers = {
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': header_key,
    }

    params = {
        'features': 'description',
        'model-version': 'latest',
        'language': 'en',
        'descriptionExclude': 'Celebrities,Landmarks',
    }

    with open(imgf, 'rb') as f:
        data = f.read()

    response = requests.post(
        'https://cvfiahmed.cognitiveservices.azure.com/vision/v2022-07-31-preview/operations/imageanalysis:analyze',
        params=params, headers=headers, data=data)

    return json.loads(response.content)['descriptionResult']['values'][0]['text']
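

# Reset the conversation: rebuild the langchain agent and clear the chat
# history shown in the UI.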
def reset_memory(history):
    print("clearing memory, loading langchain...")
    load_chain(history)
    history = []
    return history, history
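

# Build the LLM, the vision/OCR tool wrappers, and the conversational agent.
# Returns (chatbot_history, history_state, agent_chain), matching the outputs
# wired to the reset button.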
def load_chain(history):
    global ARGS

    if ARGS.openAIModel == 'openAIGPT35':
        llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)
    elif ARGS.openAIModel == 'azureChatGPT':
        llm = AzureOpenAI(deployment_name="text-chat-davinci-002", model_name="text-chat-davinci-002", temperature=0, max_tokens=MAX_TOKENS)
    elif ARGS.openAIModel == 'azureTextDavinci003':
        llm = AzureOpenAI(deployment_name="text-davinci-003", model_name="text-davinci-003", temperature=0, max_tokens=MAX_TOKENS)

    memory = ConversationBufferMemory(memory_key="chat_history")
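
    # Image-understanding wrappers over Azure Computer Vision and Form
    # Recognizer. The general analyzer and the dense-caption model are combined
    # so a single "Image Understanding" call returns both result sets.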
    imun_dense = ImunAPIWrapper(
        imun_url="https://ehazarwestus.cognitiveservices.azure.com/computervision/imageanalysis:analyze",
        params="api-version=2023-02-01-preview&model-version=latest&features=denseCaptions",
        imun_subscription_key=os.environ["IMUN_SUBSCRIPTION_KEY2"])

    imun = ImunAPIWrapper()
    imun = ImunMultiAPIWrapper(imuns=[imun, imun_dense])

    imun_celeb = ImunAPIWrapper(
        imun_url="https://cvfiahmed.cognitiveservices.azure.com/vision/v3.2/models/celebrities/analyze",
        params="")

    imun_read = ImunAPIWrapper(
        imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-read:analyze",
        params="api-version=2022-08-31",
        imun_subscription_key=os.environ["IMUN_OCR_SUBSCRIPTION_KEY"])

    imun_receipt = ImunAPIWrapper(
        imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-receipt:analyze",
        params="api-version=2022-08-31",
        imun_subscription_key=os.environ["IMUN_OCR_SUBSCRIPTION_KEY"])

    imun_businesscard = ImunAPIWrapper(
        imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-businessCard:analyze",
        params="api-version=2022-08-31",
        imun_subscription_key=os.environ["IMUN_OCR_SUBSCRIPTION_KEY"])

    imun_layout = ImunAPIWrapper(
        imun_url="https://vigehazar.cognitiveservices.azure.com/formrecognizer/documentModels/prebuilt-layout:analyze",
        params="api-version=2022-08-31",
        imun_subscription_key=os.environ["IMUN_OCR_SUBSCRIPTION_KEY"])

    bing = BingSearchAPIWrapper(k=2)
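
    # Send "edit this photo" requests to an external image-editing service.
    # The agent passes the instruction followed by the image URL as the last
    # whitespace-separated token.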
    def edit_photo(query: str) -> str:
        endpoint = "http://10.123.124.92:7863/"
        query = query.strip()
        url_idx = query.rfind(" ")
        img_url = query[url_idx + 1:].strip()
        if img_url.endswith((".", "?")):
            img_url = img_url[:-1]
        if not img_url.startswith(("http://", "https://")):
            return "Invalid image URL"
        img_url = img_url.replace("0.0.0.0", "10.123.124.92")
        instruction = query[:url_idx]

        job = {"image_path": img_url, "instruction": instruction}
        response = requests.post(endpoint, json=job)
        if response.status_code != 200:
            return "Could not finish the task, please try again later!"
        return "Here is the edited image " + endpoint + response.json()["edited_image"]
    tools = [
        Tool(
            name="PAL-MATH",
            func=PALChain.from_math_prompt(llm).run,
            description=(
                "A wrapper around a calculator. "
                "A language model that is really good at solving complex word math problems. "
                "Input should be a fully worded, hard word-math problem."
            )
        ),
        Tool(
            name="Image Understanding",
            func=imun.run,
            description=(
                "A wrapper around Image Understanding. "
                "Useful for when you need to understand what is inside an image (objects, texts, people). "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
        Tool(
            name="OCR Understanding",
            func=imun_read.run,
            description=(
                "A wrapper around OCR Understanding (Optical Character Recognition). "
                "Useful after the Image Understanding tool has found text or handwriting is present in the image tags. "
                "This tool can find the actual text, written name, or product name in the image. "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
        Tool(
            name="Receipt Understanding",
            func=imun_receipt.run,
            description=(
                "A wrapper around receipt understanding. "
                "Useful after the Image Understanding tool has recognized a receipt in the image tags. "
                "This tool can find the actual receipt text, prices and detailed items. "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
        Tool(
            name="Business Card Understanding",
            func=imun_businesscard.run,
            description=(
                "A wrapper around business card understanding. "
                "Useful after the Image Understanding tool has recognized a business card in the image tags. "
                "This tool can find the actual business card text, name, address, email, website on the card. "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
        Tool(
            name="Layout Understanding",
            func=imun_layout.run,
            description=(
                "A wrapper around layout and table understanding. "
                "Useful after the Image Understanding tool has recognized text or a table in the image tags. "
                "This tool can extract the text together with its layout, including any tables, from the image. "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
        Tool(
            name="Celebrity Understanding",
            func=imun_celeb.run,
            description=(
                "A wrapper around celebrity understanding. "
                "Useful after the Image Understanding tool has recognized people in the image tags that could be celebrities. "
                "This tool can find the name of celebrities in the image. "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
        BingSearchRun(api_wrapper=bing),
        Tool(
            name="Photo Editing",
            func=edit_photo,
            description=(
                "A wrapper around photo editing. "
                "Useful to edit an image with a given instruction. "
                "Input should be an image url, or path to an image file (e.g. .jpg, .png)."
            )
        ),
    ]
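
    # "conversational-assistant" is a custom agent type (not part of stock
    # langchain), presumably supplied by the same fork that provides the
    # Imun wrappers.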
    chain = initialize_agent(tools, llm, agent="conversational-assistant", verbose=True, memory=memory, return_intermediate_steps=True, max_iterations=4)
    print("langchain reloaded")
    history = []
    history.append(("Show me what you got!", "Hi Human, I am ready to serve!"))
    return history, history, chain
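

# Run one turn through the agent, converting known OpenAI errors into
# user-visible messages instead of letting them propagate.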
def run_chain(chain, inp):
    output = ""
    try:
        output = chain.conversation(input=inp, keep_short=ARGS.noIntermediateConv)
    except AuthenticationError as ae:
        output = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
        print("output", output)
    except RateLimitError as rle:
        output = "\n\nRateLimitError: " + str(rle)
    except ValueError as ve:
        output = "\n\nValueError: " + str(ve)
    except InvalidRequestError as ire:
        output = "\n\nInvalidRequestError: " + str(ire)
    except Exception as e:
        output = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)

    return output
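

# Wraps the chat callback with a lock so concurrent Gradio requests cannot
# interleave turns on the shared agent chain.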
class ChatWrapper:

    def __init__(self):
        self.lock = Lock()

    def __call__(
        self, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain]
    ):
        """Execute the chat functionality."""
        self.lock.acquire()
        try:
            print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
            print("inp: " + inp)
            history = history or []

            outputs = run_chain(chain, inp)
            outputs = process_chain_output(outputs)

            print(" len(outputs) {}".format(len(outputs)))
            for i, output in enumerate(outputs):
                if i == 0:
                    history.append((inp, output))
                else:
                    history.append((None, output))
        finally:
            self.lock.release()

        print(history)
        return history, history, ""
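

# Handle an image upload: build a URL for the file under Gradio's file route
# and feed it to the agent as the next conversation input.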
def add_image(state, chain, image):
    global IMAGE_COUNT
    global ARGS
    IMAGE_COUNT += 1
    state = state or []

    url_input_for_chain = "http://0.0.0.0:{}/file={}".format(ARGS.port, image.name)
    url_input_for_chain = url_input_for_chain.replace("0.0.0.0", "10.123.124.92")

    outputs = run_chain(chain, url_input_for_chain)
    outputs = process_chain_output(outputs)

    print(" len(outputs) {}".format(len(outputs)))
    for i, output in enumerate(outputs):
        if i == 0:
            state.append(((image.name,), output))
        else:
            state.append((None, output))

    print(state)
    return state, state
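

# Pull the trailing image URL out of an agent message, dropping any final
# punctuation.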
def replace_with_image_markup(text):
    text = text.strip()
    url_idx = text.rfind(" ")
    img_url = text[url_idx + 1:].strip()
    if img_url.endswith((".", "?")):
        img_url = img_url[:-1]
    return img_url
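

# Normalize chain output into a list of chat messages. When intermediate steps
# are hidden, edited-image announcements are replaced with (image_url,) tuples,
# which the Gradio chatbot renders as inline images.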
def process_chain_output(outputs):
    global ARGS

    if isinstance(outputs, str):
        outputs = [outputs]
    elif isinstance(outputs, list):
        if ARGS.noIntermediateConv:
            cleanOutputs = []
            for output in outputs:
                img_url = None
                if "assistant: here is the edited image " in output.lower():
                    img_url = replace_with_image_markup(output)
                    cleanOutputs.append("Assistant: Here is the edited image")
                if img_url is not None:
                    cleanOutputs.append((img_url,))
                else:
                    cleanOutputs.append(output)
            outputs = cleanOutputs

    return outputs
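

# Build the Gradio UI and start the server. The agent chain is created lazily:
# the user clicks the wake-up button once to instantiate it.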
def init_and_kick_off():
    global ARGS

    chat = ChatWrapper()

    with gr.Blocks() as block:
        llm_state = gr.State()
        history_state = gr.State()
        chain_state = gr.State()

        reset_btn = gr.Button(value="!!!CLICK to wake up the AI!!!", variant="secondary", elem_id="resetbtn").style(full_width=False)

        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot").style(height=620)

        with gr.Row():
            with gr.Column(scale=0.75):
                message = gr.Textbox(label="What's on your mind??",
                                     placeholder="What's the answer to life, the universe, and everything?",
                                     lines=1)
            with gr.Column(scale=0.15):
                submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
            with gr.Column(scale=0.10, min_width=0):
                btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])

        with gr.Row():
            with gr.Column(scale=0.90):
                gr.HTML("""
                    <p>This application, developed by the Cognitive Services team at Microsoft,
                    demonstrates Azure Cognitive Services APIs in a conversational agent.</p>""")

        message.submit(chat, inputs=[message, history_state, chain_state],
                       outputs=[chatbot, history_state, message])

        submit.click(chat, inputs=[message, history_state, chain_state],
                     outputs=[chatbot, history_state, message])

        btn.upload(add_image, inputs=[history_state, chain_state, btn], outputs=[history_state, chatbot])

        reset_btn.click(load_chain, inputs=[history_state], outputs=[chatbot, history_state, chain_state])

    block.launch(server_name="0.0.0.0", server_port=ARGS.port)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--port', type=int, required=False, default=7860)
    parser.add_argument('--openAIModel', type=str, required=False, default='openAIGPT35')
    parser.add_argument('--noIntermediateConv', default=False, action='store_true', help='if this flag is turned on, no intermediate conversation is shown')

    ARGS = parser.parse_args()

    init_and_kick_off()