File size: 3,366 Bytes
80d3afc
ace8c2c
be95987
80d3afc
 
 
7a49355
d0ccbf6
 
25dfad2
6919ace
3684d73
241921c
 
5c795c7
 
 
6919ace
ace8c2c
360f345
7de5168
94ff692
360f345
8e569e2
360f345
 
94ff692
360f345
 
031aa40
7a49355
336cc86
241921c
563878a
241921c
563878a
80d3afc
e75bad7
 
 
a36a82c
e75bad7
 
360f345
f45a806
 
22ebce8
22a1dd5
 
 
 
 
 
a84efc1
 
 
22a1dd5
 
 
 
 
a84efc1
 
 
22a1dd5
 
66f39f2
 
aa69ed9
 
80d3afc
5b0d141
0359360
3416cce
80d3afc
 
768bc38
80d3afc
c4615f5
3416cce
80d3afc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import json, os, vertexai, wandb

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

# Required settings, read from the environment (populated from a .env file
# above when one is found). A missing variable raises KeyError at startup.
credentials, project, wandb_api_key = [
    os.environ[name] for name in ("CREDENTIALS", "PROJECT", "WANDB_API_KEY")
]

# Model name and sampling parameters. The same dict is passed to
# Weights & Biases as the run configuration.
# Previously used model: "text-bison@001".
config = dict(
    max_output_tokens=800,
    model="gemini-pro",
    temperature=0.1,
    top_k=40,
    top_p=1.0,
)

# CREDENTIALS holds the service-account key as a JSON document; parse it and
# build a google-auth credentials object from the resulting mapping.
credentials = json.loads(credentials)

# Request is the HTTP transport that credentials.refresh() needs; it was
# previously referenced without being imported (NameError on refresh).
from google.auth.transport.requests import Request
from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_info(credentials)

# Refresh only when the credentials report themselves as expired.
if credentials.expired:
    credentials.refresh(Request())

vertexai.init(project = project, 
              location = "us-central1",
              credentials = credentials
             )

# Legacy text-bison path, kept for reference:
#from vertexai.language_models import TextGenerationModel
#generation_model = TextGenerationModel.from_pretrained(config["model"])
from vertexai.preview.generative_models import GenerativeModel
generation_model = GenerativeModel(config["model"])

def wandb_log(prompt, completion):
    """Record one prompt/completion pair as its own Weights & Biases run.

    Logs in with ``wandb_api_key``, starts a run in the "vertex-ai-llm"
    project with the module-level ``config`` attached, logs the pair, and
    finishes the run.

    Args:
        prompt: The prompt that was sent; stored as ``str(prompt)``.
        completion: The result to record; stored as ``str(completion)``.
    """
    wandb.login(key = wandb_api_key)
    wandb.init(project = "vertex-ai-llm", config = config)
    try:
        wandb.log({"prompt": str(prompt), "completion": str(completion)})
    finally:
        # Always close the run, even if logging fails, so a failed call does
        # not leave an active run behind for the next invocation.
        wandb.finish()

def invoke(prompt):
    """Send ``prompt`` to the configured generative model and return its reply.

    The prompt/completion pair is logged through ``wandb_log`` whether or not
    generation succeeds. An empty prompt is rejected before any logging.

    Args:
        prompt: The user's prompt text from the UI.

    Returns:
        The response's ``text`` when it is not None; otherwise the response
        object itself.

    Raises:
        gr.Error: If the prompt is empty/None, or if the model call (or
            reading the response text) raises; the original exception is
            chained as the cause.
    """
    if not prompt:
        raise gr.Error("Prompt is required.")
    completion = ""
    try:
        generation_config = {
            "max_output_tokens": config["max_output_tokens"],
            "temperature": config["temperature"],
            "top_k": config["top_k"],
            "top_p": config["top_p"],
        }
        # Keep the raw response in `completion` first so that, if reading
        # `.text` raises, the `finally` clause still logs what came back.
        completion = generation_model.generate_content(prompt, generation_config = generation_config)
        text = completion.text
        if text is not None:
            completion = text
    except Exception as e:
        # gr.Error takes a message string; pass the text, not the exception.
        raise gr.Error(str(e)) from e
    finally:
        wandb_log(prompt, completion)
    return completion
    #return "🛑 Execution is commented out. To view the source code see https://huggingface.co/spaces/bstraehle/google-vertex-ai-llm/tree/main."

# HTML blurb passed to the interface as its description.
description = """<a href='https://www.gradio.app/'>Gradio</a> UI using <a href='https://cloud.google.com/vertex-ai?hl=en/'>Google Vertex AI</a> API 
                 with gemini-pro foundation model. RAG evaluation via <a href='https://wandb.ai/bstraehle'>Weights & Biases</a>."""

# Close any Gradio apps already running before building this one.
gr.close_all()

# One single-line textbox in, one single-line textbox out, wired to invoke().
prompt_box = gr.Textbox(label = "Prompt", lines = 1)
completion_box = gr.Textbox(label = "Completion", lines = 1)
demo = gr.Interface(
    fn = invoke,
    inputs = [prompt_box],
    outputs = [completion_box],
    title = "Generative AI - LLM",
    description = description,
)
demo.launch()