# Llama_3.1_API / app.py
# Source: Hugging Face Space by contenteaseAI (commit fd3db54, verified)
import gradio as gr
from openai import OpenAI
import os
# Inline CSS for the Gradio UI: widen the container, center the title,
# and hide the default Gradio footer.
css = '''
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
visibility: hidden
}
'''
# Hugging Face access token, read from the environment (set as a Space
# secret). Will be None if HF_TOKEN is unset, in which case API calls fail.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
# OpenAI-compatible client pointed at the Hugging Face Inference API,
# authenticated with the HF token above.
client = OpenAI(
base_url="https://api-inference.huggingface.co/v1/",
api_key=ACCESS_TOKEN,
)
def respond(
    message,
    history,
    max_tokens,
    temperature,
    system_message=""" Extract the following information from the given text:
Identify the specific areas where the work needs to be done and Add the furniture that has to be changed.
Do not specify the work that has to be done.
Format the extracted information in the following JSON structure:
{
"Area Type1": {
"Furniture1",
"Furniture2",
...
}
"Area Type2": {
"Furniture1",
"Furniture2",
...
}
}""",
):
    """Stream a chat completion from the HF Inference API as it is generated.

    Generator used by ``gr.ChatInterface``: yields the progressively
    accumulated assistant reply so the UI can render tokens as they arrive.

    Args:
        message: The user's latest input text.
        history: List of ``(user, assistant)`` message pairs from the UI.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature for the model.
        system_message: System prompt (defaults to a furniture/area
            extraction instruction producing a JSON-like structure).

    Yields:
        The assistant reply accumulated so far, one yield per stream chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    # Drop the most recent exchange from the replayed history (preserves the
    # original behavior of `history.pop()`), but use slicing so the caller's
    # list is NOT mutated in place.
    context = history[:-1] if history else history
    for user_turn, assistant_turn in context:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE: the stream variable is named `chunk` (the original reused
    # `message`, shadowing the user's input inside the loop).
    for chunk in client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        messages=messages,
    ):
        token = chunk.choices[0].delta.content
        # delta.content can be None on some chunks (e.g. the final one);
        # guard to avoid `str + None` TypeError.
        if token:
            response += token
        yield response
# Chat UI wired to `respond`; the two sliders are passed as extra positional
# arguments after (message, history) — their order must match respond()'s
# (max_tokens, temperature) parameters.
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
],
css=css,
theme="allenai/gradio-theme",
)
if __name__ == "__main__":
    # Launch the Gradio server; debug=True surfaces stack traces in the log.
    demo.launch(debug=True)