# EIL-Demo / app.py
import torch
import spaces
import gradio as gr
import pandas as pd
from threading import Thread
import re
import time
import tempfile
import os
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import SentenceTransformersTokenTextSplitter
from PIL import Image
HF_TOKEN = os.environ["Inference_Calls"]  # Hugging Face access token stored as a Space secret
print("HF token loaded from environment.")  # avoid printing the secret itself
# from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
# processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
# model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
"""
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
token=HF_TOKEN
).to("cuda:0")
terminators = [
tokenizer.eos_token_id,
tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
"""
from huggingface_hub import InferenceClient
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
client = InferenceClient(model=model_id, token=HF_TOKEN)
print("Client object created!")
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
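# Whisper ASR settings: long recordings are transcribed in 30-second chunks with batched inference.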
ASR_MODEL_NAME = "openai/whisper-large-v3"
ASR_BATCH_SIZE = 8
ASR_CHUNK_LENGTH_S = 30
TEMP_FILE_LIMIT_MB = 1024 #2048
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
device = 0 if torch.cuda.is_available() else "cpu"
asr_pl = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_NAME,
    chunk_length_s=ASR_CHUNK_LENGTH_S,
    device=device,
)
application_title = "Enlight Innovations Limited -- Demo"
application_description = "This demo is designed to illustrate our basic ideas and feasibility in implementation."
# Chatbot Interface functions
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the conversation for the Inference API from the system message,
    # the previous turns, and the latest user message.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0] and isinstance(val[0], str):  # skip non-text history entries (e.g. uploaded files)
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    # With a multimodal textbox the incoming message is a dict; only its text part is sent to the LLM.
    user_text = message["text"] if isinstance(message, dict) else message
    messages.append({"role": "user", "content": user_text})
    print(messages)

    response = ""
    for chunk in client.chat_completion(  # OpenAI-style alias: client.chat.completions.create
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
# Transcribe Interface functions
@spaces.GPU
def transcribe(asr_inputs, task):
    #print("Type: " + str(type(asr_inputs)))
    if asr_inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    text = asr_pl(asr_inputs, batch_size=ASR_BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text.strip()
# Profile Interface functions
def load_profiles():
    try:
        return pd.read_csv("temp_profiles.csv")
    except FileNotFoundError:
        return pd.DataFrame()

def save_profile(profile_data):
    df = load_profiles()
    # DataFrame.append() was removed in pandas 2.x; use pd.concat instead.
    df = pd.concat([df, pd.DataFrame([profile_data])], ignore_index=True)
    df.to_csv("temp_profiles.csv", index=False)
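# The profile store is a flat CSV ("temp_profiles.csv"); the lookup below assumes each row
# carries at least an "Assessment_ID" column.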
def lookup_profile(assessment_id):
    # Gradio passes the textbox value in; the return values update the preview and status components.
    df = load_profiles()
    assessment_id = (assessment_id or "").strip()
    if not assessment_id:
        return "", "Please enter an Assessment ID"
    if df.empty or "Assessment_ID" not in df.columns:
        return "", "No profiles found for this ID"
    results = df[df["Assessment_ID"].astype(str).str.contains(assessment_id, case=False)]
    if results.empty:
        return "", "No profiles found for this ID"
    return results.to_markdown(index=False), "Profile(s) found!"
"""Gradio User Interface"""
#audio_input = gr.Audio(sources="upload", type="filepath", label="Audio: from file") #gr.Audio(sources="microphone", type="filepath", label="Audio: from microphone")
#audio_input_choice = gr.Radio(["audio file", "microphone"], label="Audio Input Source", value="audio file") #
# Profile Interface components
with gr.Blocks() as profile_interface:
    # Profile Lookup Section
    with gr.Column():
        assessment_id_input = gr.Textbox(
            label="Assessment Object/Session ID",
            placeholder="Enter ID here...",
        )
        lookup_btn = gr.Button("Lookup Profile", variant="primary")
        clear_btn = gr.Button("Clear Results", variant="secondary")
        # Status messages are shown in a Markdown field (Gradio has no dedicated status component).
        status = gr.Markdown(value="", elem_classes="status-container")
        profile_preview = gr.Markdown(label="Profile Results", value="")
    # Event Bindings
    lookup_btn.click(lookup_profile, inputs=assessment_id_input, outputs=[profile_preview, status])
    clear_btn.click(lambda: ("", ""), None, [profile_preview, status])
    assessment_id_input.change(lambda: "", None, status)
    # Load existing profiles on startup
    load_profiles()
# Profile Interface
# Transcribe Interface components
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input Source")
task_input_choice = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
task_output = gr.Textbox(label="Transcribed Output")
# Transcribe Interface
transcribe_interface = gr.Interface(
    fn=transcribe,
    inputs=[
        audio_input,
        #audio_input_choice,
        task_input_choice,
    ],
    outputs=[
        task_output, #"text",
    ],
    title=application_title,
    description=application_description,
    allow_flagging="never",
)
# ChatInterface components
chatbot_main = gr.Chatbot(label="Extraction Output")
chatbot_main_input = gr.MultimodalTextbox(placeholder="Choose the referred material(s) and ask your question.")
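# The transcription output from Step 1 is copied into this textbox via task_output.change() further below.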
chatbot_sys_output = gr.Textbox(value="You are a friendly Chatbot.", label="System Message")
chatbot_max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max. New Tokens")
chatbot_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature")
chatbot_top_p = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)
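# These controls appear as the ChatInterface's "additional inputs" and map one-to-one onto
# respond()'s system_message, max_tokens, temperature and top_p parameters.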
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
chat_interface = gr.ChatInterface(
    respond,
    multimodal=True,
    title=application_title,
    description=application_description,
    chatbot=chatbot_main,
    textbox=chatbot_main_input,
    additional_inputs=[
        chatbot_sys_output,
        chatbot_max_tokens,
        chatbot_temperature,
        chatbot_top_p,
    ],
)
with gr.Blocks() as demo:
    gr.TabbedInterface([profile_interface, transcribe_interface, chat_interface], ["Step 0: Profile", "Step 1: Transcribe", "Step 2: Extract"])

    """
    def clear_audio_input():
        return None
    """

    def update_task_input(task_input_choice):
        if task_input_choice == "transcribe":
            return gr.Textbox(label="Transcribed Output") #Audio(sources="upload", label="Audio: from file")
        elif task_input_choice == "translate":
            return gr.Textbox(label="Translated Output") #Audio(sources="microphone", label="Audio: from microphone")

    #task_input_choice.input(fn=clear_audio_input, outputs=audio_input).then(fn=update_audio_input,
    task_input_choice.input(fn=update_task_input,
                            inputs=task_input_choice,
                            outputs=task_output
                            )

    def update_chatbot_main_input(updated_text):
        return {"text": updated_text, "files": []}

    task_output.change(fn=update_chatbot_main_input,
                       inputs=task_output,
                       outputs=chatbot_main_input
                       )

if __name__ == "__main__":
    demo.queue().launch() #demo.launch()