import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoModelForCausalLM, pipeline
# Use a pipeline as a high-level helper
pipe = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa", trust_remote_code=True)
# Load model directly
model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
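# Note: neither `pipe` nor `model` is wired into the Gradio UI below; they are only
# loaded here. As an illustrative sketch (not executed by this app), the VQA pipeline
# answers a question about an image, where "example.jpg" is a hypothetical local file:
#
#     result = pipe(image="example.jpg", question="What is in the picture?")
#     # -> a list of {"answer": ..., "score": ...} dicts, best answer first
#
# Using the Florence-2 model would additionally require its AutoProcessor.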
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    Generates a response based on the user message and chat history.

    Args:
        message (str): The user message.
        history (list): A list of (user, assistant) message tuples.
        system_message (str): The system message.
        max_tokens (int): Maximum number of tokens for the response.
        temperature (float): Temperature for the response generation.
        top_p (float): Top-p for nucleus sampling.

    Yields:
        str: The generated response, growing as new tokens stream in.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final stream chunk may carry no content
            response += token
        yield response
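
# Illustrative sketch (not part of the app): `respond` is a generator, so callers
# such as gr.ChatInterface can stream partial responses as they arrive, e.g.:
#
#     for partial in respond("Hello!", [], "You are a friendly Chatbot.", 128, 0.7, 0.95):
#         print(partial)  # prints the response so far, growing each iteration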

def process_video(video):
    """
    Processes the uploaded video file.

    Args:
        video (str): Path to the uploaded video file.

    Returns:
        str: Confirmation message for the uploaded video.
    """
    return f"Processing video: {video}"


def process_pdf(pdf):
    """
    Processes the uploaded PDF file.

    Args:
        pdf (str): Path to the uploaded PDF file.

    Returns:
        str: Confirmation message for the uploaded PDF.
    """
    return f"Processing PDF: {pdf}"


def process_image(image):
    """
    Processes the uploaded image file.

    Args:
        image (str): Path to the uploaded image file.

    Returns:
        str: Confirmation message for the uploaded image.
    """
    return f"Processing image: {image}"

# Define upload interfaces
# The components are configured to pass file paths (strings) to the handlers above.
video_upload = gr.Interface(fn=process_video, inputs=gr.Video(), outputs="text", title="Upload a Video")
pdf_upload = gr.Interface(fn=process_pdf, inputs=gr.File(file_types=[".pdf"], type="filepath"), outputs="text", title="Upload a PDF")
image_upload = gr.Interface(fn=process_image, inputs=gr.Image(type="filepath"), outputs="text", title="Upload an Image")
# Combine upload interfaces into tabs
tabbed_interface = gr.TabbedInterface([video_upload, pdf_upload, image_upload], ["Video", "PDF", "Image"])
# Main Gradio interface
demo = gr.Blocks()
with demo:
    with gr.Tab("Chat Interface"):
        gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
        )
    with gr.Tab("Upload Files"):
        # Render the pre-built tabbed upload interface inside this tab.
        tabbed_interface.render()

if __name__ == "__main__":
    demo.launch()
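
# Note: demo.launch() serves on localhost by default. When running in a container or
# on a remote host, arguments such as server_name="0.0.0.0" and server_port=7860 are
# commonly passed to make the app reachable; on Hugging Face Spaces the host and port
# are supplied via environment variables, so no arguments are needed there.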