joermd committed on
Commit
5b492c0
·
verified ·
1 Parent(s): 31715cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -22
app.py CHANGED
@@ -1,23 +1,166 @@
 
 
 
 
1
  import gradio as gr
2
- from pytube import YouTube
3
-
4
def download_video(url):
    """Download the highest-resolution progressive stream of a YouTube video.

    The file is written to the ``downloads`` directory as ``<title>.mp4``,
    where ``<title>`` is the video title with characters that are unsafe in
    file names replaced by underscores.

    Args:
        url: The YouTube video URL.

    Returns:
        A human-readable status string: ``"Downloaded: <file>"`` on success or
        ``"An error occurred: <reason>"`` on failure. Errors are reported in
        the returned text rather than raised, because the string is shown
        directly in the UI.
    """
    # Local import: the surrounding file does not import ``re`` at module level.
    import re

    try:
        yt = YouTube(url)
        stream = yt.streams.get_highest_resolution()
        if stream is None:
            return "An error occurred: no downloadable stream found"
        # Titles routinely contain '/', ':', '?' and similar characters that
        # either break the write or change the target directory.
        safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", yt.title).strip(" .") or "video"
        filename = f"{safe_title}.mp4"
        stream.download(output_path="downloads", filename=filename)
        return f"Downloaded: {filename}"
    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of crashing.
        return f"An error occurred: {e}"
12
-
13
# Create a Gradio interface.
# ``gr.Textbox`` replaces the legacy ``gr.inputs.Textbox`` namespace, which was
# deprecated in Gradio 3 and is no longer available in Gradio 4+.
iface = gr.Interface(
    fn=download_video,
    inputs=gr.Textbox(lines=2, placeholder="Enter YouTube URL..."),
    outputs="text",
    title="YouTube Video Downloader",
    description="Enter the YouTube video URL and click 'Submit' to download the video.",
)

# Launch the interface
iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import os
3
+ import re
4
+ import time
5
  import gradio as gr
6
+ import torch
7
+ from transformers import AutoModelForCausalLM
8
+ from transformers import TextIteratorStreamer
9
+ from threading import Thread
10
+
11
# Hub identifier of the checkpoint served by this demo.
model_name = 'AIDC-AI/Ovis1.6-Llama3.2-3B'

# load model
# The access token is read from the HUGGINGFACE_TOKEN environment variable
# (None when unset); trust_remote_code lets the checkpoint's own modelling
# code be imported.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    token=os.getenv('HUGGINGFACE_TOKEN'),
    trust_remote_code=True,
)
model = model.to(device='cuda')

# Tokenizers exposed by the loaded model, plus a module-level text streamer.
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
streamer = TextIteratorStreamer(
    text_tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

# Marker inserted into the prompt text where an image belongs.
image_placeholder = '<image>'

# Directory containing this file; used to locate bundled resources.
cur_dir = os.path.dirname(os.path.abspath(__file__))
24
+
25
def submit_chat(chatbot, text_input):
    """Append the user's message to the chat history as a pending turn.

    Args:
        chatbot: Chat history, a list of ``[query, response]`` pairs. It is
            extended in place.
        text_input: The text the user just submitted.

    Returns:
        A ``(chatbot, '')`` tuple: the updated history and an empty string
        used to clear the prompt box.
    """
    response = ''
    # Use a mutable list, not a tuple: the streaming step fills in the
    # response slot of this pair after the fact.
    chatbot.append([text_input, response])
    return chatbot, ''
29
+
30
@spaces.GPU
def ovis_chat(chatbot, image_input):
    """Stream the model's reply to the newest user turn into the chat history.

    Args:
        chatbot: Chat history as a list of ``[query, response]`` pairs. The
            last pair holds the pending query; its response is filled in as
            text is generated.
        image_input: Optional image forwarded to ``model.preprocess_inputs``;
            ``None`` for a text-only conversation.

    Yields:
        The ``chatbot`` list after each streamed chunk, with the last pair's
        response extended by the new text.
    """
    def _strip_placeholder(text):
        # Non-string entries are passed through untouched.
        if isinstance(text, str):
            return text.replace(image_placeholder, '')
        return text

    # preprocess inputs
    pending_query = chatbot[-1][0]
    conversations = []
    for query, answer in chatbot[:-1]:
        # Earlier user turns get the same placeholder stripping as the new
        # one, so that the only placeholder left is the one added below.
        # NOTE(review): assumes preprocess_inputs pairs placeholders with the
        # supplied images one-to-one -- confirm against the model code.
        conversations.append({
            "from": "human",
            "value": _strip_placeholder(query)
        })
        conversations.append({
            "from": "gpt",
            "value": answer
        })
    text_input = pending_query.replace(image_placeholder, '')
    conversations.append({
        "from": "human",
        "value": text_input
    })
    if image_input is not None:
        # The image is attached to the first human turn of the conversation.
        conversations[0]["value"] = image_placeholder + '\n' + conversations[0]["value"]
    _, input_ids, pixel_values = model.preprocess_inputs(conversations, [image_input])
    attention_mask = torch.ne(input_ids, text_tokenizer.pad_token_id)
    input_ids = input_ids.unsqueeze(0).to(device=model.device)
    attention_mask = attention_mask.unsqueeze(0).to(device=model.device)
    if image_input is None:
        pixel_values = [None]
    else:
        pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]

    gen_kwargs = dict(
        max_new_tokens=512,
        do_sample=False,
        top_p=None,
        top_k=None,
        temperature=None,
        repetition_penalty=None,
        eos_token_id=model.generation_config.eos_token_id,
        pad_token_id=text_tokenizer.pad_token_id,
        use_cache=True
    )

    # One streamer per request: a streamer shared at module level would mix
    # the text of overlapping requests.
    request_streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)

    def _run_generation():
        # inference_mode is thread-local, so it has to be entered in the
        # thread that actually runs generate().
        try:
            with torch.inference_mode():
                model.generate(inputs=input_ids,
                               pixel_values=pixel_values,
                               attention_mask=attention_mask,
                               streamer=request_streamer,
                               **gen_kwargs)
        except Exception:
            # Unblock the consumer loop below instead of leaving it waiting
            # forever on a stream that will never finish.
            request_streamer.end()
            raise

    response = ""
    thread = Thread(target=_run_generation)
    thread.start()
    for new_text in request_streamer:
        response += new_text
        # Replace the pair rather than assigning into it so tuple-shaped
        # history entries work as well as lists.
        chatbot[-1] = [pending_query, response]
        yield chatbot
    thread.join()

    # debug
    print('*'*60)
    print('*'*60)
    print('OVIS_CONV_START')
    for i, (request, answer) in enumerate(chatbot[:-1], 1):
        print(f'Q{i}:\n {request}')
        print(f'A{i}:\n {answer}')
    print('New_Q:\n', text_input)
    print('New_A:\n', response)
    print('OVIS_CONV_END')
96
+
97
def clear_chat():
    """Return reset values for the chat history, the image and the prompt.

    Returns:
        A 3-tuple ``([], None, "")``: an empty history, no image and an
        empty prompt string.
    """
    empty_history = []
    no_image = None
    empty_prompt = ""
    return empty_history, no_image, empty_prompt
99
+
100
# Banner text size; the logo is scaled to the same height so both line up.
font_size = "2.5em"

# Read the bundled logo markup.
with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
    svg_content = svg_file.read()

# Inject a height and inline-alignment style into the opening <svg ...> tag.
svg_content = re.sub(
    r'(<svg[^>]*)(>)',
    rf'\1 height="{font_size}" style="vertical-align: middle; display: inline-block;"\2',
    svg_content,
)

# Page header: logo + model name, followed by links to the model card and repo.
html = f"""
<p align="center" style="font-size: {font_size}; line-height: 1;">
<span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
<span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
</p>
<center><font size=3><b>Ovis</b> has been open-sourced on <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>. If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.</font></center>
"""
111
+
112
# Delimiter pairs handed to the chat widget for math rendering: inline
# \( ... \), a handful of display environments, and display \[ ... \].
latex_delimiters_set = [
    {"left": "\\(", "right": "\\)", "display": False},
    *[
        {"left": f"\\begin{{{env}}}", "right": f"\\end{{{env}}}", "display": True}
        for env in ("equation", "align", "alignat", "gather", "CD")
    ],
    {"left": "\\[", "right": "\\]", "display": True},
]
141
+
142
# The prompt box is created before the layout so the examples widget can
# reference it; it is placed on the page later via render().
text_input = gr.Textbox(
    label="prompt",
    placeholder="Enter your text here...",
    lines=1,
    container=False,
)

with gr.Blocks(title=model_name.split('/')[-1], theme=gr.themes.Ocean()) as demo:
    gr.HTML(html)
    with gr.Row():
        # Left column: image upload and clickable examples.
        with gr.Column(scale=3):
            image_input = gr.Image(label="image", height=350, type="pil")
            gr.Examples(
                examples=[
                    [f"{cur_dir}/examples/case1.png", "explain this model to me."],
                    [f"{cur_dir}/examples/case2.png", "Which city is in the picture?"],
                ],
                inputs=[image_input, text_input],
            )
        # Right column: conversation, prompt box and action buttons.
        with gr.Column(scale=7):
            chatbot = gr.Chatbot(
                label="Ovis",
                layout="panel",
                height=600,
                show_copy_button=True,
                latex_delimiters=latex_delimiters_set,
            )
            text_input.render()
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

    # Both the Send button and pressing Enter first record the user turn and
    # clear the prompt, then stream the model's answer into the chat.
    send_click_event = send_btn.click(
        submit_chat, [chatbot, text_input], [chatbot, text_input]
    ).then(
        ovis_chat, [chatbot, image_input], chatbot
    )
    submit_event = text_input.submit(
        submit_chat, [chatbot, text_input], [chatbot, text_input]
    ).then(
        ovis_chat, [chatbot, image_input], chatbot
    )
    clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])

demo.launch()