Update app.py

app.py CHANGED
@@ -5,11 +5,13 @@ import torch
 import requests
 from PIL import Image
 import io
-from huggingface_hub import login
+from huggingface_hub import login  # Correct import for authentication
 import os
 
+# Read the Hugging Face access token from the environment variable
 read_token = os.getenv('AccToken')
 login(read_token)
+
 # Define a dictionary of conversational models
 conversational_models = {
     "Qwen": "Qwen/QwQ-32B",
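The first hunk restores the huggingface_hub import and authenticates against the Hub with a token read from the Space secret named AccToken. A minimal sketch of that pattern, assuming the same secret name; the missing-token guard is an addition here, not in the commit, since login(None) falls back to an interactive prompt that a Space cannot answer:

    import os
    from huggingface_hub import login

    token = os.getenv("AccToken")  # Space secret configured in the repo settings
    if token:
        login(token)  # registers the token for subsequent Hub downloads
    else:
        raise RuntimeError("AccToken is not set; gated models cannot be downloaded")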
@@ -48,17 +50,14 @@ document_qa_pipeline = pipeline("question-answering", model="deepset/roberta-bas
 image_classification_pipeline = pipeline("image-classification", model="facebook/detr-resnet-50") # This will be replaced
 object_detection_pipeline = pipeline("object-detection", model="facebook/detr-resnet-50")
 video_classification_pipeline = pipeline("video-classification", model="facebook/timesformer-base-finetuned-k400")
-# Removed text_to_3d_pipeline as it was causing issues
-# Removed Keypoint Detection Pipeline
-# Removed Translation pipeline as it was causing issues
 summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
 text_to_audio_pipeline = pipeline("text-to-audio", model="stabilityai/stable-audio-open-1.0")
 audio_classification_pipeline = pipeline("audio-classification", model="facebook/wav2vec2-base")
 
 def load_conversational_model(model_name):
     if model_name not in conversational_models_loaded:
-        tokenizer = AutoTokenizer.from_pretrained(conversational_models[model_name])
-        model = AutoModelForCausalLM.from_pretrained(conversational_models[model_name])
+        tokenizer = AutoTokenizer.from_pretrained(conversational_models[model_name], use_auth_token=read_token)
+        model = AutoModelForCausalLM.from_pretrained(conversational_models[model_name], use_auth_token=read_token)
         conversational_tokenizers[model_name] = tokenizer
         conversational_models_loaded[model_name] = model
     return conversational_tokenizers[model_name], conversational_models_loaded[model_name]
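This hunk deletes stale placeholder comments and threads the token into the cached loader. One note: recent transformers releases deprecate the use_auth_token keyword in favor of token. A sketch of the same lazy, load-once caching with the newer keyword, assuming the conversational_models dict and read_token defined earlier in the file:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    conversational_tokenizers = {}
    conversational_models_loaded = {}

    def load_conversational_model(model_name):
        # Download and cache on first use; later calls hit the dicts
        if model_name not in conversational_models_loaded:
            repo_id = conversational_models[model_name]
            conversational_tokenizers[model_name] = AutoTokenizer.from_pretrained(repo_id, token=read_token)
            conversational_models_loaded[model_name] = AutoModelForCausalLM.from_pretrained(repo_id, token=read_token)
        return conversational_tokenizers[model_name], conversational_models_loaded[model_name]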
@@ -85,14 +84,18 @@ def chat(model_name, user_input, history=[]):
 
 def generate_image(model_name, prompt):
     if model_name not in text_to_image_pipelines:
-        text_to_image_pipelines[model_name] = StableDiffusionPipeline.from_pretrained(
+        text_to_image_pipelines[model_name] = StableDiffusionPipeline.from_pretrained(
+            text_to_image_models[model_name], use_auth_token=read_token
+        )
     pipeline = text_to_image_pipelines[model_name]
     image = pipeline(prompt).images[0]
     return image
 
 def generate_speech(model_name, text):
     if model_name not in text_to_speech_pipelines:
-        text_to_speech_pipelines[model_name] = pipeline(
+        text_to_speech_pipelines[model_name] = pipeline(
+            "text-to-speech", model=text_to_speech_models[model_name], use_auth_token=read_token
+        )
     pipeline = text_to_speech_pipelines[model_name]
     audio = pipeline(text)
     return audio["audio"]
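A caveat on the new generate_speech: the line pipeline = text_to_speech_pipelines[model_name] makes pipeline a local variable for the whole function body, shadowing transformers.pipeline, so the call inside the if branch raises UnboundLocalError the first time an uncached model is requested. (generate_image only shadows the name; it never calls pipeline, so it still works.) A corrected sketch that keeps the commit's keywords and changes only the local name:

    def generate_speech(model_name, text):
        if model_name not in text_to_speech_pipelines:
            text_to_speech_pipelines[model_name] = pipeline(
                "text-to-speech", model=text_to_speech_models[model_name], use_auth_token=read_token
            )
        tts = text_to_speech_pipelines[model_name]  # avoid rebinding the name `pipeline`
        audio = tts(text)
        return audio["audio"]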
@@ -195,9 +198,6 @@ with gr.Blocks() as demo:
 
     video_classification_generate.click(video_classification, inputs=video_classification_video, outputs=video_classification_output)
 
-    # Removed Text-to-3D tab as it was causing issues
-    # Removed Keypoint Detection tab due to issues.
-
     with gr.Tab("Summarization"):
         summarize_text_text = gr.Textbox(label="Text")
         summarize_text_generate = gr.Button("Summarize")
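The remaining tabs all follow the same Gradio wiring: declare the input components, a button, an output, then one click binding. A self-contained sketch of the Summarization tab; the output textbox and the summarize helper are illustrative assumptions, since the hunk shows only the first two components:

    import gradio as gr
    from transformers import pipeline

    summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")

    def summarize(text):
        # BART returns a list of dicts with a "summary_text" field
        return summarization_pipeline(text)[0]["summary_text"]

    with gr.Blocks() as demo:
        with gr.Tab("Summarization"):
            summarize_text_text = gr.Textbox(label="Text")
            summarize_text_generate = gr.Button("Summarize")
            summarize_text_output = gr.Textbox(label="Summary")  # assumed name, not shown in the hunk
            summarize_text_generate.click(summarize, inputs=summarize_text_text, outputs=summarize_text_output)

    demo.launch()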