BarBar288 committed
Commit 325e7ed · verified · Parent(s): 552c89e

Update app.py

Files changed (1): app.py (+29 -19)
app.py CHANGED
@@ -4,8 +4,7 @@ from diffusers import StableDiffusionPipeline
 import torch
 import os
 import logging
-from huggingface_hub import login, hf_hub_download
-import requests
+from huggingface_hub import login
 import accelerate
 
 # Set up logging
@@ -56,12 +55,12 @@ text_to_speech_pipelines = {}
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Device set to use {device}")
 
-visual_qa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
-document_qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
-image_classification_pipeline = pipeline("image-classification", model="facebook/deit-base-distilled-patch16-224")
-object_detection_pipeline = pipeline("object-detection", model="facebook/detr-resnet-50")
-video_classification_pipeline = pipeline("video-classification", model="facebook/timesformer-base-finetuned-k400")
-summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
+visual_qa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa", device=device)
+document_qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2", device=device)
+image_classification_pipeline = pipeline("image-classification", model="facebook/deit-base-distilled-patch16-224", device=device)
+object_detection_pipeline = pipeline("object-detection", model="facebook/detr-resnet-50", device=device)
+video_classification_pipeline = pipeline("video-classification", model="facebook/timesformer-base-finetuned-k400", device=device)
+summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
 
 # Load speaker embeddings for text-to-audio
 def load_speaker_embeddings(model_name):
@@ -75,14 +74,14 @@ def load_speaker_embeddings(model_name):
 
 # Use a different model for text-to-audio if stabilityai/stable-audio-open-1.0 is not supported
 try:
-    text_to_audio_pipeline = pipeline("text-to-audio", model="stabilityai/stable-audio-open-1.0")
+    text_to_audio_pipeline = pipeline("text-to-audio", model="stabilityai/stable-audio-open-1.0", device=device)
 except ValueError as e:
     logger.error(f"Error loading stabilityai/stable-audio-open-1.0: {e}")
     logger.info("Falling back to a different text-to-audio model.")
-    text_to_audio_pipeline = pipeline("text-to-audio", model="microsoft/speecht5_tts")
+    text_to_audio_pipeline = pipeline("text-to-audio", model="microsoft/speecht5_tts", device=device)
     speaker_embeddings = load_speaker_embeddings("microsoft/speecht5_tts")
 
-audio_classification_pipeline = pipeline("audio-classification", model="facebook/wav2vec2-base")
+audio_classification_pipeline = pipeline("audio-classification", model="facebook/wav2vec2-base", device=device)
 
 def load_conversational_model(model_name):
     if model_name not in conversational_models_loaded:
@@ -92,11 +91,22 @@ def load_conversational_model(model_name):
             use_auth_token=read_token,
             trust_remote_code=True
         )
-        model = AutoModelForCausalLM.from_pretrained(
-            conversational_models[model_name],
-            use_auth_token=read_token,
-            trust_remote_code=True
-        )
+        try:
+            model = AutoModelForCausalLM.from_pretrained(
+                conversational_models[model_name],
+                use_auth_token=read_token,
+                trust_remote_code=True,
+                device_map="auto" if torch.cuda.is_available() else "cpu"
+            )
+        except RuntimeError as e:
+            logger.error(f"RuntimeError: {e}")
+            logger.info("Falling back to CPU for the model.")
+            model = AutoModelForCausalLM.from_pretrained(
+                conversational_models[model_name],
+                use_auth_token=read_token,
+                trust_remote_code=True,
+                device_map="cpu"
+            )
         conversational_tokenizers[model_name] = tokenizer
         conversational_models_loaded[model_name] = model
     return conversational_tokenizers[model_name], conversational_models_loaded[model_name]
@@ -105,7 +115,7 @@ def chat(model_name, user_input, history=[]):
    tokenizer, model = load_conversational_model(model_name)
 
     # Encode the input
-    input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
+    input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt").to(device)
 
     # Generate a response
     with torch.no_grad():
@@ -127,8 +137,8 @@ def generate_image(model_name, prompt):
         text_to_image_pipelines[model_name] = StableDiffusionPipeline.from_pretrained(
             text_to_image_models[model_name],
             use_auth_token=read_token,
-            torch_dtype=torch.float16,
-            device_map="auto"
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else "cpu"
         )
     pipeline = text_to_image_pipelines[model_name]
     image = pipeline(prompt).images[0]
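
The recurring change above is threading the module-level device into each transformers pipeline() call; without an explicit device argument, a pipeline stays on CPU even when a GPU is present. A minimal standalone sketch of the pattern, reusing the summarization model from the diff:

import torch
from transformers import pipeline

# Same device selection as app.py: prefer CUDA when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# pipeline() accepts a torch.device (an int or "cuda:0" also work);
# omitting it leaves the model on CPU regardless of available GPUs.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

text = ("Transformers pipelines run on CPU unless a device is passed "
        "explicitly, which is what this commit changes for every pipeline.")
print(summarizer(text, max_length=20, min_length=5)[0]["summary_text"])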
 
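The conversational loader now wraps AutoModelForCausalLM.from_pretrained in a try/except so that a CUDA failure (typically out of memory) degrades to CPU instead of crashing the Space. A self-contained sketch of that fallback, assuming accelerate is installed (app.py imports it) and using a hypothetical model id in place of the app's conversational_models lookup:

import logging
import torch
from transformers import AutoModelForCausalLM

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

model_id = "microsoft/DialoGPT-medium"  # hypothetical stand-in for conversational_models[model_name]

try:
    # device_map lets accelerate place the weights on GPU(s) when present.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto" if torch.cuda.is_available() else "cpu",
    )
except RuntimeError as e:  # e.g. "CUDA out of memory"
    logger.error(f"RuntimeError: {e}")
    logger.info("Falling back to CPU for the model.")
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")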
 
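Finally, the Stable Diffusion loader previously hard-coded torch_dtype=torch.float16, which breaks on CPU-only hardware since fp16 inference is generally unsupported on CPU; the commit makes the dtype and placement conditional. A sketch of the same idea with a hypothetical checkpoint; note that diffusers pipelines are usually moved with .to(...) rather than device_map, whose support in diffusers is narrower than in transformers:

import torch
from diffusers import StableDiffusionPipeline

# fp16 halves VRAM use on GPU but is unsupported for CPU inference,
# hence the conditional dtype the commit introduces.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # hypothetical stand-in checkpoint
    torch_dtype=dtype,
)
# The common diffusers idiom: move the whole pipeline explicitly.
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")

image = pipe("a watercolor fox in the snow").images[0]
image.save("out.png")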