Natthathida committed on
Commit
73cb49c
·
verified ·
1 Parent(s): 04f4242
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -4,8 +4,11 @@ import torchaudio
4
  from torchaudio.transforms import Resample
5
  import torch
6
  from io import BytesIO
 
 
7
 
8
  app = Flask(__name__)
 
9
 
10
  # Initialize TTS model from Hugging Face
11
  tts_model_name = "suno/bark"
@@ -16,9 +19,9 @@ model_id = "dblasko/blip-dalle3-img2prompt"
16
  blip_model = BlipForConditionalGeneration.from_pretrained(model_id)
17
  blip_processor = BlipProcessor.from_pretrained(model_id)
18
 
19
- def generate_caption(image):
20
  # Generate caption from image using Blip model
21
- inputs = blip_processor(images=image, return_tensors="pt")
22
  pixel_values = inputs.pixel_values
23
  generated_ids = blip_model.generate(pixel_values=pixel_values, max_length=50)
24
  generated_caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True, temperature=0.8, top_k=40, top_p=0.9)[0]
@@ -32,13 +35,13 @@ def generate_caption(image):
32
 
33
  @app.route('/upload', methods=['POST'])
34
  def upload_image():
35
- if 'image' not in request.files:
36
  return jsonify({'error': 'No image provided'}), 400
37
 
38
- image_file = request.files['image'].read()
39
  generated_caption, audio_path = generate_caption(image_file)
40
 
41
  return jsonify({'generated_caption': generated_caption, 'audio_url': audio_path}), 200
42
 
43
  if __name__ == '__main__':
44
- app.run(host='0.0.0.0', port=5000, debug=True)
 
4
  from torchaudio.transforms import Resample
5
  import torch
6
  from io import BytesIO
7
+ from PIL import Image
8
+ from flask_cors import CORS
9
 
10
  app = Flask(__name__)
11
+ CORS(app)
12
 
13
  # Initialize TTS model from Hugging Face
14
  tts_model_name = "suno/bark"
 
19
  blip_model = BlipForConditionalGeneration.from_pretrained(model_id)
20
  blip_processor = BlipProcessor.from_pretrained(model_id)
21
 
22
+ def generate_caption(file):
23
  # Generate caption from image using Blip model
24
+ inputs = blip_processor(files=file, return_tensors="pt")
25
  pixel_values = inputs.pixel_values
26
  generated_ids = blip_model.generate(pixel_values=pixel_values, max_length=50)
27
  generated_caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True, temperature=0.8, top_k=40, top_p=0.9)[0]
 
35
 
36
  @app.route('/upload', methods=['POST'])
37
  def upload_image():
38
+ if 'file' not in request.files:
39
  return jsonify({'error': 'No image provided'}), 400
40
 
41
+ image_file = request.files['file']
42
  generated_caption, audio_path = generate_caption(image_file)
43
 
44
  return jsonify({'generated_caption': generated_caption, 'audio_url': audio_path}), 200
45
 
46
  if __name__ == '__main__':
47
+ app.run(port=5000, debug=True)