HazlamiMalek commited on
Commit
6ca9a6b
·
verified ·
1 Parent(s): 3c4c972

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -89
app.py DELETED
@@ -1,89 +0,0 @@
"""Streamlit app: generate a spoken description of an uploaded image.

Pipeline: a LLaVA-Next vision-language model describes the uploaded
image, then gTTS converts the description to an MP3 that is played
back in the browser.
"""

import streamlit as st
import torch
from gtts import gTTS
from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration

MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf"


@st.cache_resource(show_spinner="Loading processor and model...")
def _load_model():
    """Load the LLaVA processor and model once per server process.

    Cached with st.cache_resource because Streamlit re-executes the whole
    script on every widget interaction — without caching the 7B model
    would be reloaded on each rerun.  Falls back to CPU/float32 when no
    GPU is present instead of crashing on a hard-coded "cuda:0".

    Returns:
        (processor, model, device) tuple.
    """
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # float16 halves memory on GPU; CPU inference needs float32.
    dtype = torch.float16 if device.startswith("cuda") else torch.float32
    processor = LlavaNextProcessor.from_pretrained(MODEL_ID)
    model = LlavaNextForConditionalGeneration.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
    ).to(device)
    return processor, model, device


def _describe_image(processor, model, device, image):
    """Return LLaVA's text answer to "What is shown in this image?"."""
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown in this image?"},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
    output = model.generate(
        **inputs, max_new_tokens=100, pad_token_id=processor.tokenizer.eos_token_id
    )
    return processor.decode(output[0], skip_special_tokens=True)


def _synthesize_audio(text, path="output.mp3"):
    """Convert *text* to speech with gTTS, save as MP3, return the path."""
    gTTS(text).save(path)
    return path


def main():
    """Drive the Streamlit UI: upload -> describe -> speak."""
    st.title("Image-to-Audio Description Generator")

    # Step 1: Load LLaVA processor and model (cached across reruns).
    st.write("Loading processor and model...")
    try:
        processor, model, device = _load_model()
        st.write("Model loaded successfully!")
    except Exception as e:
        st.write(f"Error loading model: {str(e)}")
        st.stop()  # nothing further can work without the model

    # Step 2: Upload image.
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
    if not uploaded_image:
        return

    st.write("Processing uploaded image...")
    try:
        image = Image.open(uploaded_image).convert("RGB")
        # 336x336 matches the resolution the original script fed the model.
        image = image.resize((336, 336))
        st.image(image, caption="Uploaded Image", use_column_width=True)
    except Exception as e:
        st.write(f"Error loading image: {str(e)}")
        return  # avoid NameError on `image` in the steps below

    # Step 3: Generate description.
    st.write("Generating description...")
    try:
        description = _describe_image(processor, model, device, image)
        st.write(f"Generated Description: {description}")
    except Exception as e:
        st.write(f"Error generating description: {str(e)}")
        return  # avoid NameError on `description` in step 4

    # Step 4: Text-to-speech conversion and playback.
    st.write("Converting description to audio...")
    try:
        audio_path = _synthesize_audio(description)
        st.audio(audio_path, format="audio/mp3")
        st.write("Audio generated successfully!")
    except Exception as e:
        st.write(f"Error converting text to audio: {str(e)}")


main()