VDNT11 committed on
Commit
ba70d3f
·
verified ·
1 Parent(s): 977a686

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -32
app.py CHANGED
@@ -1,30 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import torch
3
  import librosa
4
  import matplotlib.pyplot as plt
5
  from PIL import Image
6
- import os
7
-
8
- # Import the required functions and classes from your previous code
9
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
10
  import torchaudio
11
- import torch
12
  from transformers import (
 
 
 
13
  AutoModelForSeq2SeqLM,
14
  AutoTokenizer,
 
15
  )
16
- from IndicTransToolkit import IndicProcessor
17
- from transformers import BitsAndBytesConfig
18
- from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
19
- from diffusers import StableDiffusionImg2ImgPipeline
20
  import stanza
 
 
 
21
 
22
- # Ensure you have the same TransGen class and other supporting functions from your previous implementation
23
  class TransGen:
24
- def __init__(self, translation_model="ai4bharat/indictrans2-indic-en-1B",
25
- stable_diff_model="stabilityai/stable-diffusion-2-base",
26
- src_lang='hin_Deva', tgt_lang='eng_Latn'):
27
- # Same implementation as in your previous code
 
 
 
28
  self.bnb_config = BitsAndBytesConfig(load_in_4bit=True)
29
  self.tokenizer = AutoTokenizer.from_pretrained(translation_model, trust_remote_code=True)
30
  self.model = AutoModelForSeq2SeqLM.from_pretrained(translation_model, trust_remote_code=True, quantization_config=self.bnb_config)
@@ -40,7 +60,6 @@ class TransGen:
40
  self.img2img_pipe = self.img2img_pipe.to('cuda')
41
 
42
  def translate(self, input_sentences):
43
- # Same implementation as in your previous code
44
  batch = self.ip.preprocess_batch(
45
  input_sentences,
46
  src_lang=self.src_lang,
@@ -72,11 +91,9 @@ class TransGen:
72
  )
73
 
74
  translations = self.ip.postprocess_batch(generated_tokens, lang=self.tgt_lang)
75
-
76
  return translations
77
 
78
  def generate_image(self, prompt, prev_image, strength=1.0, guidance_scale=7.5):
79
- # Same implementation as in your previous code
80
  strength = float(strength) if strength is not None else 1.0
81
  guidance_scale = float(guidance_scale) if guidance_scale is not None else 7.5
82
 
@@ -96,18 +113,12 @@ class TransGen:
96
  return image.images[0]
97
 
98
  def run(self, input_sentences, strength, guidance_scale, prev_image=None):
99
- # Same implementation as in your previous code
100
  translations = self.translate(input_sentences)
101
  sentence = translations[0]
102
  image = self.generate_image(sentence, prev_image, strength, guidance_scale)
103
  return sentence, image
104
 
105
- # Initialize global variables
106
- stanza.download('hi')
107
- transgen = TransGen()
108
-
109
  def transcribe_audio_to_hindi(audio_path: str) -> str:
110
- # Same implementation as in your previous code
111
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
112
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
113
 
@@ -138,25 +149,23 @@ def transcribe_audio_to_hindi(audio_path: str) -> str:
138
  result = whisper_pipe(waveform.squeeze(0).cpu().numpy(), return_timestamps=True)
139
  return result["text"]
140
 
 
 
141
  nlp = stanza.Pipeline(lang='hi', processors='tokenize,pos')
142
 
143
- def POS_policy(input):
144
- # Same implementation as in your previous code
145
- lst = input
146
- doc = nlp(lst)
147
  words = doc.sentences[-1].words
148
  n = len(words)
149
  i = n-1
150
- while(i):
151
- if words[i].upos == 'NOUN' or words[i].upos == 'VERB':
 
152
  return i
153
- else:
154
- pass
155
  i -= 1
156
  return 0
157
 
158
  def generate_images_from_audio(audio_path, base_strength=0.8, base_guidance_scale=12):
159
- # Similar implementation with modifications for Streamlit
160
  text_tot = transcribe_audio_to_hindi(audio_path)
161
 
162
  st.write(f'Transcripted sentence: {text_tot}')
@@ -164,6 +173,7 @@ def generate_images_from_audio(audio_path, base_strength=0.8, base_guidance_scal
164
  cur_sent = ''
165
  prev_idx = 0
166
  generated_images = []
 
167
 
168
  for word in text_tot.split():
169
  cur_sent += word + ' '
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+
5
+ # Clone required repositories
6
def clone_repositories():
    """Clone the external repositories the app imports from and register them on ``sys.path``.

    Each repository is cloned with ``git clone`` into a directory relative to
    the current working directory, but only when that directory does not
    already exist. The absolute path of every repository directory is then
    added to ``sys.path`` so that later imports can resolve modules inside it.

    The path registration is idempotent: calling this function more than once
    in the same interpreter (for example when the script is re-executed) does
    not add duplicate ``sys.path`` entries.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a non-zero
            status.
    """
    repos = [
        ('https://github.com/AI4Bharat/IndicTrans2.git', 'indictrans2'),
        ('https://github.com/VarunGumma/IndicTransToolkit.git', 'indictranstoolkit'),
    ]

    for repo_url, repo_dir in repos:
        # Skip the network round-trip when a previous run already cloned it.
        if not os.path.exists(repo_dir):
            subprocess.check_call(['git', 'clone', repo_url, repo_dir])

        # Register the path whether or not a clone just happened, but only
        # once, so repeated executions do not grow sys.path without bound.
        repo_path = os.path.abspath(repo_dir)
        if repo_path not in sys.path:
            sys.path.append(repo_path)
16
+
17
+ # Clone repositories before importing
18
+ clone_repositories()
19
+
20
  import streamlit as st
21
  import torch
22
  import librosa
23
  import matplotlib.pyplot as plt
24
  from PIL import Image
 
 
 
 
25
  import torchaudio
 
26
  from transformers import (
27
+ AutoModelForSpeechSeq2Seq,
28
+ AutoProcessor,
29
+ pipeline,
30
  AutoModelForSeq2SeqLM,
31
  AutoTokenizer,
32
+ BitsAndBytesConfig
33
  )
34
+ from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, StableDiffusionImg2ImgPipeline
 
 
 
35
  import stanza
36
+ import numpy as np
37
+
38
+ from indictranstoolkit import IndicProcessor
39
 
 
40
  class TransGen:
41
+ def __init__(
42
+ self,
43
+ translation_model="ai4bharat/indictrans2-indic-en-1B",
44
+ stable_diff_model="stabilityai/stable-diffusion-2-base",
45
+ src_lang='hin_Deva',
46
+ tgt_lang='eng_Latn'
47
+ ):
48
  self.bnb_config = BitsAndBytesConfig(load_in_4bit=True)
49
  self.tokenizer = AutoTokenizer.from_pretrained(translation_model, trust_remote_code=True)
50
  self.model = AutoModelForSeq2SeqLM.from_pretrained(translation_model, trust_remote_code=True, quantization_config=self.bnb_config)
 
60
  self.img2img_pipe = self.img2img_pipe.to('cuda')
61
 
62
  def translate(self, input_sentences):
 
63
  batch = self.ip.preprocess_batch(
64
  input_sentences,
65
  src_lang=self.src_lang,
 
91
  )
92
 
93
  translations = self.ip.postprocess_batch(generated_tokens, lang=self.tgt_lang)
 
94
  return translations
95
 
96
  def generate_image(self, prompt, prev_image, strength=1.0, guidance_scale=7.5):
 
97
  strength = float(strength) if strength is not None else 1.0
98
  guidance_scale = float(guidance_scale) if guidance_scale is not None else 7.5
99
 
 
113
  return image.images[0]
114
 
115
  def run(self, input_sentences, strength, guidance_scale, prev_image=None):
 
116
  translations = self.translate(input_sentences)
117
  sentence = translations[0]
118
  image = self.generate_image(sentence, prev_image, strength, guidance_scale)
119
  return sentence, image
120
 
 
 
 
 
121
  def transcribe_audio_to_hindi(audio_path: str) -> str:
 
122
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
123
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
124
 
 
149
  result = whisper_pipe(waveform.squeeze(0).cpu().numpy(), return_timestamps=True)
150
  return result["text"]
151
 
152
+ # Download Stanza resources
153
+ stanza.download('hi')
154
  nlp = stanza.Pipeline(lang='hi', processors='tokenize,pos')
155
 
156
def POS_policy(input_text):
    """Return the index of the last NOUN or VERB in the final sentence of *input_text*.

    The text is parsed with the module-level stanza pipeline ``nlp``; only the
    words of the last sentence in the resulting document are inspected. The
    words are scanned from the end towards the start and the index of the
    first word whose universal POS tag is ``NOUN`` or ``VERB`` is returned.
    When no such word exists, 0 is returned.
    """
    last_sentence_words = nlp(input_text).sentences[-1].words
    content_tags = ('NOUN', 'VERB')

    # Walk backwards so the match closest to the end of the sentence wins.
    for idx in range(len(last_sentence_words) - 1, -1, -1):
        if last_sentence_words[idx].upos in content_tags:
            return idx
    return 0
167
 
168
  def generate_images_from_audio(audio_path, base_strength=0.8, base_guidance_scale=12):
 
169
  text_tot = transcribe_audio_to_hindi(audio_path)
170
 
171
  st.write(f'Transcripted sentence: {text_tot}')
 
173
  cur_sent = ''
174
  prev_idx = 0
175
  generated_images = []
176
+ transgen = TransGen()
177
 
178
  for word in text_tot.split():
179
  cur_sent += word + ' '