Soma Dhavala committed on
Commit
78a6e57
·
1 Parent(s): 03d4021

add bhashini for hindi

Browse files
Files changed (2) hide show
  1. app.py +40 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -3,24 +3,59 @@
3
  import gradio as gr
4
  import openai
5
  import os
 
6
 
7
 
8
 
 
 
 
 
 
 
 
 
 
 
9
  api_key = os.getenv('OPEN_API_KEY')
10
  openai.api_key = api_key
11
 
12
 
13
  global_history = [{"role": "assistant", "content": "Hi, I am a chatbot. I can converse in English. I can answer your questions about farming in India. Ask me anything!"}]
14
 
15
-
16
  from pydub import AudioSegment
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def get_asr_output(audio_path,lang):
19
  audio = AudioSegment.from_file(audio_path)
20
- audio.export("temp.mp3", format="mp3")
21
- file = open("temp.mp3","rb")
 
 
 
 
 
 
22
  transcription = openai.Audio.transcribe("whisper-1", file, language=lang)
23
- return transcription.text
 
 
 
 
24
 
25
  def add_text(history, audio_path,lang):
26
  global global_history
@@ -79,4 +114,4 @@ with gr.Blocks(title="Krishi GPT Demo") as demo:
79
  lang.change(clear_history, [lang], chatbot, queue=False)
80
 
81
 
82
- demo.launch(share=False)
 
3
  import gradio as gr
4
  import openai
5
  import os
6
+ import torch
7
 
8
 
9
 
10
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
+ print("Device type:", device)
12
+
13
+ from transformers import pipeline
14
+ from transformers import (
15
+ AutoTokenizer,
16
+ WhisperProcessor,
17
+ WhisperForConditionalGeneration,
18
+ )
19
+
20
  api_key = os.getenv('OPEN_API_KEY')
21
  openai.api_key = api_key
22
 
23
 
24
  global_history = [{"role": "assistant", "content": "Hi, I am a chatbot. I can converse in English. I can answer your questions about farming in India. Ask me anything!"}]
25
 
 
26
  from pydub import AudioSegment
27
 
28
+ def get_asr_model_processor(model_id):
29
+ processor = WhisperProcessor.from_pretrained(model_id,model_max_length=225)
30
+ model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
31
+ # model.forced_decoder_ids =None
32
+ model.config.max_new_token = 200
33
+ return {
34
+ "model": model,
35
+ "processor": processor
36
+ }
37
+ model_proc_dict = get_asr_model_processor("vasista22/whisper-hindi-large-v2")
38
+ asr_pipe = pipeline("automatic-speech-recognition", model=model_proc_dict["model"], tokenizer=model_proc_dict["processor"].tokenizer, feature_extractor=model_proc_dict["processor"].feature_extractor,device=device)
39
+
40
+
41
+
42
+
43
  def get_asr_output(audio_path,lang):
44
  audio = AudioSegment.from_file(audio_path)
45
+ audio.export("temp.wav", format="wav")
46
+ file = open("temp.wav","rb")
47
+
48
+ print(lang)
49
+ if lang == "hi":
50
+ op_text = asr_pipe("temp.wav")
51
+ print('ai4bharat',op_text)
52
+
53
  transcription = openai.Audio.transcribe("whisper-1", file, language=lang)
54
+ print('whisper',transcription)
55
+ op_text = transcription.text
56
+ print(transcription)
57
+
58
+ return op_text
59
 
60
  def add_text(history, audio_path,lang):
61
  global global_history
 
114
  lang.change(clear_history, [lang], chatbot, queue=False)
115
 
116
 
117
+ demo.launch(share=True)
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  openai==0.27.4
2
  gradio==3.21.0
3
- pydub==0.25.1
 
1
  openai==0.27.4
2
  gradio==3.21.0
3
+ pydub==0.25.1