VenkateshRoshan committed
Commit 535c52b · 1 Parent(s): 4ab4681

App Code updated

Files changed (2)
  1. app.py +133 -66
  2. requirements.txt +3 -12
app.py CHANGED
@@ -1,85 +1,152 @@
- import gradio as gr
- from transformers import pipeline, AutoModelForImageSegmentation
- from gradio_imageslider import ImageSlider
- import torch
- from torchvision import transforms
- import spaces
- from PIL import Image
-
- import numpy as np
- import time
-
- birefnet = AutoModelForImageSegmentation.from_pretrained(
-     "ZhengPeng7/BiRefNet", trust_remote_code=True
- )
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- print("Using device:", device)
-
- birefnet.to(device)
- transform_image = transforms.Compose(
-     [
-         transforms.Resize((1024, 1024)),
-         transforms.ToTensor(),
-         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-     ]
- )
-
+ # import gradio as gr
+ # from transformers import pipeline, AutoModelForImageSegmentation
+ # from gradio_imageslider import ImageSlider
+ # import torch
+ # from torchvision import transforms
+ # import spaces
+ # from PIL import Image
+
+ # import numpy as np
+ # import time
+
+ # birefnet = AutoModelForImageSegmentation.from_pretrained(
+ #     "ZhengPeng7/BiRefNet", trust_remote_code=True
+ # )
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # print("Using device:", device)
+
+ # birefnet.to(device)
+ # transform_image = transforms.Compose(
+ #     [
+ #         transforms.Resize((1024, 1024)),
+ #         transforms.ToTensor(),
+ #         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+ #     ]
+ # )
+
+ # # @spaces.GPU
+ # # def PreProcess(image):
+ # #     size = image.size
+ # #     image = transform_image(image).unsqueeze(0).to(device)
+
+ # #     with torch.no_grad():
+ # #         preds = birefnet(image)[-1].sigmoid().cpu()
+ # #     pred = preds[0].squeeze()
+ # #     pred = transforms.ToPILImage()(pred)
+ # #     mask = pred.resize(size)
+ # #     # image.putalpha(mask)
+ # #     return image
+
  # @spaces.GPU
  # def PreProcess(image):
- #     size = image.size
- #     image = transform_image(image).unsqueeze(0).to(device)
+ #     size = image.size # Save original size
+ #     image_tensor = transform_image(image).unsqueeze(0).to(device) # Transform the image into a tensor

  #     with torch.no_grad():
- #         preds = birefnet(image)[-1].sigmoid().cpu()
+ #         preds = birefnet(image_tensor)[-1].sigmoid().cpu() # Get predictions
  #     pred = preds[0].squeeze()
- #     pred = transforms.ToPILImage()(pred)
- #     mask = pred.resize(size)
- #     # image.putalpha(mask)
- #     return image
-
- @spaces.GPU
- def PreProcess(image):
-     size = image.size # Save original size
-     image_tensor = transform_image(image).unsqueeze(0).to(device) # Transform the image into a tensor
-
-     with torch.no_grad():
-         preds = birefnet(image_tensor)[-1].sigmoid().cpu() # Get predictions
-     pred = preds[0].squeeze()
-
-     # Convert the prediction tensor to a PIL image
-     pred_pil = transforms.ToPILImage()(pred)
-
-     # Resize the mask to match the original image size
-     mask = pred_pil.resize(size)
-
-     # Convert the original image (passed as input) to a PIL image
-     image_pil = image.convert("RGBA") # Ensure the image has an alpha channel
-
-     # Apply the alpha mask to the image
-     image_pil.putalpha(mask)
-
-     return image_pil
-
- def segment_image(image):
-     start = time.time()
-     image = Image.fromarray(image)
-     image = image.convert("RGB")
-     org = image.copy()
-     image = PreProcess(image)
-     time_taken = np.round((time.time() - start), 2)
-     return (image, org), time_taken
-
- slider = ImageSlider(label='birefnet', type="pil")
- image = gr.Image(label="Upload an Image")
-
- butterfly = Image.open("butterfly.png")
- Dog = Image.open('Dog.jpg')
-
- time_taken = gr.Textbox(label="Time taken", type="text")
-
- demo = gr.Interface(
-     segment_image, inputs=image, outputs=[slider, time_taken], examples=[butterfly, Dog], api_name="BiRefNet")
-
- if __name__ == '__main__':
-     demo.launch()
+ #     # Convert the prediction tensor to a PIL image
+ #     pred_pil = transforms.ToPILImage()(pred)
+
+ #     # Resize the mask to match the original image size
+ #     mask = pred_pil.resize(size)
+
+ #     # Convert the original image (passed as input) to a PIL image
+ #     image_pil = image.convert("RGBA") # Ensure the image has an alpha channel
+
+ #     # Apply the alpha mask to the image
+ #     image_pil.putalpha(mask)
+
+ #     return image_pil
+
+ # def segment_image(image):
+ #     start = time.time()
+ #     image = Image.fromarray(image)
+ #     image = image.convert("RGB")
+ #     org = image.copy()
+ #     image = PreProcess(image)
+ #     time_taken = np.round((time.time() - start), 2)
+ #     return (image, org), time_taken
+
+ # slider = ImageSlider(label='birefnet', type="pil")
+ # image = gr.Image(label="Upload an Image")
+
+ # butterfly = Image.open("butterfly.png")
+ # Dog = Image.open('Dog.jpg')
+
+ # time_taken = gr.Textbox(label="Time taken", type="text")
+
+ # demo = gr.Interface(
+ #     segment_image, inputs=image, outputs=[slider, time_taken], examples=[butterfly, Dog], api_name="BiRefNet")
+
+ # if __name__ == '__main__':
+ #     demo.launch()
+
+ import requests
+ import gradio as gr
+ import tempfile
+ import os
+ from transformers import pipeline
+ from huggingface_hub import InferenceClient
+
+ model_id = "openai/whisper-large-v3"
+ client = InferenceClient(model_id)
+ pipe = pipeline("automatic-speech-recognition", model=model_id)
+
+ # def transcribe(inputs, task):
+ #     if inputs is None:
+ #         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+ #     text = pipe(inputs, chunk_length_s=30)["text"]
+ #     return text
+
+ def transcribe(inputs, task):
+     if inputs is None:
+         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+     try:
+         res = client.automatic_speech_recognition(inputs).text
+         return res
+     except Exception as e:
+         return fr'Error: {str(e)}'
+
+
+ demo = gr.Blocks()
+
+ mf_transcribe = gr.Interface(
+     fn=transcribe,
+     inputs=[
+         gr.Audio(sources="microphone", type="filepath"),
+         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+     ],
+     outputs="text",
+     title="Whisper Large V3: Transcribe Audio",
+     description=(
+         "Transcribe long-form microphone or audio inputs with the click of a button!"
+     ),
+     allow_flagging="never",
+ )
+
+ file_transcribe = gr.Interface(
+     fn=transcribe,
+     inputs=[
+         gr.Audio(sources="upload", type="filepath", label="Audio file"),
+         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+     ],
+     outputs="text",
+     title="Whisper Large V3: Transcribe Audio",
+     description=(
+         "Transcribe long-form microphone or audio inputs with the click of a button!"
+     ),
+     allow_flagging="never",
+ )
+
+ with demo:
+     gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
+
+ if __name__ == "__main__":
+     demo.queue().launch()
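The rewritten app.py swaps the in-process BiRefNet segmentation demo for a Whisper transcription UI that delegates inference to the Hugging Face Inference API through InferenceClient. For reference, here is a minimal standalone sketch of the serverless call that transcribe() wraps, runnable outside Gradio; it is not part of the commit, "sample.wav" is a placeholder path, and a valid HF token may be required depending on how the client is deployed:

from huggingface_hub import InferenceClient

# Hosted Whisper inference; the input may be a local file path, raw bytes, or a URL.
client = InferenceClient("openai/whisper-large-v3")
result = client.automatic_speech_recognition("sample.wav")  # "sample.wav" is a placeholder
print(result.text)  # the transcription string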
requirements.txt CHANGED
@@ -1,13 +1,4 @@
- torch
- accelerate
- opencv-python
- spaces
- torchvision
- pillow
- numpy
- huggingface-hub
- gradio
- gradio-imageslider
+ git+https://github.com/openai/whisper.git
  transformers
- timm
- kornia
+ requests
+ huggingface_hub
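The trimmed requirements drop the segmentation stack but keep transformers, which backs the local pipeline path that remains commented out in app.py. A minimal sketch of that local fallback, assuming transformers with a PyTorch backend and ffmpeg are available; "sample.wav" is again a placeholder:

from transformers import pipeline

# Local Whisper inference; chunk_length_s=30 enables long-form transcription.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
text = pipe("sample.wav", chunk_length_s=30)["text"]  # "sample.wav" is a placeholder
print(text)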