gamingflexer committed on
Commit
a25b679
·
2 Parent(s): 179f729 86a0acd

Merge branch 'main' of https://github.com/gamingflexer/Catalog-Digitization-

Browse files
src/module/audio_text.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from whisper_jax import FlaxWhisperPipline
2
+ # import jax.numpy as jnp
3
+ import whisper
4
+ print(whisper.__file__)
5
+ from openai import OpenAI
6
+ from config import OPENAI_API_KEY
7
+ import os
8
+
9
# Hand the key to the client explicitly: the client is constructed before the
# environment variable below is assigned, so it cannot rely on
# OPENAI_API_KEY already being present in the process environment.
client = OpenAI(api_key=OPENAI_API_KEY)
# Still export the key for any code that reads it from the environment.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
11
+
12
+
13
def whisper_pipeline_tpu(audio):
    """Transcribe ``audio`` with the Flax Whisper "openai/whisper-large-v3" pipeline.

    The module-level ``whisper_jax`` / ``jax.numpy`` imports are commented
    out, so the two names this function needs are imported here on demand.
    This keeps the rest of the module usable when those packages are absent.

    Args:
        audio: Input passed unchanged to the pipeline call.

    Returns:
        Whatever the pipeline call returns for ``audio``.

    Raises:
        ImportError: If ``whisper_jax`` or ``jax`` is not installed.
    """
    # Lazy imports: without them the names below are undefined at call time.
    from whisper_jax import FlaxWhisperPipline
    import jax.numpy as jnp

    pipeline = FlaxWhisperPipline(
        "openai/whisper-large-v3", dtype=jnp.bfloat16, batch_size=16
    )
    return pipeline(audio)
17
+
18
+
19
+
20
def whisper_pipeline(audio_path):
    """Transcribe an audio file with the local Whisper "medium" model.

    The audio is padded or trimmed to fit a single 30-second window before
    decoding, so only that window is transcribed. The detected language and
    the recognized text are printed as a side effect.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The recognized text of the decoded window.
    """
    model = whisper.load_model("medium")

    # Load the audio and pad/trim it to fit 30 seconds.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Build the log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # DecodingOptions defaults to fp16, and whisper.decode (unlike
    # model.transcribe) does not fall back to fp32 on CPU. Request half
    # precision only when the model is not on the CPU.
    options = whisper.DecodingOptions(fp16=model.device.type != "cpu")
    result = whisper.decode(model, mel, options)

    print(result.text)
    return result.text
36
+
37
+
38
+
39
+
40
+
41
def whisper_openai(audio_path):
    """Transcribe an audio file through the OpenAI "whisper-1" endpoint.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The object returned by ``client.audio.transcriptions.create``.
    """
    # The context manager closes the file even if the request raises; the
    # original left the handle open.
    with open(audio_path, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
48
+
49
if __name__ == "__main__":
    # whisper_pipeline requires an audio path; calling it with no argument at
    # import time raised TypeError for every importer of this module. Run it
    # only as a script, with the path taken from the command line.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} AUDIO_PATH")
    whisper_pipeline(sys.argv[1])
src/module/image_enhance.py CHANGED
@@ -2,6 +2,8 @@ import cv2
2
  import os
3
  from config import file_Directory
4
  import numpy as np
 
 
5
  class Image_Enhance():
6
 
7
  def __init__(self, image_path) -> None:
@@ -11,9 +13,9 @@ class Image_Enhance():
11
  # Load the image
12
  image = cv2.imread(self.image_path)
13
  #Plot the original image
14
- alpha = 1.5
15
  # control brightness by 50
16
- beta = -150
17
  image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
18
  #Save the image
19
  # imagepth = os.path.join(os.path.dirname(self.image_path), 'Brightness & contrast.jpg')
@@ -21,6 +23,32 @@ class Image_Enhance():
21
  cv2.imwrite(imagepth, image2)
22
  return imagepth
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def sharpen(self, imagepth):
25
  image = cv2.imread(imagepth)
26
  # Create the sharpening kernel
@@ -74,9 +102,10 @@ class Image_Enhance():
74
  cv2.imwrite(imagepath, image2)
75
 
76
 
77
- obj = Image_Enhance(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/Product Images/Bru_Instant_Coffee_Powder.png")
78
  pth = obj.brightness_Adjust()
79
  sharpen = obj.sharpen(pth)
80
  lapacian_sharpen = obj.lapacian_sharpen(sharpen)
81
- noise = obj.removing_noise(pth)
82
- obj.enhance_color(noise)
 
 
2
  import os
3
  from config import file_Directory
4
  import numpy as np
5
+ from PIL import Image
6
+
7
  class Image_Enhance():
8
 
9
  def __init__(self, image_path) -> None:
 
13
  # Load the image
14
  image = cv2.imread(self.image_path)
15
  #Plot the original image
16
+ alpha = -1.1
17
  # control brightness by 50
18
+ beta = 70
19
  image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
20
  #Save the image
21
  # imagepth = os.path.join(os.path.dirname(self.image_path), 'Brightness & contrast.jpg')
 
23
  cv2.imwrite(imagepth, image2)
24
  return imagepth
25
 
26
def remove_flash(self, imagepth):
    """Black out flash glare in an image and save it as 'remove_flash.jpg'.

    A keep-mask is built from two parts: pixels whose blurred grayscale value
    is at most 240 (not glare), and pixels that an inverted adaptive threshold
    marks as darker than their neighbourhood (text). Pixels outside the mask
    are zeroed in the output.

    Args:
        imagepth: Path of the image to process.

    Returns:
        Path of the written file, ``remove_flash.jpg`` inside
        ``file_Directory``.

    Raises:
        OSError: If the image at ``imagepth`` cannot be read.
    """
    image = cv2.imread(imagepth)
    if image is None:
        # cv2.imread signals failure by returning None; fail here with a clear
        # message instead of an opaque error inside cvtColor.
        raise OSError(f"Could not read image: {imagepth}")

    # Convert the BGR image to grayscale for thresholding.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding to segment the text.
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4
    )

    # Gaussian blur on the grayscale image to reduce noise.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Inverted binary threshold: very bright (glare) pixels become 0.
    _, mask = cv2.threshold(blurred, 240, 255, cv2.THRESH_BINARY_INV)

    # Combine the text and glare masks.
    mask = cv2.bitwise_or(mask, thresh)

    # Morphological closing with a 5x5 kernel to clean up small holes.
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    # Keep only the masked pixels of the original image.
    result = cv2.bitwise_and(image, image, mask=mask)

    out_path = os.path.join(file_Directory, 'remove_flash.jpg')
    cv2.imwrite(out_path, result)
    # Return the output path, as brightness_Adjust does for its own file.
    return out_path
51
+
52
  def sharpen(self, imagepth):
53
  image = cv2.imread(imagepth)
54
  # Create the sharpening kernel
 
102
  cv2.imwrite(imagepath, image2)
103
 
104
 
105
# Demo run on one sample product image. Each step is fed the result of an
# earlier step.
obj = Image_Enhance(
    r"data/Catalog Digitization/ONDC Test Data _ Images/Product Images/Bru_Instant_Coffee_Powder.png"
)

# Brightness/contrast adjustment first, then sharpening of that output.
pth = obj.brightness_Adjust()
sharpen = obj.sharpen(pth)

# The remaining steps all start from the sharpened result.
lapacian_sharpen = obj.lapacian_sharpen(sharpen)
noise = obj.removing_noise(sharpen)
obj.enhance_color(noise)
obj.remove_flash(sharpen)