VenkateshRoshan committed
Commit 191f3b0 · Parent: 5eec0c8

Dockerfile Updated
Files changed (4):
  1. app.py (+55 -22)
  2. automatic_deployer.py (+7 -0)
  3. dockerfile (+11 -3)
  4. requirements.txt (+2 -1)
app.py CHANGED

@@ -6,17 +6,27 @@ from transformers import pipeline
 from huggingface_hub import InferenceClient
 import time
 import psutil
+from prometheus_client import start_http_server, Summary, Counter, Gauge
 # import torch
 # import numpy as np
 
 # Ensure CUDA is available and set device accordingly
 # device = 0 if torch.cuda.is_available() else -1
 
+# Initialize Prometheus metrics
+REQUEST_COUNT = Counter("transcription_requests_total", "Total transcription requests", ["method"])
+REQUEST_DURATION = Summary("transcription_request_duration_seconds", "Duration of transcription requests in seconds", ["method"])
+MEMORY_USAGE = Gauge("transcription_memory_usage_bytes", "Memory used by the transcription function")
+RAM_USAGE_PERCENTAGE = Gauge("ram_usage_percentage", "Percentage of total RAM used by the transcription function")
+
+# Start the Prometheus HTTP server to expose metrics
+start_http_server(8000) # Port 8000 is the standard for Prometheus metrics
+
 model_id = "openai/whisper-small"
 client = InferenceClient(model_id, token=os.getenv('HF_TOKEN'))
 pipe = pipeline("automatic-speech-recognition", model=model_id) #, device=device)
 
-print(f'The Server is Running !!!')
+print(f'The Server is Running with Prometheus Metrics enabled !!!')
 
 def transcribe(inputs, use_api):
     start = time.time()
@@ -28,32 +38,55 @@ def transcribe(inputs, use_api):
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
 
     try:
-        if use_api:
-            print(f'Using API for transcription...')
-            API_STATUS = 'Using API it took: '
-            # Use InferenceClient (API) if checkbox is checked
-            res = client.automatic_speech_recognition(inputs).text
-        else:
-            print(f'Using local pipeline for transcription...')
-            # Use local pipeline if checkbox is unchecked
-            API_STATUS = 'Using local pipeline it took: '
-            res = pipe(inputs, chunk_length_s=30)["text"]
-
-        end = time.time() - start
-
-        # Measure memory after running the transcription process
-        memory_after = psutil.Process(os.getpid()).memory_info().rss
-
-        # Calculate the difference to see how much memory was used by the code
-        memory_used = memory_after - memory_before # Memory used in bytes
-        memory_used_gb = round(memory_used / (1024 ** 3), 2) # Convert memory used to GB
-        total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3), 2) # Total RAM in GB
-
-        # Calculate the percentage of RAM used by this process
-        memory_used_percent = round((memory_used / psutil.virtual_memory().total) * 100, 2)
-
-        return res, API_STATUS + str(round(end, 2)) + ' seconds', f"RAM Used by code: {memory_used_gb} GB ({memory_used_percent}%) Total RAM: {total_memory_gb} GB"
-
+        # if use_api:
+        #     print(f'Using API for transcription...')
+        #     API_STATUS = 'Using API it took: '
+        #     # Use InferenceClient (API) if checkbox is checked
+        #     res = client.automatic_speech_recognition(inputs).text
+        # else:
+        #     print(f'Using local pipeline for transcription...')
+        #     # Use local pipeline if checkbox is unchecked
+        #     API_STATUS = 'Using local pipeline it took: '
+        #     res = pipe(inputs, chunk_length_s=30)["text"]
+
+        # end = time.time() - start
+
+        # # Measure memory after running the transcription process
+        # memory_after = psutil.Process(os.getpid()).memory_info().rss
+        #
+        # # Calculate the difference to see how much memory was used by the code
+        # memory_used = memory_after - memory_before # Memory used in bytes
+        # memory_used_gb = round(memory_used / (1024 ** 3), 2) # Convert memory used to GB
+        # total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3), 2) # Total RAM in GB
+        #
+        # # Calculate the percentage of RAM used by this process
+        # memory_used_percent = round((memory_used / psutil.virtual_memory().total) * 100, 2)
+        #
+        # return res, API_STATUS + str(round(end, 2)) + ' seconds', f"RAM Used by code: {memory_used_gb} GB ({memory_used_percent}%) Total RAM: {total_memory_gb} GB"
+        method = 'API' if use_api else 'Local Pipeline'
+
+        # Start timing for Prometheus
+        with REQUEST_DURATION.labels(method=method).time():
+            REQUEST_COUNT.labels(method=method).inc() # Increment the request counter
+
+            # Transcription
+            if use_api:
+                print(f'Using API for transcription...')
+                res = client.automatic_speech_recognition(inputs).text
+            else:
+                print(f'Using local pipeline for transcription...')
+                res = pipe(inputs, chunk_length_s=30)["text"]
+
+        # Measure memory after running the transcription process
+        memory_after = psutil.Process(os.getpid()).memory_info().rss
+        memory_used = memory_after - memory_before
+        MEMORY_USAGE.set(memory_used) # Set memory usage in bytes
+
+        total_memory_percent = psutil.virtual_memory().percent
+        RAM_USAGE_PERCENTAGE.set(total_memory_percent) # Set RAM usage as a percentage
+
+        end = time.time() - start
+        return res, f"{method} took: {round(end, 2)} seconds", f"RAM Used by code: {memory_used / (1024 ** 3):.2f} GB ({total_memory_percent}%)"
 
     except Exception as e:
         return fr'Error: {str(e)}', None, None
@@ -107,4 +140,4 @@ with demo:
     # # time_taken = gr.Textbox(label="Time taken", type="text") # Time taken outside the interfaces
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
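Note on verifying the new metrics: prometheus_client's start_http_server(8000) serves the Prometheus text format at /metrics, so the counters and gauges registered above can be checked with a plain HTTP fetch. A minimal sketch, assuming app.py is running and reachable on localhost:8000 (adjust the port if it is remapped):

# Sketch: confirm the metrics registered in app.py are being exported.
# Assumes app.py is running locally with start_http_server(8000).
import urllib.request

with urllib.request.urlopen("http://localhost:8000/metrics", timeout=5) as resp:
    body = resp.read().decode("utf-8")

# Print only the sample lines for the metrics defined in app.py.
for line in body.splitlines():
    if line.startswith(("transcription_", "ram_usage_percentage")):
        print(line)

After a few requests, transcription_requests_total{method="API"} and {method="Local Pipeline"} should tick up independently, since the counter is labeled per method.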
automatic_deployer.py CHANGED

@@ -1,5 +1,6 @@
 import subprocess
 import os
+import socket
 import time
 
 PORT=22013
@@ -19,6 +20,12 @@ def deploy():
     except subprocess.CalledProcessError as e:
         print(f"Error occurred while deploying: {e.stderr.decode()}")
 
+def is_port_busy(host, port):
+    """Check if the specified port on the host is busy."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        sock.settimeout(1) # 1 second timeout
+        return sock.connect_ex((host, port)) == 0 # Returns True if port is busy
+
 def checkStatus(HOST, PORT):
     print(f'Checking the status of the app...')
     try:
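The new is_port_busy helper is a plain TCP probe: connect_ex returns 0 when the connection succeeds, i.e. when something is already listening on that port. A minimal usage sketch (HOST is a hypothetical value here; PORT=22013 comes from the top of the file):

# Hypothetical usage of is_port_busy ahead of a redeploy.
HOST = "127.0.0.1"  # assumption for illustration

if is_port_busy(HOST, PORT):
    print(f"App already listening on {HOST}:{PORT}; skipping deploy.")
else:
    print(f"{HOST}:{PORT} is free; deploying...")
    deploy()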
dockerfile CHANGED

@@ -9,6 +9,7 @@ WORKDIR /app
 # Install FFmpeg and other dependencies
 RUN apt-get update
 RUN apt-get install -y ffmpeg
+RUN apt-get install -y prometheus-node-exporter
 RUN apt-get clean
 
 # Copy the current directory contents into the container at /app
@@ -17,8 +18,15 @@ COPY . /app
 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Make port 5000 available to the world outside this container
-EXPOSE 5000
+# Make port 7860 available to the world outside this container
+EXPOSE 7860
+# Prometheus Node Exporter metrics
+EXPOSE 25561
+# Prometheus Python app metrics
+EXPOSE 25562
+
 
 # Run app.py when the container launches
-CMD ["python", "app.py"]
+# CMD ["python", "app.py"]
+# Run both the Node Exporter and the Gradio application
+CMD ["sh", "-c", "prometheus-node-exporter & python app.py"]
requirements.txt CHANGED

@@ -5,4 +5,5 @@ huggingface_hub
 pytest
 gradio
 ffmpeg
-psutil
+psutil
+prometheus_client