Spaces:
Paused
Paused
Uploading completed backend
Browse files- backend/.DS_Store +0 -0
- backend/.gitignore +2 -0
- backend/Client.py +81 -0
- backend/__pycache__/Client.cpython-310.pyc +0 -0
- backend/__pycache__/main.cpython-310.pyc +0 -0
- backend/logging.yaml +22 -0
- backend/main.py +344 -0
- backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc +0 -0
- backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc +0 -0
- backend/mongodb/endpoints/calls.py +74 -0
- backend/mongodb/endpoints/users.py +43 -0
- backend/mongodb/models/__pycache__/calls.cpython-310.pyc +0 -0
- backend/mongodb/models/__pycache__/users.cpython-310.pyc +0 -0
- backend/mongodb/models/calls.py +72 -0
- backend/mongodb/models/users.py +37 -0
- backend/mongodb/operations/__pycache__/calls.cpython-310.pyc +0 -0
- backend/mongodb/operations/__pycache__/users.cpython-310.pyc +0 -0
- backend/mongodb/operations/calls.py +197 -0
- backend/mongodb/operations/users.py +76 -0
- backend/requirements.txt +28 -0
- backend/routes/__init__.py +1 -0
- backend/routes/__pycache__/__init__.cpython-310.pyc +0 -0
- backend/routes/__pycache__/routing.cpython-310.pyc +0 -0
- backend/routes/routing.py +7 -0
- backend/tests/.pytest_cache/.gitignore +2 -0
- backend/tests/.pytest_cache/CACHEDIR.TAG +4 -0
- backend/tests/.pytest_cache/README.md +8 -0
- backend/tests/.pytest_cache/v/cache/lastfailed +11 -0
- backend/tests/.pytest_cache/v/cache/nodeids +42 -0
- backend/tests/.pytest_cache/v/cache/stepwise +1 -0
- backend/tests/__init__.py +0 -0
- backend/tests/__pycache__/__init__.cpython-310.pyc +0 -0
- backend/tests/__pycache__/integration_test.cpython-310-pytest-8.1.1.pyc +0 -0
- backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc +0 -0
- backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc +0 -0
- backend/tests/__pycache__/test_main.cpython-310.pyc +0 -0
- backend/tests/__pycache__/unit_test.cpython-310-pytest-8.1.1.pyc +0 -0
- backend/tests/integration_test.py +98 -0
- backend/tests/silence.wav +0 -0
- backend/tests/speaking.wav +0 -0
- backend/tests/test_client.py +59 -0
- backend/tests/test_main.py +86 -0
- backend/tests/unit_test.py +277 -0
- backend/utils/__pycache__/text_rank.cpython-310.pyc +0 -0
- backend/utils/text_rank.py +60 -0
backend/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
backend/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
myenv
|
2 |
+
.pytest_cache
|
backend/Client.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Iterator, Tuple
|
2 |
+
import wave
|
3 |
+
import os
|
4 |
+
|
5 |
+
import torchaudio
|
6 |
+
from vad import EnergyVAD
|
7 |
+
TARGET_SAMPLING_RATE = 16000  # Hz; sample rate used for resampling and VAD in this module


def create_frames(data: bytes, frame_duration: int) -> Tuple[Iterator[bytes], int]:
    """Split a raw audio buffer into consecutive fixed-size frames.

    Args:
        data: Raw audio buffer to slice.
        frame_duration: Frame length in milliseconds.

    Returns:
        A ``(frames, frame_size)`` pair. ``frames`` lazily yields consecutive
        slices of ``data`` (the final slice may be shorter) and ``frame_size``
        is the slice length that was used.

    Raises:
        ValueError: If ``frame_duration`` results in a frame size below 1.
    """
    # NOTE(review): frame_size is computed as a number of samples but is used
    # as a number of bytes when slicing; for 16-bit PCM a frame would be twice
    # as many bytes. Confirm the intended unit with the callers.
    frame_size = int(TARGET_SAMPLING_RATE * (frame_duration / 1000))
    if frame_size <= 0:
        # Previously a zero size surfaced as an opaque "range() arg 3 must not
        # be zero" error; fail with a message that names the real cause.
        raise ValueError(
            f"frame_duration={frame_duration!r} ms gives a non-positive frame size"
        )
    frames = (
        data[start:start + frame_size]
        for start in range(0, len(data), frame_size)
    )
    return frames, frame_size
|
12 |
+
|
13 |
+
def detect_activity(energies: list, min_consecutive: int = 12):
    """Decide whether a sequence of per-frame VAD flags contains speech.

    Speech is reported when there is an unbroken run of at least
    ``min_consecutive`` frames whose value equals ``1``.

    Args:
        energies: Per-frame flags; a frame counts as active when it equals 1.
        min_consecutive: Length of the unbroken active run required to report
            activity. Defaults to 12, the previously hard-coded value.

    Returns:
        True if a long enough run of active frames is found, otherwise False.
    """
    # Cheap pre-filter kept from the original: if fewer than one frame in
    # twelve is active overall, the clip is treated as silence outright.
    if sum(energies) < len(energies) / 12:
        return False

    run_length = 0
    for energy in energies:
        if energy == 1:
            run_length += 1
            if run_length >= min_consecutive:
                return True
        else:
            # Any inactive frame breaks the run.
            run_length = 0
    return False
|
25 |
+
|
26 |
+
class Client:
    """State kept for one connected socket.io client.

    Holds the client's identity, the call it belongs to, its chosen target
    language, and a byte buffer of incoming audio that is periodically written
    to disk, resampled and analysed for voice activity.
    """

    def __init__(self, sid, client_id, username, call_id=None, original_sr=None):
        """Create the per-connection state.

        Args:
            sid: Socket.io session id; also used as the prefix of temp files.
            client_id: Application-level user id.
            username: Display name of the user.
            call_id: Id of the call the client is in, if any.
            original_sr: Sample rate of the audio the client sends, if known.
        """
        self.sid = sid
        self.client_id = client_id
        # Bug fix: a stray trailing comma used to store a 1-tuple here, so
        # str(client.username) rendered as "('name',)".
        self.username = username
        self.call_id = call_id
        self.buffer = bytearray()
        self.output_path = self.sid + "_output_audio.wav"
        self.target_language = None
        self.original_sr = original_sr
        self.vad = EnergyVAD(
            sample_rate=TARGET_SAMPLING_RATE,
            frame_length=25,
            frame_shift=20,
            energy_threshold=0.05,
            pre_emphasis=0.95,
        )  # PM - Default values given in the docs for this class

    def add_bytes(self, new_bytes):
        """Append newly received audio bytes to the buffer."""
        self.buffer += new_bytes

    def resample_and_clear(self):
        """Write the buffer to a WAV file, resample it and empty the buffer.

        The buffer is written as mono, 2-byte samples at ``original_sr`` to
        ``<sid>_OG.wav``, loaded back with torchaudio and resampled to
        ``TARGET_SAMPLING_RATE``.

        Returns:
            The resampled waveform produced by the torchaudio resampler.

        Raises:
            ValueError: If ``original_sr`` has not been set yet.
        """
        if not self.original_sr:
            # Without this the wave module fails with an unrelated-looking
            # error while comparing the frame rate.
            raise ValueError(
                f"original sample rate not set for {self.sid}; cannot write audio"
            )
        original_path = self.sid + "_OG.wav"
        print(f"📥 [ClientAudioBuffer] Writing {len(self.buffer)} bytes to {self.output_path}")
        with wave.open(original_path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self.original_sr)
            wf.setnframes(0)
            wf.setcomptype("NONE", "not compressed")
            wf.writeframes(self.buffer)
        waveform, sample_rate = torchaudio.load(original_path)
        resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLING_RATE, dtype=waveform.dtype)
        resampled_waveform = resampler(waveform)
        self.buffer = bytearray()
        return resampled_waveform

    def vad_analyse(self, resampled_waveform):
        """Save the waveform to ``output_path`` and run voice-activity detection.

        Returns:
            The result of ``detect_activity`` on the VAD output.
        """
        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)
        vad_array = self.vad(resampled_waveform)
        # print(f"VAD OUTPUT: {vad_array}")
        return detect_activity(vad_array)

    def write_to_file(self, resampled_waveform):
        """Save the waveform to ``output_path`` at ``TARGET_SAMPLING_RATE``."""
        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)

    def get_length(self):
        """Return the number of buffered bytes."""
        return len(self.buffer)

    def __del__(self):
        """Warn about unprocessed audio and remove this client's temp files."""
        # __del__ also runs when __init__ failed part-way, so every attribute
        # is read defensively instead of assuming it exists.
        sid = getattr(self, "sid", None)
        buffer = getattr(self, "buffer", None)
        if buffer:
            print(f"🚨 [ClientAudioBuffer] Buffer not empty for {sid} ({len(buffer)} bytes)!")
        if sid is None:
            return
        for path in (getattr(self, "output_path", None), sid + "_OG.wav"):
            if path and os.path.exists(path):
                os.remove(path)
|
81 |
+
|
backend/__pycache__/Client.cpython-310.pyc
ADDED
Binary file (3.37 kB). View file
|
|
backend/__pycache__/main.cpython-310.pyc
ADDED
Binary file (8.57 kB). View file
|
|
backend/logging.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: 1
|
2 |
+
disable_existing_loggers: false
|
3 |
+
|
4 |
+
formatters:
|
5 |
+
standard:
|
6 |
+
format: "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s"
|
7 |
+
|
8 |
+
handlers:
|
9 |
+
console:
|
10 |
+
class: logging.StreamHandler
|
11 |
+
formatter: standard
|
12 |
+
stream: ext://sys.stdout
|
13 |
+
|
14 |
+
loggers:
|
15 |
+
uvicorn:
|
16 |
+
error:
|
17 |
+
propagate: true
|
18 |
+
|
19 |
+
root:
|
20 |
+
level: INFO
|
21 |
+
handlers: [console]
|
22 |
+
propagate: no
|
backend/main.py
ADDED
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from operator import itemgetter
|
2 |
+
import os
|
3 |
+
from datetime import datetime
|
4 |
+
import uvicorn
|
5 |
+
from typing import Any, Optional, Tuple, Dict, TypedDict
|
6 |
+
from urllib import parse
|
7 |
+
from uuid import uuid4
|
8 |
+
import logging
|
9 |
+
from fastapi.logger import logger as fastapi_logger
|
10 |
+
import sys
|
11 |
+
|
12 |
+
from fastapi import FastAPI
|
13 |
+
from fastapi.middleware.cors import CORSMiddleware
|
14 |
+
from fastapi import APIRouter, Body, Request, status
|
15 |
+
from pymongo import MongoClient
|
16 |
+
from dotenv import dotenv_values
|
17 |
+
from routes import router as api_router
|
18 |
+
from contextlib import asynccontextmanager
|
19 |
+
import requests
|
20 |
+
|
21 |
+
from typing import List
|
22 |
+
from datetime import date
|
23 |
+
from mongodb.operations.calls import *
|
24 |
+
from mongodb.operations.users import *
|
25 |
+
from mongodb.models.calls import UserCall, UpdateCall
|
26 |
+
# from mongodb.endpoints.calls import *
|
27 |
+
|
28 |
+
from transformers import AutoProcessor, SeamlessM4Tv2Model
|
29 |
+
|
30 |
+
# from seamless_communication.inference import Translator
|
31 |
+
from Client import Client
|
32 |
+
import numpy as np
|
33 |
+
import torch
|
34 |
+
import socketio
|
35 |
+
|
36 |
+
# Configure logger
|
37 |
+
gunicorn_error_logger = logging.getLogger("gunicorn.error")
|
38 |
+
gunicorn_logger = logging.getLogger("gunicorn")
|
39 |
+
uvicorn_access_logger = logging.getLogger("uvicorn.access")
|
40 |
+
|
41 |
+
gunicorn_error_logger.propagate = True
|
42 |
+
gunicorn_logger.propagate = True
|
43 |
+
uvicorn_access_logger.propagate = True
|
44 |
+
|
45 |
+
uvicorn_access_logger.handlers = gunicorn_error_logger.handlers
|
46 |
+
fastapi_logger.handlers = gunicorn_error_logger.handlers
|
47 |
+
|
48 |
+
# sio is the main socket.io entrypoint
|
49 |
+
sio = socketio.AsyncServer(
|
50 |
+
async_mode="asgi",
|
51 |
+
cors_allowed_origins="*",
|
52 |
+
logger=gunicorn_logger,
|
53 |
+
engineio_logger=gunicorn_logger,
|
54 |
+
)
|
55 |
+
# sio.logger.setLevel(logging.DEBUG)
|
56 |
+
socketio_app = socketio.ASGIApp(sio)
|
57 |
+
# app.mount("/", socketio_app)
|
58 |
+
|
59 |
+
# config = dotenv_values(".env")
|
60 |
+
|
61 |
+
# Read connection string from environment vars
|
62 |
+
uri = os.environ['MONGODB_URI']
|
63 |
+
|
64 |
+
# Read connection string from .env file
|
65 |
+
# uri = config['MONGODB_URI']
|
66 |
+
|
67 |
+
|
68 |
+
# MongoDB Connection Lifespan Events
|
69 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the MongoDB connection at startup and close it at shutdown.

    Attaches ``mongodb_client`` and ``database`` to ``app``. A failed ping is
    printed but does not stop the application from starting.
    """
    # startup logic
    app.mongodb_client = MongoClient(uri)
    app.database = app.mongodb_client['IT-Cluster1']  # connect to interpretalk primary db
    try:
        app.mongodb_client.admin.command('ping')
        print("MongoDB Connection Established...")
    except Exception as e:
        print(e)

    try:
        yield
    finally:
        # shutdown logic; in a finally block so the client is closed even when
        # the application exits through an exception.
        print("Closing MongoDB Connection...")
        app.mongodb_client.close()
|
85 |
+
|
86 |
+
app = FastAPI(lifespan=lifespan, logger=gunicorn_logger)
|
87 |
+
|
88 |
+
# New CORS functionality
|
89 |
+
app.add_middleware(
|
90 |
+
CORSMiddleware,
|
91 |
+
allow_origins=["*"], # configured node app port
|
92 |
+
allow_credentials=True,
|
93 |
+
allow_methods=["*"],
|
94 |
+
allow_headers=["*"],
|
95 |
+
)
|
96 |
+
|
97 |
+
app.include_router(api_router) # include routers for user, calls and transcripts operations
|
98 |
+
|
99 |
+
DEBUG = True
|
100 |
+
|
101 |
+
ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME = "remove_server_lock"
|
102 |
+
|
103 |
+
TARGET_SAMPLING_RATE = 16000
|
104 |
+
MAX_BYTES_BUFFER = 960_000
|
105 |
+
|
106 |
+
print("")
|
107 |
+
print("")
|
108 |
+
print("=" * 18 + " Interpretalk is starting... " + "=" * 18)
|
109 |
+
|
110 |
+
###############################################
|
111 |
+
# Configure socketio server
|
112 |
+
###############################################
|
113 |
+
|
114 |
+
# TODO PM - change this to the actual path
|
115 |
+
# seamless remnant code
|
116 |
+
CLIENT_BUILD_PATH = "../streaming-react-app/dist/"
|
117 |
+
static_files = {
|
118 |
+
"/": CLIENT_BUILD_PATH,
|
119 |
+
"/assets/seamless-db6a2555.svg": {
|
120 |
+
"filename": CLIENT_BUILD_PATH + "assets/seamless-db6a2555.svg",
|
121 |
+
"content_type": "image/svg+xml",
|
122 |
+
},
|
123 |
+
}
|
124 |
+
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
125 |
+
device = torch.device("cpu")
|
126 |
+
processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
|
127 |
+
|
128 |
+
# PM - hardcoding temporarily as my GPU doesnt have enough vram
|
129 |
+
model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large").to(device)
|
130 |
+
|
131 |
+
|
132 |
+
bytes_data = bytearray()
|
133 |
+
model_name = "seamlessM4T_v2_large"
|
134 |
+
vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
|
135 |
+
|
136 |
+
clients = {}
|
137 |
+
rooms = {}
|
138 |
+
|
139 |
+
|
140 |
+
def get_collection_users():
    """Return the ``user_records`` collection of the application database."""
    users_collection = app.database["user_records"]
    return users_collection
|
142 |
+
|
143 |
+
def get_collection_calls():
    """Return the ``call_records`` collection of the application database."""
    calls_collection = app.database["call_records"]
    return calls_collection
|
145 |
+
|
146 |
+
|
147 |
+
@app.get("/", response_description="Welcome User")
def test():
    """Root endpoint: return a static welcome payload."""
    welcome = dict(message="Welcome to InterpreTalk!")
    return welcome
|
150 |
+
|
151 |
+
|
152 |
+
async def send_translated_text(client_id, username, original_text, translated_text, room_id):
    """Emit a ``translated_text`` event with one caption to a socket.io room.

    Args:
        client_id: Id of the speaker; sent as ``author_id``.
        username: Name of the speaker; sent as ``author_username``.
        original_text: Text as transcribed from the speaker.
        translated_text: Text translated for the listener.
        room_id: Socket.io room the event is emitted to.
    """
    # print(rooms) # Debugging
    # print(clients) # Debugging

    data = {
        "author_id": str(client_id),
        "author_username": str(username),
        "original_text": str(original_text),
        "translated_text": str(translated_text),
        "timestamp": str(datetime.now())
    }
    gunicorn_logger.info("SENDING TRANSLATED TEXT TO CLIENT")
    await sio.emit("translated_text", data, room=room_id)
    # Log message corrected: this function sends text, not audio.
    gunicorn_logger.info("SUCCESSFULLY SENT TRANSLATED TEXT TO FRONTEND")
|
166 |
+
|
167 |
+
|
168 |
+
@sio.on("connect")
async def connect(sid, environ):
    """Register a newly connected socket in ``clients``.

    Reads ``client_id`` from the connection's query string, looks up the
    matching username and stores a ``Client`` under the socket id.
    """
    print(f"📥 [event: connected] sid={sid}")
    client_id = dict(parse.parse_qsl(environ["QUERY_STRING"])).get("client_id")
    gunicorn_logger.info(f"📥 [event: connected] sid={sid}, client_id={client_id}")

    # Resolve the username for this client id from the users collection.
    username = find_name_from_id(get_collection_users(), client_id)

    # sid is the per-connection socket id; client_id stays the same for a user.
    new_client = Client(sid, client_id, username)
    clients[sid] = new_client
    print(new_client.username)
    gunicorn_logger.warning(f"Client connected: {sid}")
    gunicorn_logger.warning(clients)
|
184 |
+
|
185 |
+
|
186 |
+
@sio.on("disconnect")
async def disconnect(sid):
    """Forget a disconnected client and post-process the call it was in.

    Removes the client from ``clients`` and from its room in ``rooms``, then
    runs key-term extraction and summarisation for the call. Failures in the
    post-processing are logged and do not propagate.
    """
    gunicorn_logger.debug(f"📤 [event: disconnected] sid={sid}")

    client = clients.pop(sid, None)
    if client is None:
        # The sid was never registered (e.g. connect failed); previously this
        # raised KeyError.
        gunicorn_logger.warning(f"disconnect received for unknown sid={sid}")
        return

    call_id = client.call_id
    user_id = client.client_id
    tgt_lang = client.target_language

    # Free the client's slot in the room. Without this the room list only ever
    # grows and the two-member limit permanently reports the room as full.
    members = rooms.get(call_id)
    if members is not None:
        if sid in members:
            members.remove(sid)
        if not members:
            rooms.pop(call_id, None)

    if call_id is None:
        # The client never joined a call, so there is nothing to summarise.
        return

    # Perform Key Term Extraction and summarisation
    try:
        # Get combined caption field for call record based on call_id
        term_extraction(get_collection_calls(), call_id, user_id, tgt_lang)

        # Perform summarisation based on target language
        summarise(get_collection_calls(), call_id, user_id, tgt_lang)
    except Exception:
        # Narrowed from a bare except; .exception() also records the traceback.
        gunicorn_logger.exception(f"📤 [event: term_extraction/summarisation request error] sid={sid}, call={call_id}")
|
206 |
+
|
207 |
+
|
208 |
+
@sio.on("target_language")
async def target_language(sid, target_lang):
    """Store the target language chosen by the client behind ``sid``."""
    gunicorn_logger.info(f"📥 [event: target_language] sid={sid}, target_lang={target_lang}")
    client = clients[sid]
    client.target_language = target_lang
|
212 |
+
|
213 |
+
|
214 |
+
@sio.on("call_user")
async def call_user(sid, call_id):
    """Handle a caller starting a call: join the room and create the DB record.

    Args:
        sid: Socket.io session id of the caller.
        call_id: Id of the call; also used as the socket.io room name.
    """
    import inspect  # local import: only needed for the enter_room handling below

    clients[sid].call_id = call_id
    gunicorn_logger.info(f"CALL {sid}: entering room {call_id}")
    members = rooms.setdefault(call_id, [])
    if sid not in members and len(members) < 2:
        members.append(sid)
        # NOTE(review): AsyncServer.enter_room is a coroutine in recent
        # python-socketio releases; un-awaited it never runs and the client
        # never joins the room. Awaiting only when the result is awaitable
        # keeps older releases working too. Confirm against the pinned version.
        joined = sio.enter_room(sid, call_id)
        if inspect.isawaitable(joined):
            await joined
    else:
        gunicorn_logger.info(f"CALL {sid}: room {call_id} is full")
        # await sio.emit("room_full", room=call_id, to=sid)

    # BO - Get client id from the Client object created during socketio connection
    client_id = clients[sid].client_id

    gunicorn_logger.warning(f"NOW TRYING TO CREATE DB RECORD FOR Caller with ID: {client_id} for call: {call_id}")
    # BO -> Create Call Record with caller_id and call_id fields
    # NOTE(review): the record is created even when the room was full; confirm
    # that this is intended.
    request_data = {
        "call_id": str(call_id),
        "caller_id": str(client_id),
        "creation_date": str(datetime.now())
    }

    response = create_calls(get_collection_calls(), request_data)
    print(response)  # BO - print created db call record
|
239 |
+
|
240 |
+
|
241 |
+
@sio.on("audio_config")
async def audio_config(sid, sample_rate):
    """Record the sample rate of the audio that the client behind ``sid`` sends."""
    client = clients[sid]
    client.original_sr = sample_rate
|
244 |
+
|
245 |
+
|
246 |
+
@sio.on("answer_call")
async def answer_call(sid, call_id):
    """Handle a callee answering a call: join the room and update the DB record.

    Args:
        sid: Socket.io session id of the callee.
        call_id: Id of the call; also used as the socket.io room name.
    """
    import inspect  # local import: only needed for the enter_room handling below

    clients[sid].call_id = call_id
    gunicorn_logger.info(f"ANSWER {sid}: entering room {call_id}")
    members = rooms.setdefault(call_id, [])
    if sid not in members and len(members) < 2:
        members.append(sid)
        # NOTE(review): AsyncServer.enter_room is a coroutine in recent
        # python-socketio releases; un-awaited it never runs and the client
        # never joins the room. Awaiting only when the result is awaitable
        # keeps older releases working too. Confirm against the pinned version.
        joined = sio.enter_room(sid, call_id)
        if inspect.isawaitable(joined):
            await joined
    else:
        gunicorn_logger.info(f"ANSWER {sid}: room {call_id} is full")
        # await sio.emit("room_full", room=call_id, to=sid)

    # BO - Get client id from the Client object created during socketio connection
    client_id = clients[sid].client_id

    # BO -> Update Call Record with callee_id field based on call_id
    # Log message corrected: the answering client is the callee, not the caller.
    gunicorn_logger.warning(f"NOW UPDATING MongoDB RECORD FOR Callee with ID: {client_id} for call: {call_id}")
    request_data = {
        "callee_id": client_id
    }

    response = update_calls(get_collection_calls(), call_id, request_data)
    print(response)  # BO - print updated db call record
|
272 |
+
|
273 |
+
|
274 |
+
@sio.on("incoming_audio")
async def incoming_audio(sid, data, call_id):
    """Buffer incoming audio and, once enough is collected, transcribe and translate it.

    When the client's buffer reaches ``MAX_BYTES_BUFFER`` it is resampled and
    checked for voice activity. If speech is detected, the audio is transcribed
    in the speaker's language, translated to the other participant's language
    when the two differ, emitted to the room and stored as a caption.

    Args:
        sid: Socket.io session id of the speaker.
        data: Chunk of raw audio bytes.
        call_id: Id of the call / room the speaker is in.
    """
    try:
        speaker = clients[sid]
        speaker.add_bytes(data)

        if speaker.get_length() < MAX_BYTES_BUFFER:
            return

        gunicorn_logger.info('Buffer full, now outputting...')
        resampled_audio = speaker.resample_and_clear()
        vad_result = speaker.vad_analyse(resampled_audio)
        # source lang is speakers tgt language 😃
        src_lang = speaker.target_language

        if not vad_result:
            return

        gunicorn_logger.info('Speech detected, now processing audio.....')
        # Find the other participant. next() without a default raised
        # StopIteration when the speaker was alone in the room.
        tgt_sid = next((peer for peer in rooms.get(call_id, []) if peer != sid), None)
        if tgt_sid is None:
            gunicorn_logger.warning(f"No other participant in room {call_id}; skipping translation for sid={sid}")
            return
        tgt_lang = clients[tgt_sid].target_language

        # following example from https://github.com/facebookresearch/seamless_communication/blob/main/docs/m4t/README.md#transformers-usage
        output_tokens = processor(audios=resampled_audio, src_lang=src_lang, return_tensors="pt", sampling_rate=TARGET_SAMPLING_RATE).to(device)
        model_output = model.generate(**output_tokens, tgt_lang=src_lang, generate_speech=False)[0].tolist()[0]
        asr_text = processor.decode(model_output, skip_special_tokens=True)
        print(f"ASR TEXT = {asr_text}")
        # ASR TEXT => ORIGINAL TEXT

        if src_lang != tgt_lang:
            t2t_tokens = processor(text=asr_text, src_lang=src_lang, tgt_lang=tgt_lang, return_tensors="pt").to(device)
            translated_data = model.generate(**t2t_tokens, tgt_lang=tgt_lang, generate_speech=False)[0].tolist()[0]
            translated_text = processor.decode(translated_data, skip_special_tokens=True)
            print(f"TRANSLATED TEXT = {translated_text}")
        else:
            # PM - both users have same language selected, no need to translate
            translated_text = asr_text

        await send_translated_text(speaker.client_id, speaker.username, asr_text, translated_text, call_id)

        # BO -> send translated_text to mongodb as caption record update based on call_id
        await send_captions(speaker.client_id, speaker.username, asr_text, translated_text, call_id)

    except Exception as e:
        # e.with_traceback() requires an argument, so the old handler itself
        # raised TypeError and hid the real error. .exception() logs the
        # message together with the traceback.
        gunicorn_logger.exception(f"Error in incoming_audio: {e}")
|
315 |
+
|
316 |
+
|
317 |
+
async def send_captions(client_id, username, original_text, translated_text, call_id):
    """Build a caption entry and pass it to ``update_captions`` for ``call_id``.

    Returns:
        Whatever ``update_captions`` returns.
    """
    print(f"Now updating Caption field in call record for Caller with ID: {client_id} for call: {call_id}")

    caption = dict(
        author_id=str(client_id),
        author_username=str(username),
        original_text=str(original_text),
        translated_text=str(translated_text),
        timestamp=str(datetime.now()),
    )
    return update_captions(get_collection_calls(), get_collection_users(), call_id, caption)
|
331 |
+
|
332 |
+
|
333 |
+
app.mount("/", socketio_app)
|
334 |
+
|
335 |
+
|
336 |
+
if __name__ == '__main__':
|
337 |
+
uvicorn.run("main:app", host='0.0.0.0', port=7860, log_level="info")
|
338 |
+
|
339 |
+
|
340 |
+
# Running in Docker Container
|
341 |
+
if __name__ != "__main__":
|
342 |
+
fastapi_logger.setLevel(gunicorn_logger.level)
|
343 |
+
else:
|
344 |
+
fastapi_logger.setLevel(logging.DEBUG)
|
backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc
ADDED
Binary file (3.77 kB). View file
|
|
backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc
ADDED
Binary file (2.01 kB). View file
|
|
backend/mongodb/endpoints/calls.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, Body, Request, status, HTTPException
|
2 |
+
from typing import List
|
3 |
+
from datetime import date
|
4 |
+
|
5 |
+
import sys
|
6 |
+
|
7 |
+
from ..operations import calls as calls
|
8 |
+
from ..models.calls import UserCaptions, UserCall, UpdateCall
|
9 |
+
from ..endpoints.users import get_collection_users
|
10 |
+
|
11 |
+
router = APIRouter(prefix="/call",
|
12 |
+
tags=["Calls"])
|
13 |
+
|
14 |
+
def get_collection_calls(request: Request):
    """Return the ``call_records`` collection from the application database.

    Raises:
        HTTPException: 404 if the collection cannot be obtained from the app.
    """
    try:
        return request.app.database["call_records"]
    except Exception as exc:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not
        # converted into HTTP errors; the original cause is kept via chaining.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Unable to find call records Database.") from exc
|
19 |
+
|
20 |
+
@router.post("/create-call", response_description="Create a new user call record", status_code=status.HTTP_201_CREATED, response_model=UserCall)
async def create_calls(request: Request, user_calls: UserCall = Body(...)):
    """Delegate to ``calls.create_calls`` with the call collection and request body."""
    return calls.create_calls(get_collection_calls(request), user_calls)
|
24 |
+
|
25 |
+
@router.get("/find-call/{call_id}", response_description="Find a call record based on Call ID", response_model=UserCall)
async def find_call(request: Request, call_id: str):
    """Delegate to ``calls.find_call`` for the given ``call_id``.

    The response description previously read "Find user's calls based on User
    ID", copied from the per-user endpoint; this route looks up by call id.
    """
    collection = get_collection_calls(request)
    return calls.find_call(collection, call_id)
|
29 |
+
|
30 |
+
@router.get("/find-user-calls/{user_id}", response_description="Find user's calls based on User ID", response_model=List[UserCall])
async def find_user_calls(request: Request, user_id: str):
    """Delegate to ``calls.find_user_calls`` for the given ``user_id``."""
    return calls.find_user_calls(get_collection_calls(request), user_id)
|
34 |
+
|
35 |
+
@router.get("/get-captions/{call_id}/{user_id}", response_description="Get caption text for a call based on Call ID and User ID")
async def get_caption_text(request: Request, call_id: str, user_id: str):
    """Delegate to ``calls.get_caption_text`` for the given call and user.

    The response description previously read "Find user's calls based on User
    ID", copied from another endpoint.
    """
    collection = get_collection_calls(request)
    return calls.get_caption_text(collection, call_id, user_id)
|
39 |
+
|
40 |
+
@router.get("/find-duration/{min_len}/{max_len}", response_description="Find calls based on call duration in minutes", response_model=List[UserCall])
async def list_transcripts_by_duration(request: Request, min_len: int, max_len: int):
    """Delegate to ``calls.list_transcripts_by_duration`` with the given bounds."""
    return calls.list_transcripts_by_duration(get_collection_calls(request), min_len, max_len)
|
44 |
+
|
45 |
+
@router.put("/update-call/{call_id}", response_description="Update an existing call", response_model=UpdateCall)
async def update_calls(request: Request, call_id: str, user_calls: UpdateCall = Body(...)):
    """Delegate to ``calls.update_calls`` with the request body for ``call_id``."""
    return calls.update_calls(get_collection_calls(request), call_id, user_calls)
|
49 |
+
|
50 |
+
@router.put("/update-captions/{call_id}", response_description="Update the captions of an existing call", response_model=UpdateCall)
async def update_captions(request: Request, call_id: str, user_calls: UserCaptions = Body(...)):
    """Delegate to ``calls.update_captions`` with the caption in the request body.

    The response description previously read "Update an existing call", the
    same text as the update-call endpoint.
    """
    call_collection = get_collection_calls(request)
    user_collection = get_collection_users(request)
    return calls.update_captions(call_collection, user_collection, call_id, user_calls)
|
55 |
+
|
56 |
+
@router.delete("/delete-call/{call_id}", response_description="Delete a call by its id")
async def delete_call(request: Request, call_id: str):
    """Delegate to ``calls.delete_calls`` for the given ``call_id``."""
    return calls.delete_calls(get_collection_calls(request), call_id)
|
60 |
+
|
61 |
+
@router.get("/fuzzy-search/{user_id}/{query}", response_description="Perform fuzzy text search on caption fields", response_model=List[UserCall])
async def fuzzy_search(request: Request, user_id: str, query: str):
    """Delegate to ``calls.fuzzy_search`` with the user id and search query."""
    return calls.fuzzy_search(get_collection_calls(request), user_id, query)
|
65 |
+
|
66 |
+
@router.get("/summarise/{call_id}/{user_id}/{target_language}", response_description="Perform gpt-3.5 summarisation on call_id")
async def summarise(request: Request, call_id: str, user_id: str, target_language: str):
    """Delegate to ``calls.summarise`` for the given call, user and language."""
    return calls.summarise(get_collection_calls(request), call_id, user_id, target_language)
|
70 |
+
|
71 |
+
@router.get("/term-extraction/{call_id}/{user_id}/{target_language}", response_description="Perform key term extraction on call record")
async def term_extraction(request: Request, call_id: str, user_id: str, target_language: str):
    """Delegate to ``calls.term_extraction`` for the given call, user and language."""
    return calls.term_extraction(get_collection_calls(request), call_id, user_id, target_language)
|
backend/mongodb/endpoints/users.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, Body, Request, status, HTTPException
|
2 |
+
from typing import List
|
3 |
+
import sys
|
4 |
+
from ..models.users import User, UpdateUser
|
5 |
+
from ..operations import users as users
|
6 |
+
|
7 |
+
router = APIRouter(prefix="/user",
|
8 |
+
tags=["User"])
|
9 |
+
|
10 |
+
def get_collection_users(request: Request):
    """Return the ``user_records`` collection from the application database."""
    return request.app.database["user_records"]
|
13 |
+
|
14 |
+
@router.post("/", response_description="Create a new user", status_code=status.HTTP_201_CREATED, response_model=User)
async def create_user(request: Request, user: User = Body(...)):
    """Delegate to ``users.create_user`` with the user in the request body."""
    return users.create_user(get_collection_users(request), user)
|
18 |
+
|
19 |
+
@router.get("/", response_description="List users", response_model=List[User])
async def list_users(request: Request):
    """Delegate to ``users.list_users``, passing 100 as its second argument."""
    return users.list_users(get_collection_users(request), 100)
|
23 |
+
|
24 |
+
@router.put("/{user_id}", response_description="Update a User", response_model=UpdateUser)
async def update_user(request: Request, user_id: str, user: UpdateUser = Body(...)):
    """Delegate to ``users.update_user`` with the request body for ``user_id``."""
    return users.update_user(get_collection_users(request), user_id, user)
|
28 |
+
|
29 |
+
@router.get("/{user_id}", response_description="Get a single user by id", response_model=User)
async def find_user(request: Request, user_id: str):
    """Delegate to ``users.find_user`` for the given ``user_id``."""
    return users.find_user(get_collection_users(request), user_id)
|
33 |
+
|
34 |
+
@router.get("/find-name-id/{user_id}", response_description="Get a username from user id")
async def find_name_from_id(request: Request, user_id: str):
    """Delegate to ``users.find_name_from_id`` for the given ``user_id``."""
    return users.find_name_from_id(get_collection_users(request), user_id)
|
38 |
+
|
39 |
+
@router.delete("/{user_id}", response_description="Delete a user")
async def delete_user(request: Request, user_id: str):
    """Delegate to ``users.delete_user`` for the given ``user_id``."""
    return users.delete_user(get_collection_users(request), user_id)
|
43 |
+
|
backend/mongodb/models/__pycache__/calls.cpython-310.pyc
ADDED
Binary file (3.01 kB). View file
|
|
backend/mongodb/models/__pycache__/users.cpython-310.pyc
ADDED
Binary file (1.52 kB). View file
|
|
backend/mongodb/models/calls.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uuid
|
2 |
+
from typing import List, Dict, Optional
|
3 |
+
from datetime import datetime
|
4 |
+
from pydantic import BaseModel, Field, PrivateAttr
|
5 |
+
import sys
|
6 |
+
|
7 |
+
|
8 |
+
''' Class for storing captions generated by SeamlessM4T'''
|
9 |
+
class UserCaptions(BaseModel):
    # One caption entry: who said it, the original text, its translation and
    # when it was recorded.
    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4) # private attr not included in http calls
    author_id: Optional[str] = None
    author_username: Optional[str] = None
    original_text: str
    translated_text: str
    # Defaults to the time the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)

    class Config:
        populate_by_name = True
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2",
                "author_username": "shamzino",
                "original_text": "eng: This is original_text english text",
                "translated_text": "spa: este es el texto traducido al español",
                "timestamp": "2024-03-28T16:15:50.956055",

            }
        }
|
29 |
+
|
30 |
+
|
31 |
+
'''Class for storing past call records from users'''
|
32 |
+
class UserCall(BaseModel):
    # A stored call record: participants, creation time, captions and the
    # derived key terms and summaries.
    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
    call_id: Optional[str] = None
    caller_id: Optional[str] = None
    callee_id: Optional[str] = None
    # Exposed under the alias "date"; populate_by_name below also allows the
    # field name "creation_date" on input.
    creation_date: datetime = Field(default_factory=datetime.now, alias="date")
    captions: Optional[List[UserCaptions]] = None
    # NOTE(review): the example below keys both dicts by user id — confirm
    # that this is the stored shape.
    key_terms: Optional[dict] = None
    summaries: Optional[dict] = None


    class Config:
        populate_by_name = True
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "call_id": "65eef930e9abd3b1e3506906",
                "caller_id": "65ede65b6d246e52aaba9d4f",
                "callee_id": "65edda944340ac84c1f00758",
                "captions": [{"author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2", "author_username": "shamzino", "original_text": "eng: This is original_text english text", "translated_text": "spa: este es el texto traducido al español", "timestamp": "2024-03-28T16:15:50.956055"},
                             {"author_id": "g7pR1qCibzQf5mDP9dGtcoWeEc92", "author_username": "benjino", "original_text": "eng: This is source english text", "translated_text": "spa: este es el texto fuente al español", "timestamp": "2024-03-28T16:16:20.34625"}],
                "key_terms": {"gLZrfTwXyLUPB3eT7xT2HZnZiZT2": ["original_text", "source", "english", "text"], "g7pR1qCibzQf5mDP9dGtcoWeEc92": ["translated_text", "destination", "spanish", "text"]},
                "summaries": {"gLZrfTwXyLUPB3eT7xT2HZnZiZT2": "This is a short test on lanuguage translation", "65edda944340ac84c1f00758": "Esta es una breve prueba sobre traducción de idiomas."}
            }
        }
|
56 |
+
|
57 |
+
|
58 |
+
''' Class for updating User Call record'''
|
59 |
+
class UpdateCall(BaseModel):
    # Partial-update payload for a call record: every field is optional and
    # only the fields that are not None are meant to be written.
    call_id: Optional[str] = None
    caller_id: Optional[str] = None
    callee_id: Optional[str] = None
    captions: Optional[List[UserCaptions]] = None
    # NOTE(review): UserCall.key_terms is a dict keyed by user ID while this is
    # a flat list -- confirm which shape update requests should carry.
    key_terms: Optional[List[str]] = None

    class Config:
        populate_by_name = True
        # The example must only use fields this model declares; the previous
        # example advertised a "duration" field that does not exist.
        json_schema_extra = {
            "example": {
                "callee_id": "65edda944340ac84c1f00758"
            }
        }
|
backend/mongodb/models/users.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uuid
|
2 |
+
from typing import List, Optional
|
3 |
+
from pydantic import BaseModel, Field, SecretStr, PrivateAttr
|
4 |
+
from pydantic.networks import EmailStr
|
5 |
+
|
6 |
+
|
7 |
+
'''Class for user model used to relate users to past calls'''
|
8 |
+
class User(BaseModel):
    # User record; user_id is the key the call records refer to via caller_id/callee_id lookups.
    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4) # private attr not included in http calls
    user_id: str  # required
    name: str  # required
    # NOTE(review): unique/index are not standard pydantic Field arguments --
    # confirm they have the intended effect on the stored collection.
    email: EmailStr = Field(unique=True, index=True)

    class Config:
        populate_by_name = True
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "user_id": "65ede65b6d246e52aaba9d4f",
                "name": "benjolo",
                "email": "[email protected]"
            }
        }
|
23 |
+
|
24 |
+
'''Class for updating user records'''
|
25 |
+
class UpdateUser(BaseModel):
    # Partial-update payload for a user: every field is optional and
    # update_user only writes the fields that are not None.
    user_id: Optional[str] = None
    name: Optional[str] = None
    email: Optional[EmailStr] = None

    class Config:
        populate_by_name = True
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "email": "[email protected]"
            }
        }
|
37 |
+
|
backend/mongodb/operations/__pycache__/calls.cpython-310.pyc
ADDED
Binary file (5.01 kB). View file
|
|
backend/mongodb/operations/__pycache__/users.cpython-310.pyc
ADDED
Binary file (2.89 kB). View file
|
|
backend/mongodb/operations/calls.py
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import Body, Request, HTTPException, status
|
2 |
+
from fastapi.encoders import jsonable_encoder
|
3 |
+
import sys
|
4 |
+
from ..models.calls import UpdateCall, UserCall, UserCaptions
|
5 |
+
from ..operations.users import *
|
6 |
+
from utils.text_rank import extract_terms
|
7 |
+
from openai import OpenAI
|
8 |
+
|
9 |
+
from time import sleep
|
10 |
+
import os
|
11 |
+
from dotenv import dotenv_values
|
12 |
+
|
13 |
+
|
14 |
+
# Used within calls to create call record in main.py
|
15 |
+
def create_calls(collection, user: UserCall = Body(...)):
    """Insert a call record and return the document as it was stored.

    Args:
        collection: Collection object providing ``insert_one`` and ``find_one``.
        user: The call record to store; it is JSON-encoded before insertion.

    Returns:
        The document read back by the ``_id`` assigned on insert.
    """
    encoded_call = jsonable_encoder(user)
    inserted_id = collection.insert_one(encoded_call).inserted_id
    return collection.find_one({"_id": inserted_id})
|
21 |
+
|
22 |
+
|
23 |
+
'''Finding calls based on call id'''
|
24 |
+
def find_call(collection, call_id: str):
    """Return the call document with the given ``call_id``.

    Raises:
        HTTPException: 404 when no document has that ``call_id``.
    """
    record = collection.find_one({"call_id": call_id})
    if record is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with ID: '{call_id}' not found.")
    return record
|
30 |
+
|
31 |
+
|
32 |
+
'''Finding calls based on user id'''
|
33 |
+
def find_user_calls(collection, user_id: str):
    """Return every call in which ``user_id`` is the caller or the callee.

    Returns:
        A list of matching documents; an empty list when there are none
        (relied on by the TranscriptView frontend component).
    """
    participant_filter = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}  # match on caller or callee ID
    return list(collection.find(participant_filter))
|
39 |
+
|
40 |
+
|
41 |
+
def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
    """Apply a partial update to a call record and return the stored document.

    Args:
        collection: Collection object providing ``update_one`` and ``find_one``.
        call_id: ``call_id`` of the record to update.
        calls: The fields to change, either as a mapping or as a pydantic
            model; entries whose value is ``None`` are ignored.

    Returns:
        The call document as stored after the update.

    Raises:
        HTTPException: 404 when no record has that ``call_id``.
    """
    # The parameter is annotated as a model but was consumed with .items();
    # accept both so either kind of caller works.
    fields = calls if hasattr(calls, "items") else calls.model_dump()
    changes = {key: value for key, value in fields.items() if value is not None}

    if changes:
        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})

        # matched_count, not modified_count: writing values identical to the
        # stored ones modifies nothing but is not a "not found" condition.
        if update_result.matched_count == 0:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    existing_item = collection.find_one({"call_id": call_id})
    if existing_item is not None:
        return existing_item

    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
|
55 |
+
|
56 |
+
|
57 |
+
def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
    """Append one caption segment to a call record and return the stored call.

    The author's user name is looked up from ``author_id`` and stored on the
    caption as ``author_username`` before it is pushed.

    Args:
        call_collection: Collection holding call records.
        user_collection: Collection holding user records.
        call_id: ``call_id`` of the call the caption belongs to.
        captions: The caption segment, as a mapping or a pydantic model;
            entries whose value is ``None`` are dropped.

    Returns:
        The call document as stored after the caption was appended.

    Raises:
        HTTPException: 400 when the caption carries no ``author_id``; 404 when
            the author or the call cannot be found.
    """
    # The parameter is annotated as a model but was consumed with .items();
    # accept both so either kind of caller works.
    fields = captions if hasattr(captions, "items") else captions.model_dump()
    caption = {key: value for key, value in fields.items() if value is not None}

    # author_id is optional on the model, so it may have been filtered out
    # above; report that explicitly instead of failing with a KeyError.
    author_id = caption.get("author_id")
    if author_id is None:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Caption is missing 'author_id'!")

    # use user id to get user name and add it to the caption
    caption["author_username"] = find_name_from_id(user_collection, author_id)

    update_result = call_collection.update_one({"call_id": call_id},
                                               {"$push": {"captions": caption}})

    if update_result.modified_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not updated!")

    existing_item = call_collection.find_one({"call_id": call_id})
    if existing_item is not None:
        return existing_item

    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not found!")
|
80 |
+
|
81 |
+
|
82 |
+
def delete_calls(collection, call_id: str):
    """Delete the call record with the given ``call_id``.

    Returns:
        A confirmation message when exactly one record was deleted.

    Raises:
        HTTPException: 404 when nothing was deleted.
    """
    outcome = collection.delete_one({"call_id": call_id})

    if outcome.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")

    return "Call deleted sucessfully!"
|
89 |
+
|
90 |
+
|
91 |
+
def get_caption_text(collection, call_id, user_id):
    """Build one transcript string for a call from a given user's point of view.

    Segments authored by ``user_id`` contribute their ``original_text``; all
    other segments contribute their ``translated_text``.

    Args:
        collection: Collection holding call records.
        call_id: ``call_id`` of the call to read.
        user_id: ID of the user the transcript is assembled for.

    Returns:
        The segments joined with single spaces, or ``None`` when the call has
        no ``captions`` value.

    Raises:
        HTTPException: 404 (from ``find_call``) when the call does not exist.
    """
    call_record = find_call(collection, call_id)

    # `captions` may be missing or stored as null (UserCall defaults it to
    # None and the encoded record is inserted as-is); both mean "no captions".
    caption_records = call_record.get('captions')
    if caption_records is None:
        return None

    return " ".join(
        segment['original_text'] if segment['author_id'] == user_id else segment['translated_text']
        for segment in caption_records
    )
|
108 |
+
|
109 |
+
|
110 |
+
# approximate string matching
|
111 |
+
def fuzzy_search(collection, user_id, query):
    """Run a fuzzy text search over call records and keep the user's own calls.

    Args:
        collection: Collection holding call records.
        user_id: Only calls where this ID is the caller or callee are returned.
        query: The search text.

    Returns:
        A list of matching call documents involving ``user_id``.
    """
    # drop any existing indexes and create new one
    # NOTE(review): this drops every index on the collection on each search;
    # consider creating the index once at startup instead.
    collection.drop_indexes()
    collection.create_index(
        # was 'captions.tranlated_text', which indexed a non-existent field
        [('captions.original_text', 'text'), ('captions.translated_text', 'text')],
        name='captions',
    )

    pipeline = [
        {
            "$search": {
                "text": {
                    "query": query,
                    "path": {"wildcard": "*"},
                    "fuzzy": {}
                }
            }
        }
    ]

    # keep only the records that involve the requesting user
    return [
        doc for doc in collection.aggregate(pipeline)
        if doc['caller_id'] == user_id or doc['callee_id'] == user_id
    ]
|
141 |
+
|
142 |
+
|
143 |
+
def summarise(collection, call_id, user_id, target_language):
    """Summarise a call transcript with the OpenAI chat API and store the result.

    The summary is written to ``summaries.<user_id>`` on the call record.

    Args:
        collection: Collection holding call records.
        call_id: ``call_id`` of the call to summarise.
        user_id: ID of the user the transcript and summary are produced for.
        target_language: Language the model is asked to write the summary in.

    Returns:
        The summary with any leading ``"<prefix>:"`` removed, or ``None`` when
        the call has no captions or the completion request fails.

    Raises:
        HTTPException: 404 when the call does not exist.
    """
    config = dotenv_values(".env")
    client = OpenAI(api_key=config["OPENAI_API_KEY"])

    # get caption text using call_id
    caption_text = get_caption_text(collection, call_id, user_id)
    if caption_text is None:
        # nothing to summarise; avoid sending the literal text "None"
        return None

    prompt = (
        "The following is an extract from a call transcript. "
        f"Rewrite this as a structured, clear summary in {target_language}. "
        f"\nCall Transcript: \"\"\"\n{caption_text}\n\"\"\"\n"
    )

    # Gpt-3.5 turbo has 4096 token limit -> request will fail if exceeded.
    # The request itself must sit inside the try for that failure to be caught.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-3.5-turbo",
        )
        result = chat_completion.choices[0].message.content
    except Exception:
        return None

    # add result to mongodb
    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"summaries.{user_id}": result}})

    # matched_count, not modified_count: storing an identical summary again
    # modifies nothing but is not a "not found" condition.
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    # remove a leading "<prefix>:" if present; split only on the first colon so
    # colons inside the summary do not truncate it
    _prefix, separator, remainder = result.partition(":")
    return remainder.strip() if separator else result
|
181 |
+
|
182 |
+
|
183 |
+
def term_extraction(collection, call_id, user_id, target_language):
    """Extract key terms from a call transcript and store them on the call.

    The terms are written to ``key_terms.<user_id>`` on the call record.

    Args:
        collection: Collection holding call records.
        call_id: ``call_id`` of the call to analyse.
        user_id: ID of the user the transcript is assembled for.
        target_language: Language passed through to ``extract_terms``.

    Returns:
        The extracted key terms, or ``None`` when the call has no captions or
        the transcript is 50 characters or shorter.

    Raises:
        HTTPException: 404 when the call does not exist.
    """
    combined_text = get_caption_text(collection, call_id, user_id)

    # get_caption_text returns None for calls without captions, and short
    # transcripts give poor term extraction; skip both explicitly.
    if combined_text is None or len(combined_text) <= 50:
        return None

    # Extract Key Terms from Concatenated Caption Field
    key_terms = extract_terms(combined_text, target_language, len(combined_text))

    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"key_terms.{user_id}": key_terms}})

    # matched_count, not modified_count: re-running extraction on an unchanged
    # transcript stores identical terms and must not be reported as a 404.
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    return key_terms
|
backend/mongodb/operations/users.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import Body, Request, HTTPException, status
|
2 |
+
from fastapi.encoders import jsonable_encoder
|
3 |
+
import sys
|
4 |
+
from ..models.users import User, UpdateUser
|
5 |
+
from bson import ObjectId
|
6 |
+
import re
|
7 |
+
|
8 |
+
|
9 |
+
def create_user(collection, user: User = Body(...)):
    """Insert a user record and return the document as it was stored.

    Args:
        collection: Collection object providing ``insert_one`` and ``find_one``.
        user: The user to store; it is JSON-encoded before insertion.

    Returns:
        The document read back by the ``_id`` assigned on insert.
    """
    document = jsonable_encoder(user)
    inserted_id = collection.insert_one(document).inserted_id
    return collection.find_one({"_id": inserted_id})
|
14 |
+
|
15 |
+
|
16 |
+
def list_users(collection, limit: int):
    """Return up to ``limit`` user documents as a list.

    Raises:
        HTTPException: 404 when the query fails.
    """
    try:
        return list(collection.find(limit=limit))
    except Exception as exc:
        # Exception rather than a bare except, so KeyboardInterrupt/SystemExit
        # are not converted into a 404; the cause is kept for debugging.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No users found!") from exc
|
22 |
+
|
23 |
+
|
24 |
+
def find_user(collection, user_id: str):
    """Return the user document with the given ``user_id``.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    user = collection.find_one({"user_id": user_id})
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
    return user
|
29 |
+
|
30 |
+
def find_name_from_id(collection, user_id: str):
    """Return the ``name`` of the user with the given ``user_id``.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    # project the query down to the name field only
    name_only = collection.find_one({"user_id": user_id}, {"name": 1, "_id": 0})
    if not name_only:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
    return name_only['name']
|
37 |
+
|
38 |
+
def find_user_name(collection, name: str):
    """Return the user whose ``name`` equals ``name``, ignoring case.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    # anchored, escaped pattern: whole-string match, case-insensitive
    exact_name = re.compile('^' + re.escape(name) + '$', re.IGNORECASE)
    user = collection.find_one({"name": exact_name})
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with name {name} not found!")
    return user
|
44 |
+
|
45 |
+
|
46 |
+
def find_user_email(collection, email: str):
    """Return the user whose ``email`` equals ``email``, ignoring case.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    # anchored, escaped pattern: whole-string match, case-insensitive
    exact_email = re.compile('^' + re.escape(email) + '$', re.IGNORECASE)
    user = collection.find_one({"email": exact_email})
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with Email Address {email} not found!")
    return user
|
51 |
+
|
52 |
+
|
53 |
+
''' Update user record based on user object/json'''
|
54 |
+
def update_user(collection, user_id: str, user: UpdateUser):
    """Apply a partial update to a user record and return the stored document.

    Args:
        collection: Collection object providing ``update_one`` and ``find_one``.
        user_id: ``user_id`` of the record to update.
        user: Model carrying the new values; fields that are ``None`` are ignored.

    Returns:
        The user document as stored after the update.

    Raises:
        HTTPException: 404 when the user does not exist or the update fails.
    """
    not_found_detail = f"User with user_id: '{user_id}' not found and updated!"

    try:
        changes = {key: value for key, value in user.model_dump().items() if value is not None}
        if changes:
            update_result = collection.update_one({"user_id": user_id}, {"$set": changes})
            matched = update_result.matched_count
        else:
            matched = None  # nothing to write; existence is checked below
        existing_users = collection.find_one({"user_id": user_id})
    except Exception as exc:
        # Exception rather than a bare except, so KeyboardInterrupt/SystemExit
        # are not converted into a 404; the cause is kept for debugging.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=not_found_detail) from exc

    # matched_count, not modified_count: writing values identical to the
    # stored ones modifies nothing but is not a "not found" condition.
    # A missing record now always raises instead of silently returning None.
    if matched == 0 or existing_users is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=not_found_detail)

    return existing_users
|
67 |
+
|
68 |
+
|
69 |
+
def delete_user(collection, user_id: str):
    """Delete the user record with the given ``user_id``.

    Returns:
        A confirmation message when exactly one record was deleted.

    Raises:
        HTTPException: 404 when no record was deleted or the delete fails.
    """
    try:
        deleted_user = collection.delete_one({"user_id": user_id})
    except Exception as exc:
        # Exception rather than a bare except, so KeyboardInterrupt/SystemExit
        # are not converted into a 404; the cause is kept for debugging.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!") from exc

    # A delete that matched nothing used to fall through and return None;
    # report it as not found, as delete_calls does for calls.
    if deleted_user.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")

    return f"User with user_id {user_id} deleted sucessfully"
|
backend/requirements.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
colorlog==6.8.2
|
2 |
+
contextlib2==21.6.0
|
3 |
+
fastapi==0.110.1
|
4 |
+
g2p_en==2.1.0
|
5 |
+
matplotlib==3.7.0
|
6 |
+
numpy==1.24.2
|
7 |
+
openai==1.20.0
|
8 |
+
protobuf==5.26.1
|
9 |
+
pydantic==2.7.0
|
10 |
+
pydub==0.25.1
|
11 |
+
pymongo==4.6.2
|
12 |
+
PySoundFile==0.9.0.post1
|
13 |
+
python-dotenv==1.0.1
|
14 |
+
python-socketio==5.9.0
|
15 |
+
pymongo==4.6.2
|
16 |
+
Requests==2.31.0
|
17 |
+
sentencepiece==0.1.99
|
18 |
+
simuleval==1.1.4
|
19 |
+
soundfile==0.12.1
|
20 |
+
spacy==3.7.4
|
21 |
+
pytextrank==3.3.0
|
22 |
+
torch==2.1.2
|
23 |
+
torchaudio==2.1.2
|
24 |
+
#transformers==4.20.1
|
25 |
+
uvicorn==0.29.0
|
26 |
+
vad==1.0.2
|
27 |
+
hf_transfer==0.1.4
|
28 |
+
huggingface_hub==0.19.4
|
backend/routes/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from.routing import router
|
backend/routes/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (235 Bytes). View file
|
|
backend/routes/__pycache__/routing.cpython-310.pyc
ADDED
Binary file (375 Bytes). View file
|
|
backend/routes/routing.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
import sys
|
3 |
+
from mongodb.endpoints import users, calls
|
4 |
+
|
5 |
+
# Top-level router that combines the call and user endpoint routers.
router = APIRouter()
router.include_router(calls.router)
router.include_router(users.router)
|
backend/tests/.pytest_cache/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# Created by pytest automatically.
|
2 |
+
*
|
backend/tests/.pytest_cache/CACHEDIR.TAG
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
2 |
+
# This file is a cache directory tag created by pytest.
|
3 |
+
# For information about cache directory tags, see:
|
4 |
+
# https://bford.info/cachedir/spec.html
|
backend/tests/.pytest_cache/README.md
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pytest cache directory #
|
2 |
+
|
3 |
+
This directory contains data from the pytest's cache plugin,
|
4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
5 |
+
|
6 |
+
**Do not** commit this to version control.
|
7 |
+
|
8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
backend/tests/.pytest_cache/v/cache/lastfailed
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test_client.py": true,
|
3 |
+
"unit_test.py::test_create_calls_success": true,
|
4 |
+
"unit_test.py::test_create_calls_failure": true,
|
5 |
+
"test_main.py::test_connect": true,
|
6 |
+
"test_main.py::test_disconnect": true,
|
7 |
+
"test_main.py::test_target_language": true,
|
8 |
+
"test_main.py::test_incoming_audio": true,
|
9 |
+
"test_main.py": true,
|
10 |
+
"unit_test.py::TestClient": true
|
11 |
+
}
|
backend/tests/.pytest_cache/v/cache/nodeids
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
"integration_test.py::test_extracion_pass2",
|
3 |
+
"integration_test.py::test_extraction_fail",
|
4 |
+
"integration_test.py::test_extraction_pass",
|
5 |
+
"integration_test.py::test_search_fail",
|
6 |
+
"integration_test.py::test_search_pass",
|
7 |
+
"integration_test.py::test_search_pass2",
|
8 |
+
"integration_test.py::test_summary_fail",
|
9 |
+
"integration_test.py::test_summary_fail2",
|
10 |
+
"integration_test.py::test_summary_pass",
|
11 |
+
"integration_test.py::test_summary_pass2",
|
12 |
+
"test_client.py::test_client_add_bytes",
|
13 |
+
"test_client.py::test_client_init",
|
14 |
+
"test_client.py::test_client_resample_and_clear",
|
15 |
+
"test_client.py::test_client_vad",
|
16 |
+
"test_main.py::test_connect",
|
17 |
+
"test_main.py::test_disconnect",
|
18 |
+
"test_main.py::test_incoming_audio",
|
19 |
+
"test_main.py::test_target_language",
|
20 |
+
"unit_test.py::test_create_call_pass",
|
21 |
+
"unit_test.py::test_create_calls_failure",
|
22 |
+
"unit_test.py::test_create_calls_success",
|
23 |
+
"unit_test.py::test_create_user_pass",
|
24 |
+
"unit_test.py::test_delete_user_fail",
|
25 |
+
"unit_test.py::test_delete_user_pass",
|
26 |
+
"unit_test.py::test_find_call_fail",
|
27 |
+
"unit_test.py::test_find_call_pass",
|
28 |
+
"unit_test.py::test_find_name_id_fail",
|
29 |
+
"unit_test.py::test_find_name_id_pass",
|
30 |
+
"unit_test.py::test_find_user_call_fail",
|
31 |
+
"unit_test.py::test_find_user_call_pass",
|
32 |
+
"unit_test.py::test_find_user_fail",
|
33 |
+
"unit_test.py::test_find_user_pass",
|
34 |
+
"unit_test.py::test_get_captions_fail",
|
35 |
+
"unit_test.py::test_get_captions_pass",
|
36 |
+
"unit_test.py::test_root_pass",
|
37 |
+
"unit_test.py::test_update_call_fail",
|
38 |
+
"unit_test.py::test_update_call_pass",
|
39 |
+
"unit_test.py::test_update_caption_pass",
|
40 |
+
"unit_test.py::test_update_user_fail",
|
41 |
+
"unit_test.py::test_update_user_pass"
|
42 |
+
]
|
backend/tests/.pytest_cache/v/cache/stepwise
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[]
|
backend/tests/__init__.py
ADDED
File without changes
|
backend/tests/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (196 Bytes). View file
|
|
backend/tests/__pycache__/integration_test.cpython-310-pytest-8.1.1.pyc
ADDED
Binary file (3.98 kB). View file
|
|
backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc
ADDED
Binary file (6.95 kB). View file
|
|
backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc
ADDED
Binary file (3.92 kB). View file
|
|
backend/tests/__pycache__/test_main.cpython-310.pyc
ADDED
Binary file (2.2 kB). View file
|
|
backend/tests/__pycache__/unit_test.cpython-310-pytest-8.1.1.pyc
ADDED
Binary file (6.16 kB). View file
|
|
backend/tests/integration_test.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import dotenv_values
|
3 |
+
from fastapi import FastAPI
|
4 |
+
from pymongo import MongoClient
|
5 |
+
from main import requests
|
6 |
+
import uuid
|
7 |
+
import pytest
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
import requests
|
10 |
+
import json
|
11 |
+
|
12 |
+
# Test Fuzzy Search Integrated component on existing call records
|
13 |
+
def test_search_pass():
    """A correctly spelled 'football' query returns the three matching transcripts."""
    search_url = "http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/football"
    reply = requests.get(search_url)

    assert reply.status_code == 200
    assert len(reply.json()) == 3  # three matching call transcripts
|
20 |
+
|
21 |
+
# Test Fuzzy Search Integrated component on existing call records
|
22 |
+
def test_search_pass2():
    """'Football' misspelled as 'footbll' still returns the three matching transcripts."""
    search_url = "http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/footbll"
    reply = requests.get(search_url)

    assert reply.status_code == 200
    assert len(reply.json()) == 3  # still three matching call transcripts
|
29 |
+
|
30 |
+
# Test Fuzzy Search Integrated component on existing call records
|
31 |
+
def test_search_fail():
    """A 'basketball' query succeeds but returns no transcripts."""
    search_url = "http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/basketball"
    reply = requests.get(search_url)

    assert reply.status_code == 200
    assert len(reply.json()) == 0  # no matching call transcripts
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
# Test Summarisation Integrated component on existing call records
|
43 |
+
def test_summary_pass():
    """Summarising the English version of an existing transcript succeeds."""
    summary_url = "http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng"
    reply = requests.get(summary_url)

    assert reply.status_code == 200
|
49 |
+
|
50 |
+
def test_summary_pass2():
    """Summarising the Polish version of an existing transcript succeeds."""
    summary_url = "http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/fNGMkWoSK7fxwE3tbp8E816sthd2/pol"
    reply = requests.get(summary_url)

    assert reply.status_code == 200
|
56 |
+
|
57 |
+
def test_summary_fail():
    """Summarising a non-existing call record yields 404."""
    summary_url = "http://127.0.0.1:8080//call/summarise/falseID/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng"  # non-existing call record
    reply = requests.get(summary_url)

    assert reply.status_code == 404
|
63 |
+
|
64 |
+
def test_summary_fail2():
    """Summarising for a non-existing user record yields 404."""
    summary_url = "http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/falseID/eng"  # non-existing user record
    reply = requests.get(summary_url)

    assert reply.status_code == 404
|
70 |
+
|
71 |
+
|
72 |
+
# Test Key Term Extraction integrated component on existing call records
|
73 |
+
def test_extraction_pass():
    """Term extraction on the English version of an existing transcript returns three items."""
    extraction_url = "http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng"
    reply = requests.get(extraction_url)

    assert reply.status_code == 200
    assert len(reply.json()) == 3  # three items expected in the response
|
80 |
+
|
81 |
+
|
82 |
+
# Test Key Term Extraction integrated component on existing call records (Polish transcript)
|
83 |
+
def test_extracion_pass2():
    """Term extraction on the Polish version of an existing transcript returns three items."""
    extraction_url = "http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/fNGMkWoSK7fxwE3tbp8E816sthd2/pol"
    reply = requests.get(extraction_url)

    assert reply.status_code == 200
    assert len(reply.json()) == 3  # three items expected in the response
|
90 |
+
|
91 |
+
# Test Key Term Extraction integrated component when no terms are expected
|
92 |
+
def test_extraction_fail():
    """Term extraction request that is expected to return an empty result.

    NOTE(review): this requests exactly the same URL as test_extraction_pass,
    which expects three items -- confirm which call/user this test was meant
    to target.
    """
    extraction_url = "http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng"
    reply = requests.get(extraction_url)

    assert reply.status_code == 200
    assert len(reply.json()) == 0  # empty result expected
|
backend/tests/silence.wav
ADDED
Binary file (302 kB). View file
|
|
backend/tests/speaking.wav
ADDED
Binary file (255 kB). View file
|
|
backend/tests/test_client.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import wave
|
3 |
+
import pytest
|
4 |
+
import torchaudio
|
5 |
+
import os
|
6 |
+
import sys
|
7 |
+
|
8 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
9 |
+
parent_dir = os.path.dirname(current_dir)
|
10 |
+
sys.path.append(parent_dir)
|
11 |
+
from Client import Client
|
12 |
+
|
13 |
+
|
14 |
+
@pytest.fixture
def mock_client():
    """Provide a Client built with fixed test identifiers and original_sr=44100."""
    return Client("test_sid", "test_client_id", "testusername", original_sr=44100)
|
18 |
+
|
19 |
+
def test_client_init(mock_client):
    """A freshly constructed Client exposes the expected initial attributes."""
    assert mock_client.sid == "test_sid"
    assert mock_client.client_id == "test_client_id"
    assert mock_client.call_id is None  # identity check is the idiomatic None test
    assert mock_client.buffer == bytearray()
    assert mock_client.output_path == "test_sid_output_audio.wav"
    assert mock_client.target_language is None
    assert mock_client.original_sr == 44100
    # voice-activity-detector settings
    assert mock_client.vad.sample_rate == 16000
    assert mock_client.vad.frame_length == 25
    assert mock_client.vad.frame_shift == 20
    assert mock_client.vad.energy_threshold == 0.05
    assert mock_client.vad.pre_emphasis == 0.95
|
32 |
+
|
33 |
+
def test_client_add_bytes(mock_client):
    """Bytes passed to add_bytes end up in the client's buffer."""
    payload = b"test"
    mock_client.add_bytes(payload)
    assert mock_client.buffer == b"test"
|
36 |
+
|
37 |
+
def test_client_resample_and_clear(mock_client):
    """resample_and_clear empties the buffer and its output saves as 16 kHz audio."""
    import tempfile

    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    # read the fixture through a context manager so the file handle is closed
    with wave.open(os.path.join(location, "speaking.wav"), "rb") as source:
        speaking_bytes = source.readframes(-1)

    mock_client.add_bytes(speaking_bytes)
    resampled_waveform = mock_client.resample_and_clear()

    # Write to a temporary directory: the old `location + "testoutput.wav"`
    # lacked a path separator and left a stray file beside the tests folder.
    with tempfile.TemporaryDirectory() as scratch_dir:
        output_path = os.path.join(scratch_dir, "testoutput.wav")
        torchaudio.save(output_path, resampled_waveform, 16000)
        with wave.open(output_path, "rb") as wf:
            sample_rate = wf.getframerate()

    assert mock_client.buffer == bytearray()
    assert sample_rate == 16000
|
47 |
+
|
48 |
+
def test_client_vad(mock_client):
    """vad_analyse reports True for the speaking fixture and False for the silent one."""
    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    # read the fixtures through context managers so the file handles are closed
    with wave.open(location + "/speaking.wav", "rb") as speaking_file:
        speaking_bytes = speaking_file.readframes(-1)
    mock_client.add_bytes(speaking_bytes)
    resampled_waveform = mock_client.resample_and_clear()
    assert mock_client.buffer == bytearray()
    # `==` is kept on purpose: the return type of vad_analyse is not visible here
    assert mock_client.vad_analyse(resampled_waveform) == True

    with wave.open(location + "/silence.wav", "rb") as silent_file:
        silent_bytes = silent_file.readframes(-1)
    mock_client.add_bytes(silent_bytes)
    resampled_waveform = mock_client.resample_and_clear()
    assert mock_client.buffer == bytearray()
    assert mock_client.vad_analyse(resampled_waveform) == False
|
backend/tests/test_main.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import dotenv_values
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from pymongo import MongoClient
|
4 |
+
import pytest
|
5 |
+
from unittest.mock import AsyncMock, MagicMock, ANY
|
6 |
+
import socketio
|
7 |
+
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
|
11 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
12 |
+
parent_dir = os.path.dirname(current_dir)
|
13 |
+
sys.path.append(parent_dir)
|
14 |
+
|
15 |
+
from Client import Client
|
16 |
+
from main import sio, connect, disconnect, target_language, call_user, answer_call, incoming_audio, clients, rooms, app
|
17 |
+
from unittest.mock import patch
|
18 |
+
|
19 |
+
sio = socketio.AsyncServer(
|
20 |
+
async_mode="asgi",
|
21 |
+
cors_allowed_origins="*",
|
22 |
+
# engineio_logger=logger,
|
23 |
+
)
|
24 |
+
|
25 |
+
config = dotenv_values(".env")
|
26 |
+
|
27 |
+
# Read connection string from environment vars
|
28 |
+
# uri = os.environ['MONGODB_URI']
|
29 |
+
|
30 |
+
# Read connection string from .env file
|
31 |
+
uri = config['MONGODB_URI']
|
32 |
+
app.mongodb_client = MongoClient(uri)
|
33 |
+
app.database = app.mongodb_client['IT-Cluster1'] #connect to interpretalk primary db
|
34 |
+
try:
|
35 |
+
app.mongodb_client.admin.command('ping')
|
36 |
+
print("MongoDB Connection Established...")
|
37 |
+
except Exception as e:
|
38 |
+
print(e)
|
39 |
+
# shutdown logic
|
40 |
+
print("Closing MongoDB Connection...")
|
41 |
+
|
42 |
+
@pytest.fixture(autouse=True)
def setup_clients_and_rooms():
    """Empty the shared ``clients`` and ``rooms`` containers before every test."""
    for registry in (clients, rooms):
        registry.clear()
    yield
|
48 |
+
|
49 |
+
@pytest.fixture
def mock_client():
    """Provide a Client built with fixed test identifiers and original_sr=44100."""
    return Client("test_sid", "test_client_id", "testusername", original_sr=44100)
|
53 |
+
|
54 |
+
|
55 |
+
@pytest.mark.asyncio
async def test_connect(mock_client):
    """Connecting registers the session ID in ``clients``."""
    session_id = mock_client.sid
    await connect(session_id, {'QUERY_STRING': 'client_id=test_client_id'})
    app.mongodb_client.close()
    assert session_id in clients
|
62 |
+
|
63 |
+
@pytest.mark.asyncio
async def test_disconnect(mock_client):
    """Disconnecting removes the session ID from ``clients``."""
    session_id = mock_client.sid
    clients[session_id] = mock_client
    await disconnect(session_id)
    assert session_id not in clients
|
69 |
+
|
70 |
+
@pytest.mark.asyncio
async def test_target_language(mock_client):
    """``target_language`` must store the requested language on the client entry."""
    session_id = mock_client.sid
    clients[session_id] = mock_client

    requested_language = "fr"
    await target_language(session_id, requested_language)

    assert clients[session_id].target_language == "fr"
|
77 |
+
|
78 |
+
@pytest.mark.asyncio
async def test_incoming_audio(mock_client):
    """After ``incoming_audio`` handles one byte, the client's ``get_length()`` is non-zero."""
    session_id = mock_client.sid
    clients[session_id] = mock_client

    audio_chunk, call_id = b"\x01", "1234"
    await incoming_audio(session_id, audio_chunk, call_id)

    assert clients[session_id].get_length() != 0
|
86 |
+
|
backend/tests/unit_test.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import dotenv_values
|
3 |
+
from fastapi import FastAPI
|
4 |
+
from pymongo import MongoClient
|
5 |
+
from main import requests
|
6 |
+
import uuid
|
7 |
+
import pytest
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
import requests
|
10 |
+
import json
|
11 |
+
|
12 |
+
|
13 |
+
# Test Root endpoint
|
14 |
+
def test_root_pass():
    """The root endpoint must answer 200 with the welcome message."""
    expected_body = {"message": "Welcome to InterpreTalk!"}
    root_reply = requests.get("http://127.0.0.1:8080/")

    assert root_reply.status_code == 200
    assert root_reply.json() == expected_body
|
19 |
+
|
20 |
+
|
21 |
+
# POST /user/
|
22 |
+
# Test DB user record creation including response validation
|
23 |
+
def test_create_user_pass():
    """POSTing a new user record to ``/user/`` must return 201."""
    # NOTE(review): the email value looks like an obfuscation placeholder —
    # confirm the intended address.
    new_user = dict(
        name="Tester1",
        user_id="testerID",
        email="[email protected]",
    )

    created = requests.post("http://127.0.0.1:8080/user/", json=new_user)
    assert created.status_code == 201
|
32 |
+
|
33 |
+
|
34 |
+
'''Test User Endpoints'''
|
35 |
+
|
36 |
+
# GET /user/
|
37 |
+
# Test finding DB user record based on user ID
|
38 |
+
def test_find_user_pass():
    """Fetching an existing user by ID must return 200 and the stored record."""
    user_id = "ozpHhyum3sayTdxIKUAtF51uvWJ2"  # existing user record
    expected_record = {
        "user_id": user_id,
        "name": "Benjamin",
        "email": "[email protected]",
    }

    found = requests.get("http://localhost:8080/user/" + user_id)

    assert found.status_code == 200
    assert found.json() == expected_record
|
47 |
+
|
48 |
+
|
49 |
+
def test_find_user_fail():
    """Requesting a non-existing user record must return 404."""
    # NOTE(review): this hits /users/ while the passing lookup uses /user/ —
    # confirm the 404 comes from the missing record and not a missing route.
    missing_user_url = "http://127.0.0.1:8080/users/fakeID"
    not_found = requests.get(missing_user_url)

    # check that the response carries the intended error code
    assert not_found.status_code == 404
|
54 |
+
|
55 |
+
|
56 |
+
# PUT /user/{user_id}
|
57 |
+
# Updating DB user record based on user ID
|
58 |
+
def test_update_user_pass():
    """PATCHing a new name onto ``testerID`` must return 202 and the updated record."""
    changes = {"name": "TesterNewName"}
    expected_record = {
        "name": "TesterNewName",
        "user_id": "testerID",
        "email": "[email protected]",
    }

    updated = requests.patch("http://127.0.0.1:8080/users/testerID", json=changes)

    assert updated.status_code == 202
    assert updated.json() == expected_record
|
70 |
+
|
71 |
+
# Test with non-existing user ID
|
72 |
+
def test_update_user_fail():
    """PATCHing a user ID that does not exist must return 404."""
    changes = dict(name="TesterNewName")
    rejected = requests.patch("http://127.0.0.1:8080/users/falseID", json=changes)
    assert rejected.status_code == 404
|
79 |
+
|
80 |
+
|
81 |
+
# DELETE /user/{user_id}
|
82 |
+
def test_delete_user_pass():
    """Deleting the ``testerID`` user must return 200."""
    target_url = "http://127.0.0.1:8080/users/testerID"
    deleted = requests.delete(target_url)
    assert deleted.status_code == 200
|
85 |
+
|
86 |
+
def test_delete_user_fail():
    """Deleting a user ID that does not exist must return 404."""
    target_url = "http://127.0.0.1:8080/users/fakeID"
    rejected = requests.delete(target_url)
    assert rejected.status_code == 404
|
89 |
+
|
90 |
+
|
91 |
+
# GET /user/find-name-id/{user_id}
|
92 |
+
def test_find_name_id_pass():
    """Looking up the name for an existing user ID must return that name.

    The body check calls ``response.json()``; comparing the bound method
    ``response.json`` to a dict is always unequal, which made the original
    assertion fail regardless of what the server returned.
    """
    response = requests.get("http://127.0.0.1:8080/user/find-name-id/ozpHhyum3sayTdxIKUAtF51uvWJ2")

    # NOTE(review): 201 is unusual for a GET — confirm the endpoint's declared status code.
    assert response.status_code == 201
    assert response.json() == {
        'name': "Benjamin"
    }
|
98 |
+
|
99 |
+
def test_find_name_id_fail():
    """Looking up the name for an unknown user ID must return 404."""
    lookup_url = "http://127.0.0.1:8080/user/find-name-id/falseID"
    not_found = requests.get(lookup_url)
    assert not_found.status_code == 404
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
'''Test Call endpoints'''
|
106 |
+
|
107 |
+
# POST /call/create-call
|
108 |
+
# Test creating call record
|
109 |
+
def test_create_call_pass():
    """POSTing a call record with one caption to ``/call/create-call`` must return 201."""
    caption = {
        "author_id": "tester01",
        "author_username": "tester",
        "original_text": "It is a test",
        "translated_text": "Es un prueba",
    }
    call_record = {
        "call_id": "test001",
        "caller_id": "tester01",
        "callee_id": "tester02",
        "captions": [caption],
    }

    created = requests.post("http://127.0.0.1:8080/call/create-call", json=call_record)
    assert created.status_code == 201
|
126 |
+
|
127 |
+
|
128 |
+
# GET /call/find-call
|
129 |
+
# Test finding DB call record based on call ID
|
130 |
+
def test_find_call_pass():
    """Fetching call ``test001`` must return 200 and the full call record."""
    expected_caption = {
        "author_id": "tester01",
        "author_username": "tester",
        "original_text": "It is a test",
        "translated_text": "Es un prueba",
    }
    expected_call = {
        "call_id": "test001",
        "caller_id": "tester01",
        "callee_id": "tester02",
        "captions": [expected_caption],
    }

    found = requests.get("http://127.0.0.1:8080/call/test001")  # existing call record

    assert found.status_code == 200
    assert found.json() == expected_call
|
147 |
+
|
148 |
+
|
149 |
+
def test_find_call_fail():
    """Fetching a call ID that does not exist must return 404."""
    missing_call_url = "http://127.0.0.1:8080/call/fakeID"  # non-existing call record
    not_found = requests.get(missing_call_url)

    # check that the response carries the intended error code
    assert not_found.status_code == 404
|
154 |
+
|
155 |
+
|
156 |
+
# GET /call/find-user-call
|
157 |
+
# Test finding DB call record based on user ID
|
158 |
+
def test_find_user_call_pass():
    """Fetching calls for user ``tester01`` must return 200 and the expected record."""
    expected_caption = {
        "author_id": "tester01",
        "author_username": "tester",
        "original_text": "It is a test",
        "translated_text": "Es un prueba",
    }
    expected_call = {
        "call_id": "test001",
        "caller_id": "tester01",
        "callee_id": "tester02",
        "captions": [expected_caption],
    }

    # existing user record
    found = requests.get("http://127.0.0.1:8080/call/find-user-calls/tester01")

    assert found.status_code == 200
    assert found.json() == expected_call
|
175 |
+
|
176 |
+
def test_find_user_call_fail():
    """Requesting calls for an unknown ID must return 404."""
    # NOTE(review): this hits /calls/ while the passing test uses
    # /call/find-user-calls/ — confirm the 404 is not just a missing route.
    missing_url = "http://127.0.0.1:8080/calls/fakeID"  # non-existing user record
    not_found = requests.get(missing_url)

    # check that the response carries the intended error code
    assert not_found.status_code == 404
|
181 |
+
|
182 |
+
|
183 |
+
# GET /call/get-captions
|
184 |
+
# Test finding DB call record based on user ID
|
185 |
+
def test_get_captions_pass():
    """Fetching call ``test001`` for user ``tester01`` must return 200 and the expected record."""
    expected_caption = {
        "author_id": "tester01",
        "author_username": "tester",
        "original_text": "It is a test",
        "translated_text": "Es un prueba",
    }
    expected_call = {
        "call_id": "test001",
        "caller_id": "tester01",
        "callee_id": "tester02",
        "captions": [expected_caption],
    }

    # existing call and user records
    found = requests.get("http://127.0.0.1:8080/call/find-user-calls/test001/tester01")

    assert found.status_code == 200
    assert found.json() == expected_call
|
202 |
+
|
203 |
+
def test_get_captions_fail_unknown_user():
    """Requesting captions of an existing call for an unknown user must return 404.

    This test carries a distinct name because a second function in this module
    is also called ``test_get_captions_fail``; with identical names the later
    definition replaces the earlier one and this case would never be collected.
    """
    response = requests.get("http://127.0.0.1:8080/call/find-user-calls/test001/tester00")  # fake user record

    # check that the response carries the intended error code
    assert response.status_code == 404
|
208 |
+
|
209 |
+
def test_get_captions_fail():
    """Requesting captions of an unknown call for an existing user must return 404."""
    missing_call_url = "http://127.0.0.1:8080/call/find-user-calls/test000/tester01"  # fake call record
    not_found = requests.get(missing_call_url)

    # check that the response carries the intended error code
    assert not_found.status_code == 404
|
214 |
+
|
215 |
+
|
216 |
+
# PATCH /call/update-call/{call_id}
|
217 |
+
# test updating call record based on id
|
218 |
+
def test_update_call_pass():
    """PATCHing ``/call/update-call/tester02`` must return 202 and the expected call record."""
    # NOTE(review): the payload sets callee_id to "TesterNewName" but the
    # expected body still has "tester02", and the URL carries a user-like ID
    # rather than call ID "test001" — confirm the intended request and result.
    changes = {"callee_id": "TesterNewName"}
    expected_caption = {
        "author_id": "tester01",
        "author_username": "tester",
        "original_text": "It is a test",
        "translated_text": "Es un prueba",
    }
    expected_call = {
        "call_id": "test001",
        "caller_id": "tester01",
        "callee_id": "tester02",
        "captions": [expected_caption],
    }

    updated = requests.patch("http://127.0.0.1:8080/call/update-call/tester02", json=changes)

    assert updated.status_code == 202
    assert updated.json() == expected_call
|
238 |
+
|
239 |
+
# Test with non-existing user ID
|
240 |
+
def test_update_call_fail():
    """PATCHing with a non-existing ID must return 404."""
    # NOTE(review): the request goes to /users/ rather than /call/update-call/ —
    # confirm which route this failure case is meant to exercise.
    changes = dict(callee_id="testName")
    rejected = requests.patch("http://127.0.0.1:8080/users/falseID", json=changes)
    assert rejected.status_code == 404
|
247 |
+
|
248 |
+
|
249 |
+
# PATCH /call/update-caption/{call_id}
|
250 |
+
# test updating caption record based on id
|
251 |
+
def test_update_caption_pass():
    """PATCHing a new author username to the update-caption route must return 202."""
    changes = dict(author_username="testerNew")
    updated = requests.patch("http://127.0.0.1:8080/call/update-caption/tester01", json=changes)
    assert updated.status_code == 202
|
258 |
+
|
259 |
+
|
260 |
+
# Test with non-existing user ID
|
261 |
+
def test_update_caption_fail():
    """PATCHing the update-caption route with a non-existing ID must return 404.

    Named ``test_update_caption_fail`` because an earlier function in this
    module is already called ``test_update_call_fail``; reusing that name would
    replace the earlier test so only one of the two cases would ever run.
    """
    payload = {
        "callee_id": "testName"
    }

    # NOTE(review): the passing caption test uses /call/update-caption/ while
    # this URL has no /call prefix — confirm the 404 is not just a missing route.
    response = requests.patch("http://127.0.0.1:8080/update-caption/falseID", json=payload)
    assert response.status_code == 404
|
268 |
+
|
269 |
+
|
270 |
+
# DELETE /call/delete-call/{call_id}
|
271 |
+
def test_delete_call_pass():
    """Deleting call record ``test001`` must return 200.

    Named ``test_delete_call_pass`` because the user-deletion test earlier in
    this module is already called ``test_delete_user_pass``; reusing the name
    would replace that test. The URL uses a single slash after the port: the
    original ``//call/...`` path differs from ``/call/delete-call/{call_id}``.
    """
    response = requests.delete("http://127.0.0.1:8080/call/delete-call/test001")
    assert response.status_code == 200
|
274 |
+
|
275 |
+
def test_delete_call_fail():
    """Deleting a call ID that does not exist must return 404.

    Named ``test_delete_call_fail`` because the user-deletion test earlier in
    this module is already called ``test_delete_user_fail``; reusing the name
    would replace that test. The URL uses a single slash after the port so the
    404 reflects the missing record, not a malformed ``//call/...`` path.
    """
    response = requests.delete("http://127.0.0.1:8080/call/delete-call/test009")
    assert response.status_code == 404
|
backend/utils/__pycache__/text_rank.cpython-310.pyc
ADDED
Binary file (2.03 kB). View file
|
|
backend/utils/text_rank.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import spacy
|
2 |
+
import pytextrank
|
3 |
+
from spacy.tokens import Span
|
4 |
+
|
5 |
+
# Define decorator for converting to singular version of words
|
6 |
+
@spacy.registry.misc("plural_scrubber")
def plural_scrubber():
    """Return a scrubber callable, registered under the name "plural_scrubber".

    The returned function maps a spaCy ``Span`` to its ``lemma_`` string, which
    is how plural forms get converted to their singular version.
    """
    def lemmatize(span: Span) -> str:
        return span.lemma_

    return lemmatize
|
11 |
+
|
12 |
+
|
13 |
+
def model_selector(target_language: str):
    """Load a spaCy pipeline for ``target_language`` and add a TextRank component.

    Args:
        target_language: Three-letter language code used as the lookup key
            (e.g. "spa", "fra"). Codes not in the table fall back to the
            English model ``en_core_web_lg``.

    Returns:
        The loaded spaCy pipeline with a "textrank" pipe appended.
    """
    # Subset of non-English models, keyed by language code.
    models_by_language = {
        "spa": "es_core_news_sm",
        "fra": "fr_core_news_sm",
        "pol": "pl_core_news_sm",
        "deu": "de_core_news_sm",
        "ita": "it_core_news_sm",
        "por": "pt_core_news_sm",
        "nld": "nl_core_news_sm",
        "fin": "fi_core_news_sm",
        "ron": "ro_core_news_sm",
        "rus": "ru_core_news_sm",
    }

    try:
        model_name = models_by_language[target_language]
        pipeline = spacy.load(model_name)
    except KeyError:
        # Unknown language code: use the English model instead.
        pipeline = spacy.load("en_core_web_lg")

    # Every stop word of the loaded language is passed as a NOUN stopword entry,
    # and the registered scrubber is referenced by name.
    noun_stopwords = {word: ["NOUN"] for word in pipeline.Defaults.stop_words}
    textrank_config = {
        "stopwords": noun_stopwords,
        "scrubber": {"@misc": "plural_scrubber"},
    }
    pipeline.add_pipe("textrank", config=textrank_config)

    return pipeline
|
42 |
+
|
43 |
+
|
44 |
+
def extract_terms(text, target_language, length):
    """Return the top-ranked key phrases of ``text`` as a list of unique strings.

    Args:
        text: The text to run through the TextRank pipeline.
        target_language: Language code handed to ``model_selector`` to pick
            the spaCy model.
        length: Size measure that decides how many phrases are returned
            (presumably the length of ``text`` — confirm with callers).

    Returns:
        A list of distinct phrase texts in rank order: at most 1 phrase when
        ``length < 100``, at most 2 when ``100 <= length < 300``, and at
        most 3 otherwise.
    """
    nlp = model_selector(target_language)

    # Run the pipeline; ranked phrases are exposed on doc._.phrases.
    doc = nlp(text)

    # The thresholds form one exhaustive chain. Previously the boundary values
    # 100 and 300 matched no branch and left the result unbound, which raised
    # UnboundLocalError on return.
    if length < 100:
        top_n = 1
    elif length < 300:
        top_n = 2
    else:
        top_n = 3

    # dict.fromkeys removes duplicate phrase texts while keeping rank order.
    return list(dict.fromkeys(phrase.text for phrase in doc._.phrases[:top_n]))
|