Spaces:

Justtalk
/

JusTalk

Running

App Files Community

A-yum1 committed on Mar 29

Commit

0dbd483

1 Parent(s): 64a73ca

Update process.py

Browse files

Files changed (5) hide show

__pycache__/process.cpython-310.pyc +0 -0
__pycache__/transcription.cpython-310.pyc +0 -0
app.py +2 -2
process.py +27 -9
transcription.py +1 -48

__pycache__/process.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/process.cpython-310.pyc and b/__pycache__/process.cpython-310.pyc differ

__pycache__/transcription.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/transcription.cpython-310.pyc and b/__pycache__/transcription.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -325,8 +325,8 @@ def upload_audio():
         # 複数人の場合は参照パスのリストを、1人の場合は単一のパスを渡す
         if len(users) > 1:
             print("複数人の場合の処理")
-            matched_times, segments_dir = process.process_multi_audio(reference_paths, audio_path, threshold=0.05)
-            total_audio = transcripter.merge_segments(segments_dir)
             # 各メンバーのrateを計算
             total_time = sum(matched_times)
             rates = [(time / total_time) * 100 if total_time > 0 else 0 for time in matched_times]

         # 複数人の場合は参照パスのリストを、1人の場合は単一のパスを渡す
         if len(users) > 1:
             print("複数人の場合の処理")
+            matched_times, merged_segments = process.process_multi_audio(reference_paths, audio_path, users, threshold=0.05)
+            total_audio = transcripter.save_marged_segments(merged_segments)
             # 各メンバーのrateを計算
             total_time = sum(matched_times)
             rates = [(time / total_time) * 100 if total_time > 0 else 0 for time in matched_times]

process.py CHANGED Viewed

@@ -245,21 +245,17 @@ class AudioProcessor():
             print(f"類似度計算でエラーが発生しました: {e}")
             return None
-    def process_audio(self, reference_path, input_path, user,output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
         """
         入力音声からリファレンス音声に類似したセグメントを抽出する
         Parameters:
-        isSpeaking(bool): 現在のセグメントがリファレンス音声と類似しているか
-        wasSpeaking(bool): 1つ前のセグメントがリファレンス音声と類似しているか
-        current_segment(list): 一致している、または一致しない話者のセグメントのストック
-        merged_segments(list): 要素は(一致するか(bool), セグメントのリスト)。書き起こしに利用。
         reference_path (str): リファレンス音声のパス
         input_path (str): 入力音声のパス
         output_folder (str): 類似セグメントを保存するディレクトリ
         seg_duration (float): セグメントの長さ（秒）
         threshold (float): 類似度の閾値
-        user(str): ユーザー名
         Returns:
         tuple: (マッチした時間（ミリ秒）, マッチしなかった時間（ミリ秒）, 分類済みのセグメント)
@@ -338,19 +334,20 @@ class AudioProcessor():
             print(f"音声処理でエラーが発生しました: {e}")
             return 0, 0, merged_segments
-    def process_multi_audio(self, reference_pathes, input_path, output_folder='/tmp/data/matched_multi_segments', seg_duration=1.0, threshold=0.5):
         """
         入力音声から複数のリファレンス音声に類似したセグメントを抽出する
         Parameters:
         reference_pathes (list): リファレンス音声のパスのリスト
         input_path (str): 入力音声のパス
         output_folder (str): 類似セグメントを保存するディレクトリ
         seg_duration (float): セグメントの長さ（秒）
         threshold (float): 類似度の閾値
         Returns:
-        tuple: (各リファレンスごとのマッチした時間のリスト, セグメントが保存されたディレクトリのパス)
         """
         try:
             # 出力先ディレクトリの中身をクリアする
@@ -430,13 +427,34 @@ class AudioProcessor():
             # 各セグメントについて、最も高い類似度のリファレンスを選択
             best_matches = []
             for seg_sim in similarity_transposed:
                 best_ref = np.argmax(seg_sim)  # 最も類似度の高いリファレンスのインデックス
                 # 閾値チェック
                 if seg_sim[best_ref] < threshold:
                     best_matches.append(None)  # 閾値未満の場合はマッチなしとする
                 else:
                     best_matches.append(best_ref)
             # 各リファレンスごとに一致時間を集計
             matched_time = [0] * len(reference_pathes)
@@ -444,7 +462,7 @@ class AudioProcessor():
                 if match is not None:
                     matched_time[match] += seg_duration
-            return matched_time, segmented_path
         except Exception as e:
             print(f"マルチ音声処理でエラーが発生しました: {e}")

             print(f"類似度計算でエラーが発生しました: {e}")
             return None
+    def process_audio(self, reference_path, input_path, user, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
         """
         入力音声からリファレンス音声に類似したセグメントを抽出する
         Parameters:
         reference_path (str): リファレンス音声のパス
         input_path (str): 入力音声のパス
+        user(str): ユーザー名
         output_folder (str): 類似セグメントを保存するディレクトリ
         seg_duration (float): セグメントの長さ（秒）
         threshold (float): 類似度の閾値
         Returns:
         tuple: (マッチした時間（ミリ秒）, マッチしなかった時間（ミリ秒）, 分類済みのセグメント)
             print(f"音声処理でエラーが発生しました: {e}")
             return 0, 0, merged_segments
+    def process_multi_audio(self, reference_pathes, input_path, users, output_folder='/tmp/data/matched_multi_segments', seg_duration=1.0, threshold=0.5):
         """
         入力音声から複数のリファレンス音声に類似したセグメントを抽出する
         Parameters:
         reference_pathes (list): リファレンス音声のパスのリスト
         input_path (str): 入力音声のパス
+        users(list): ユーザーのリスト
         output_folder (str): 類似セグメントを保存するディレクトリ
         seg_duration (float): セグメントの長さ（秒）
         threshold (float): 類似度の閾値
         Returns:
+        tuple: (各リファレンスごとのマッチした時間のリスト, 分類済みのセグメント)
         """
         try:
             # 出力先ディレクトリの中身をクリアする
             # 各セグメントについて、最も高い類似度のリファレンスを選択
             best_matches = []
+            speakers = []
             for seg_sim in similarity_transposed:
                 best_ref = np.argmax(seg_sim)  # 最も類似度の高いリファレンスのインデックス
                 # 閾値チェック
                 if seg_sim[best_ref] < threshold:
                     best_matches.append(None)  # 閾値未満の場合はマッチなしとする
+                    speakers.append(-1) # Noneは都合が悪いので-1
                 else:
                     best_matches.append(best_ref)
+                    speakers.append(best_ref)
+            current_speaker = None
+            current_segments = []
+            merged_segments = []
+            for index,file in enumerate(segment_files,start=0):
+                file_path = os.path.join(segmented_path, file)
+                speaker = users[speakers[index]]
+                if speaker == -1:
+                    continue
+                if current_speaker != speaker:
+                    if current_segments:
+                        merged_segments.append((current_speaker,current_segments))
+                    current_speaker = speaker
+                    current_segments = [file_path]
+                else:
+                    current_segments.append(file_path)
+            if current_segments:
+                merged_segments.append((current_speaker,current_segments))
             # 各リファレンスごとに一致時間を集計
             matched_time = [0] * len(reference_pathes)
                 if match is not None:
                     matched_time[match] += seg_duration
+            return matched_time, merged_segments
         except Exception as e:
             print(f"マルチ音声処理でエラーが発生しました: {e}")

transcription.py CHANGED Viewed

@@ -68,6 +68,7 @@ class TranscriptionMaker():
             raise
         return output_file
     def combine_audio(self,audio_files):
         if not audio_files:
             raise
@@ -91,7 +92,6 @@ class TranscriptionMaker():
         return merged_segments
     # ディレクトリ内の音声ファイルを並べ替える
     def sort_audio_files_in_directory(self, directory):
         files = os.listdir(directory)
@@ -99,50 +99,6 @@ class TranscriptionMaker():
         audio_files.sort(key=lambda x: datetime.strptime(x.split("_")[1].split(".")[0], "%Y%m%d%H%M%S"))
         return [os.path.join(directory, f) for f in audio_files]
-    #ファイル名が連続しているならくっつける
-    '''
-    def merge_segments(self,segments_dir,output_dir = "/tmp/data/merged_segment"):
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir, exist_ok=True)
-        files = sorted([f for f in os.listdir(segments_dir) if f.endswith('.wav')])
-        merged_files = []
-        current_group = []
-        previous_index = None
-        for file in files:
-            # ファイル名から番号を抽出（例: "0.wav" -> 0）
-            file_index = int(file.split('.')[0])
-            # 番号が連続していない場合、新しいグループを作成
-            if previous_index is not None and file_index != previous_index + 1:
-                # 現在のグループを結合して保存
-                if current_group:
-                    merged_files.append(current_group)
-                current_group = []
-            # 現在のファイルをグループに追加
-            current_group.append(file)
-            previous_index = file_index
-        # 最後のグループを追加
-        if current_group:
-            merged_files.append(current_group)
-        # グループごとに結合して保存
-        for i, group in enumerate(merged_files):
-            combined_audio = AudioSegment.empty()
-            for file in group:
-                file_path = os.path.join(segments_dir, file)
-                segment = AudioSegment.from_file(file_path)
-                combined_audio += segment
-            # 出力ファイル名を設定して保存
-            output_file = os.path.join(output_dir, self.generate_filename(3))
-            combined_audio.export(output_file, format='wav')
-        return output_dir'''
     def save_marged_segments(self,merged_segments,output_directory='/tmp/data/conversations'):
         if not merged_segments:
@@ -165,7 +121,6 @@ class TranscriptionMaker():
         return output_directory
     def merge_segments(self,segments):
         combined = AudioSegment.empty()  # 空のAudioSegmentを初期化
@@ -181,8 +136,6 @@ class TranscriptionMaker():
             combined += audio
         return combined
     def generate_random_string(self,length):
         letters = string.ascii_letters + string.digits

             raise
         return output_file
+    # 受け取った音声ファイルを話者ごとに整理する
     def combine_audio(self,audio_files):
         if not audio_files:
             raise
         return merged_segments
     # ディレクトリ内の音声ファイルを並べ替える
     def sort_audio_files_in_directory(self, directory):
         files = os.listdir(directory)
         audio_files.sort(key=lambda x: datetime.strptime(x.split("_")[1].split(".")[0], "%Y%m%d%H%M%S"))
         return [os.path.join(directory, f) for f in audio_files]
     def save_marged_segments(self,merged_segments,output_directory='/tmp/data/conversations'):
         if not merged_segments:
         return output_directory
     def merge_segments(self,segments):
         combined = AudioSegment.empty()  # 空のAudioSegmentを初期化
             combined += audio
         return combined
     def generate_random_string(self,length):
         letters = string.ascii_letters + string.digits