Spaces:

Justtalk
/

JusTalk

Running

App Files Files Community

A-yum1 committed on Mar 21

Commit

21b85ef

2 Parent(s): cf6a323 dda9dff

merge branch

Browse files

Files changed (5) hide show

__pycache__/process.cpython-310.pyc +0 -0
__pycache__/transcription.cpython-310.pyc +0 -0
app.py +12 -4
process.py +2 -2
transcription.py +48 -2

__pycache__/process.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/process.cpython-310.pyc and b/__pycache__/process.cpython-310.pyc differ

__pycache__/transcription.cpython-310.pyc ADDED Viewed

Binary file (2.84 kB). View file

app.py CHANGED Viewed

@@ -4,11 +4,14 @@ from pydub import AudioSegment  # 変換用にpydubをインポート
 import os
 import shutil
 from process import AudioProcessor
 process=AudioProcessor()
 app = Flask(__name__)
 users = []
 # トップページ（テンプレート: index.html）
 @app.route('/')
@@ -21,7 +24,6 @@ def index():
 def feedback():
     return render_template('feedback.html')
 # 会話詳細画面（テンプレート: talkDetail.html）
 @app.route('/talk_detail', methods=['GET', 'POST'])
 def talk_detail():
@@ -36,11 +38,17 @@ def userregister():
 def confirm():
     return jsonify({'members': users}), 200
 # 音声アップロード＆解析エンドポイント
 @app.route('/upload_audio', methods=['POST'])
 def upload_audio():
     try:
         data = request.get_json()
         # name か users のいずれかが必須。どちらも無い場合はエラー
@@ -71,9 +79,9 @@ def upload_audio():
         # 複数人の場合は参照パスのリストを、1人の場合は単一のパスを渡す
         if len(users) > 1:
             print("複数人の場合の処理")
-            matched_time, unmatched_time = process.process_multi_audio(reference_paths, audio_path, threshold=0.05)
         else:
-            matched_time, unmatched_time = process.process_audio(reference_paths[0], audio_path, threshold=0.05)
         total_time = matched_time + unmatched_time
         rate = (matched_time / total_time) * 100 if total_time > 0 else 0

 import os
 import shutil
 from process import AudioProcessor
+from transcription import TranscriptionMaker
 process=AudioProcessor()
+transcription = TranscriptionMaker()
 app = Flask(__name__)
 users = []
+segments_dir
 # トップページ（テンプレート: index.html）
 @app.route('/')
 def feedback():
     return render_template('feedback.html')
 # 会話詳細画面（テンプレート: talkDetail.html）
 @app.route('/talk_detail', methods=['GET', 'POST'])
 def talk_detail():
 def confirm():
     return jsonify({'members': users}), 200
+# 書き起こし作成エンドポイント
+@app.route('/transcription',methods =['GET','POST'])
+def transcription():
+    global segments_dir
+    text = transcription.create_transcription(segments_dir)
+    return jsonify({'transcription': text}),200
 # 音声アップロード＆解析エンドポイント
 @app.route('/upload_audio', methods=['POST'])
 def upload_audio():
+    global segments_dir
     try:
         data = request.get_json()
         # name か users のいずれかが必須。どちらも無い場合はエラー
         # 複数人の場合は参照パスのリストを、1人の場合は単一のパスを渡す
         if len(users) > 1:
             print("複数人の場合の処理")
+            matched_time, unmatched_time,segments_dir = process.process_multi_audio(reference_paths, audio_path, threshold=0.05)
         else:
+            matched_time, unmatched_time, segments_dir = process.process_audio(reference_paths[0], audio_path, threshold=0.05)
         total_time = matched_time + unmatched_time
         rate = (matched_time / total_time) * 100 if total_time > 0 else 0

process.py CHANGED Viewed

@@ -89,7 +89,7 @@ class AudioProcessor():
                 matched_time_ms += len(AudioSegment.from_file(segment_file))
         unmatched_time_ms = total_duration_ms - matched_time_ms
-        return matched_time_ms, unmatched_time_ms
     def process_multi_audio(self, reference_pathes, input_path, output_folder='/tmp/data/matched_multi_segments', seg_duration=1.0, threshold=0.5):
@@ -141,7 +141,7 @@ class AudioProcessor():
             if match is not None:
                 matched_time[match] += seg_duration
-        return matched_time
     def save_audio_from_base64(self,base64_audio,output_dir,output_filename,temp_format='webm'):

                 matched_time_ms += len(AudioSegment.from_file(segment_file))
         unmatched_time_ms = total_duration_ms - matched_time_ms
+        return matched_time_ms, unmatched_time_ms, output_folder
     def process_multi_audio(self, reference_pathes, input_path, output_folder='/tmp/data/matched_multi_segments', seg_duration=1.0, threshold=0.5):
             if match is not None:
                 matched_time[match] += seg_duration
+        return matched_time,segment_file,segmented_path
     def save_audio_from_base64(self,base64_audio,output_dir,output_filename,temp_format='webm'):

transcription.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 from faster_whisper import WhisperModel
 class TranscriptionMaker():
     #書き起こしファイルを吐き出すディレクトリを指定
@@ -14,8 +15,10 @@ class TranscriptionMaker():
             raise
     #音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
-    def create_transcription(self,audio_directory):
         results = []
         #ディレクトリ内のファイルを全て取得
         if not os.path.isdir(audio_directory):
             raise ValueError(f"The specified path is not a valid directory: {audio_directory}")
@@ -44,4 +47,47 @@ class TranscriptionMaker():
         except OSError as e:
             print(f"Error writing transcription file: {e}")
             raise
-        return output_file

 import os
 from faster_whisper import WhisperModel
+from pydub import AudioSegment
 class TranscriptionMaker():
     #書き起こしファイルを吐き出すディレクトリを指定
             raise
     #音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
+    def create_transcription(self,segments_directory):
         results = []
+        #細切れ音声をくっつける
+        audio_directory = self.merge_segments(segments_directory)
         #ディレクトリ内のファイルを全て取得
         if not os.path.isdir(audio_directory):
             raise ValueError(f"The specified path is not a valid directory: {audio_directory}")
         except OSError as e:
             print(f"Error writing transcription file: {e}")
             raise
+        return output_file
+    #ファイル名が連続しているならくっつける
+    def merge_segments(self,segments_dir,output_dir = "/tmp/data/merged_audio"):
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir, exist_ok=True)
+        files = sorted([f for f in os.listdir(segments_dir) if f.endswith('.wav')])
+        merged_files = []
+        current_group = []
+        previous_index = None
+        for file in files:
+            # ファイル名から番号を抽出（例: "0.wav" -> 0）
+            file_index = int(file.split('.')[0])
+            # 番号が連続していない場合、新しいグループを作成
+            if previous_index is not None and file_index != previous_index + 1:
+                # 現在のグループを結合して保存
+                if current_group:
+                    merged_files.append(current_group)
+                current_group = []
+            # 現在のファイルをグループに追加
+            current_group.append(file)
+            previous_index = file_index
+        # 最後のグループを追加
+        if current_group:
+            merged_files.append(current_group)
+        # グループごとに結合して保存
+        for i, group in enumerate(merged_files):
+            combined_audio = AudioSegment.empty()
+            for file in group:
+                file_path = os.path.join(segments_dir, file)
+                segment = AudioSegment.from_file(file_path)
+                combined_audio += segment
+            # 出力ファイル名を設定して保存
+            output_file = os.path.join(output_dir, f'merged_{i}.wav')
+            combined_audio.export(output_file, format='wav')
+        return output_dir