nurfarah57 committed on
Commit
679fd42
·
verified ·
1 Parent(s): 4555268

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +66 -0
main.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
+ from fastapi import FastAPI, UploadFile, File
4
+ from fastapi.responses import JSONResponse
5
+ from transformers import pipeline
6
+ import traceback
7
+ import re
8
+ import uvicorn
9
+
10
+ app = FastAPI(title="Tacab ASR Somali API")
11
+
12
# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model.
asr = pipeline(
    task="automatic-speech-recognition",
    # Model weights and tokenizer both come from the same hub repository.
    model="tacab/ASR_SOMALI",
    tokenizer="tacab/ASR_SOMALI",
    # device=-1 selects CPU inference.
    device=-1,
    # Long recordings are processed in 30 s windows with a 6 s stride.
    chunk_length_s=30,
    stride_length_s=6,
    return_timestamps="word",
)
22
+
23
# Auto punctuation
def auto_punctuate(text, words_per_sentence=10):
    """Add naive sentence punctuation and capitalisation to raw ASR text.

    The transcript is cut into chunks of ``words_per_sentence`` words. Each
    chunk that does not already end in ``.``, ``?`` or ``!`` gets a trailing
    period, and every resulting sentence is capitalised. Whitespace runs are
    collapsed to single spaces.

    Note that ``str.capitalize`` lower-cases everything after the first
    character of each sentence, so the output is sentence case regardless of
    the casing of the input.

    Args:
        text: Raw transcript; may be empty.
        words_per_sentence: Maximum number of words per generated sentence.
            Defaults to 10.

    Returns:
        The punctuated transcript, or ``""`` when ``text`` has no words.

    Raises:
        ValueError: If ``words_per_sentence`` is smaller than 1.
    """
    if words_per_sentence < 1:
        raise ValueError("words_per_sentence must be at least 1")

    words = text.split()
    chunks = []
    for start in range(0, len(words), words_per_sentence):
        chunk = " ".join(words[start:start + words_per_sentence])
        # Only terminate chunks that are not already terminated; appending
        # unconditionally is what used to produce ".." and "?." in the output.
        if not chunk.endswith((".", "?", "!")):
            chunk += "."
        chunks.append(chunk)

    # Re-split on terminal punctuation so sentences that were already
    # delimited inside a chunk are capitalised as well.
    sentences = re.split(r"(?<=[.?!])\s+", " ".join(chunks))
    return " ".join(s.capitalize() for s in sentences if s)
41
+
42
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return punctuated text.

    The upload is written to a uniquely named temporary file, passed to the
    module-level ``asr`` pipeline, and the recognised text is post-processed
    with ``auto_punctuate``. The temporary file is removed before returning,
    whether or not transcription succeeded.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        ``{"transcription": <text>}`` on success; a JSON ``{"error": ...}``
        body with status 400 when the recogniser produced no text, or with
        status 500 when any exception was raised while handling the request.
    """
    # Function-scope imports keep this handler self-contained; the module's
    # import block does not bring in these names.
    import os
    import tempfile

    temp_path = None
    try:
        # The client-supplied filename is untrusted: never use it as a path.
        # Keep only a short alphanumeric extension so the temporary file
        # still carries the upload's audio type.
        extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
        suffix = extension if re.fullmatch(r"\.[A-Za-z0-9]{1,10}", extension) else ""

        # A unique temp name also prevents concurrent uploads that share a
        # filename from overwriting each other.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            temp_path = tmp.name
            tmp.write(await file.read())

        # Transcribe
        # NOTE(review): this is a blocking call inside an async handler, so
        # other requests wait while it runs — confirm whether that is acceptable.
        result = asr(temp_path)
        raw_text = result.get("text", "").strip()
        if not raw_text:
            return JSONResponse({"error": "No transcription result."}, status_code=400)

        # Punctuate
        cleaned_text = auto_punctuate(raw_text)

        return {"transcription": cleaned_text}

    except Exception as e:
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)

    finally:
        # Best-effort cleanup so uploads do not accumulate on disk.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
64
+
65
if __name__ == "__main__":
    # When run as a script, serve the API on every network interface, port 7860.
    serve_kwargs = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **serve_kwargs)