from flask import Flask, request, jsonify
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
import whisper
import os

app = Flask(__name__)

# Load the Whisper model once at startup (better performance for multiple requests).
# Kept in its own variable so the NER model loaded below does not overwrite it.
whisper_model = whisper.load_model("small")
def allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in {'wav', 'mp3', 'ogg', 'flac', 'm4a'}
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    # Check if a file was uploaded
    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']

    # Check if the file is empty
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    # Check allowed file types
    if not allowed_file(file.filename):
        return jsonify({'error': 'Unsupported file type'}), 400

    try:
        # Save the upload to a temporary file
        temp_path = "temp_audio"
        file.save(temp_path)

        # Transcribe the audio
        result = whisper_model.transcribe(temp_path)
        transcription = result["text"]

        return jsonify({'transcription': transcription})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        # Clean up the temporary file even if transcription fails
        if os.path.exists(temp_path):
            os.remove(temp_path)
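# Emotion classification pipeline, loaded once at startup;
# return_all_scores=True returns a score for every emotion label.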
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
@app.route('/classify', methods=['POST'])
def classify():
    try:
        data = request.get_json()
        if not data or 'text' not in data:
            return jsonify({"error": "Missing 'text' field"}), 400

        text = data['text']
        result = classifier(text)
        return jsonify(result)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
# Initialize the NER tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
nlp = pipeline("ner", model=ner_model, tokenizer=tokenizer)
@app.route('/ner', methods=['POST'])
def ner_endpoint():
    try:
        # Get text from request
        data = request.get_json()
        text = data.get("text", "")

        # Perform NER
        ner_results = nlp(text)

        # Extract words and their corresponding entities
        words_and_entities = [
            {"word": result['word'], "entity": result['entity']}
            for result in ner_results
        ]

        # Return JSON response with the words and their entities
        return jsonify({"entities": words_and_entities})
    except Exception as e:
        return jsonify({"error": str(e)}), 500