File size: 2,844 Bytes
c2864d3
 
91a018b
 
 
2ea3a36
 
fcf5834
c2864d3
2ea3a36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2864d3
fcf5834
c2864d3
 
1379c69
 
 
 
 
 
 
 
 
 
 
91a018b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import tempfile

from flask import Flask, request, jsonify
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
import whisper

# Flask app exposing three endpoints: /transcribe, /classify, /ner.
app = Flask(__name__)



# Load the Whisper "small" speech-recognition model once at startup
# (better performance for multiple requests).
model = whisper.load_model("small")

def allowed_file(filename, allowed_exts=frozenset({'wav', 'mp3', 'ogg', 'flac', 'm4a'})):
    """Return True if *filename* has an extension accepted for transcription.

    Args:
        filename: name of the uploaded file (the part after the last '.'
            is treated as the extension, case-insensitively).
        allowed_exts: set of lowercase extensions to accept.  Defaults to
            the audio formats the /transcribe endpoint supports; the
            frozenset default is built once instead of per call.

    Returns:
        bool: True when the filename contains a '.' and its extension is
        in *allowed_exts*.
    """
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in allowed_exts

@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file with the Whisper model.

    Expects a multipart/form-data POST with the audio under the 'file'
    field.  Returns JSON {'transcription': <text>} on success, or an
    {'error': ...} payload with status 400 (bad request) or 500
    (transcription failure).
    """
    # Check if a file was uploaded
    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']

    # Check if the file is empty
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    # Check allowed file types
    if not allowed_file(file.filename):
        return jsonify({'error': 'Unsupported file type'}), 400

    # Save to a unique temporary file.  The previous fixed "temp_audio"
    # name raced between concurrent requests, and the file leaked when
    # transcription raised.  Keep the original extension so the audio
    # decoder can sniff the container format.
    suffix = '.' + file.filename.rsplit('.', 1)[1].lower()
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    temp_path = tmp.name
    tmp.close()
    try:
        file.save(temp_path)

        # Transcribe the audio
        result = model.transcribe(temp_path)
        transcription = result["text"]
        return jsonify({'transcription': transcription})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        # Always clean up the temporary file, even when transcription fails.
        if os.path.exists(temp_path):
            os.remove(temp_path)
# Emotion classifier used by /classify; return_all_scores=True makes the
# pipeline emit the score for every label rather than only the top one.
# NOTE(review): return_all_scores is deprecated in newer transformers
# releases in favor of top_k=None, but that changes the output nesting
# returned to clients — confirm before switching.
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)

@app.route('/classify', methods=['POST'])
def classify():
    """Classify the emotion of a text.

    Expects a JSON body {'text': <str>}.  Returns the classifier's full
    list of label/score pairs as JSON, or {'error': ...} with 400 for a
    missing/malformed body and 500 for classifier failures.
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body; previously that case raised TypeError on
        # the membership test and surfaced as a misleading 500.
        data = request.get_json(silent=True)
        if data is None or 'text' not in data:
            return jsonify({"error": "Missing 'text' field"}), 400

        text = data['text']
        result = classifier(text)
        return jsonify(result)

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# NER pipeline for the /ner endpoint.
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
# BUG FIX: this previously rebound the module-level name `model`,
# clobbering the Whisper model that /transcribe depends on — every
# transcription after startup then failed.  Use a distinct name.
ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
nlp = pipeline("ner", model=ner_model, tokenizer=tokenizer)
@app.route('/ner', methods=['POST'])
def ner_endpoint():
    """Run named-entity recognition over the posted text.

    Expects a JSON body {'text': <str>}.  Returns JSON
    {'entities': [{'word': ..., 'entity': ...}, ...]}, or
    {'error': ...} with 400 for a missing/malformed body and 500 for
    pipeline failures.
    """
    try:
        # Get text from request; silent=True yields None on a bad body,
        # which previously raised AttributeError on .get() and surfaced
        # as a misleading 500.  Answer 400 instead (matches /classify).
        data = request.get_json(silent=True)
        if data is None or not data.get("text"):
            return jsonify({"error": "Missing 'text' field"}), 400
        text = data["text"]

        # Perform NER
        ner_results = nlp(text)

        # Extract words and their corresponding entities
        words_and_entities = [
            {"word": result['word'], "entity": result['entity']}
            for result in ner_results
        ]

        # Return JSON response with the words and their entities
        return jsonify({"entities": words_and_entities})
    except Exception as e:
        return jsonify({"error": str(e)}), 500