File size: 3,362 Bytes
b63d954
 
64a8c4a
3b06717
5b86980
3b06717
 
 
b63d954
 
 
 
5b86980
 
 
 
b63d954
 
 
 
 
5b86980
b63d954
 
5b86980
 
 
 
 
 
 
 
 
 
3b06717
 
5b86980
 
3b06717
 
5b86980
 
 
 
 
3b06717
 
5b86980
 
 
 
 
 
 
 
 
 
 
 
3b06717
5b86980
 
 
 
3b06717
b63d954
5b86980
 
 
b63d954
 
 
 
 
 
 
 
 
 
 
 
3b06717
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests
import gradio as gr
import os
import torch
import json

# Select the GPU when PyTorch reports CUDA support, otherwise stay on the CPU.
# NOTE(review): nothing in the visible code reads `device`; inference is done
# remotely through the HTTP API below — confirm whether it is still needed.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Hosted inference endpoint for the model that classifies the audio
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)
# Built once at import time from the HF_TOKEN environment variable
headers = dict(Authorization=f"Bearer {os.environ.get('HF_TOKEN')}")

def format_error(message):
    """Return *message* wrapped as a one-element list holding an ``{"error": ...}`` dict."""
    payload = {"error": message}
    return [payload]

def classify_audio(audio_file):
    """
    Classify the uploaded audio file using Hugging Face AST model

    Args:
        audio_file: The uploaded audio. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object that exposes the path as ``.name``.
            ``None`` means nothing was uploaded.

    Returns:
        On success, a list of ``{'label': ..., 'score': 'NN.NN%'}`` dicts, one
        per prediction in the API response. On any failure, the one-element
        error list produced by ``format_error``; this function does not raise.
    """
    if audio_file is None:
        return format_error("Please upload an audio file.")

    try:
        token = os.environ.get('HF_TOKEN')
        if not token:
            return format_error("Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token.")
        # Debug: log only the token's length, never the token itself
        print(f"Token present: Yes, Token length: {len(token)}")

        # A plain path string has no `.name` attribute, so reading
        # `audio_file.name` unconditionally fails for path inputs. Accept both
        # a path and a file-like wrapper carrying the path in `.name`.
        if isinstance(audio_file, (str, os.PathLike)):
            audio_path = os.fspath(audio_file)
        else:
            audio_path = audio_file.name

        # Debug: Print audio file info
        print(f"Audio file path: {audio_path}")
        print(f"Audio file size: {os.path.getsize(audio_path)} bytes")

        with open(audio_path, "rb") as f:
            data = f.read()

        print("Sending request to Hugging Face API...")
        # (connect, read) timeout in seconds: without one, requests can wait
        # forever on a stalled connection and hang the UI callback. A timeout
        # raises and is reported through the generic handler below.
        response = requests.post(API_URL, headers=headers, data=data, timeout=(10, 120))

        # Print response for debugging
        print(f"Response status code: {response.status_code}")
        print(f"Response headers: {dict(response.headers)}")
        print(f"Response content: {response.content.decode('utf-8', errors='ignore')}")

        if response.status_code == 200:
            # Render each score as a percentage string for readability
            return [
                {
                    'label': result['label'],
                    'score': f"{result['score']*100:.2f}%",
                }
                for result in response.json()
            ]
        if response.status_code == 401:
            return format_error("Error: Invalid or missing API token. Please check your Hugging Face API token.")
        if response.status_code == 503:
            return format_error("Error: Model is loading. Please try again in a few seconds.")

        # Any other status: surface everything the API sent back
        error_msg = f"Error: API returned status code {response.status_code}\n"
        error_msg += f"Response headers: {dict(response.headers)}\n"
        error_msg += f"Response: {response.text}"
        return format_error(error_msg)

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure as data
        # instead of letting the exception propagate to the caller.
        import traceback
        error_details = traceback.format_exc()
        return format_error(f"Error processing audio: {str(e)}\nDetails:\n{error_details}")

# Build the UI: a single audio upload feeding classify_audio, with the
# returned value rendered in a JSON panel
iface = gr.Interface(
    title="Audio Classification using AST Model",
    description="Upload an audio file to get its classification results using the Audio Spectrogram Transformer model.",
    fn=classify_audio,
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=gr.JSON(label="Classification Results"),
    examples=[],
)

# Start the server only when this file is run as a script, not on import
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")