Commit: Upload 5 files

Files changed:
- Dockerfile (+32 -32)
- README.md (+97 -97)
- main.py (+140 -1)
- requirements.txt (+11 -9)
Dockerfile
CHANGED

@@ -1,33 +1,33 @@: every line of the old Dockerfile was removed and re-added with identical text, so the change is whitespace/line-ending only. The resulting file:

```dockerfile
FROM python:3.11-slim

WORKDIR /code

# Install system dependencies
RUN apt-get update && apt-get install -y \
    wget \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create cache directories with proper permissions
RUN mkdir -p /code/cache && \
    mkdir -p /tmp/cache && \
    chmod 777 /code/cache && \
    chmod 777 /tmp/cache

# Set environment variables for cache directories
ENV TRANSFORMERS_CACHE=/code/cache
ENV HF_HOME=/code/cache
ENV TORCH_HOME=/code/cache

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
```
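The `ENV` lines point every Hugging Face and Torch cache at `/code/cache`, the world-writable directory created just above, so model downloads land somewhere the container can write. A minimal sketch of what that accomplishes at runtime; the env-var names come straight from the Dockerfile, and the model call mirrors main.py:

```python
import os

# Mirror the Dockerfile's ENV lines; set them before transformers is imported
# so the hub cache resolves to the writable directory created in the image.
os.environ.setdefault("HF_HOME", "/code/cache")
os.environ.setdefault("TRANSFORMERS_CACHE", "/code/cache")
os.environ.setdefault("TORCH_HOME", "/code/cache")

from transformers import CLIPModel

# The first call downloads the weights into /code/cache; later calls reuse them.
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
print("weights cached under:", os.environ["HF_HOME"])
```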
README.md
CHANGED

@@ -1,98 +1,98 @@: the README was likewise removed and re-added with identical content (whitespace/line-ending only). The resulting file:

````markdown
---
title: CLIP Service
emoji: 🔍
colorFrom: blue
colorTo: purple
sdk: docker
pinned: false
---

# CLIP Service 🔍

A FastAPI service that provides CLIP (Contrastive Language-Image Pre-training) embeddings for images and text using the `openai/clip-vit-large-patch14` model.

## 🚀 Features

- **Image Encoding**: Generate 768-dimensional embeddings from image URLs
- **Text Encoding**: Generate embeddings from text descriptions
- **High Performance**: Optimized for batch processing
- **REST API**: Simple HTTP endpoints for easy integration

## 📋 API Endpoints

### `POST /encode/image`
Generate embeddings for an image from URL.

**Request:**
```json
{
  "image_url": "https://example.com/image.jpg"
}
```

**Response:**
```json
{
  "embedding": [0.1, -0.2, 0.3, ...], // 768 dimensions
  "dimensions": 768
}
```

### `POST /encode/text`
Generate embeddings for text.

**Request:**
```json
{
  "text": "a beautiful sunset over mountains"
}
```

**Response:**
```json
{
  "embedding": [0.1, -0.2, 0.3, ...], // 768 dimensions
  "dimensions": 768
}
```

### `GET /health`
Check service health and status.

## 🔧 Usage Examples

```bash
# Encode an image
curl -X POST "https://your-username-clip-service.hf.space/encode/image" \
  -H "Content-Type: application/json" \
  -d '{"image_url": "https://example.com/image.jpg"}'

# Encode text
curl -X POST "https://your-username-clip-service.hf.space/encode/text" \
  -H "Content-Type: application/json" \
  -d '{"text": "a beautiful landscape"}'
```

## 🏗️ Integration

This service is designed to work with Pinterest-like applications for:
- Visual similarity search
- Content-based recommendations
- Cross-modal search (text to image, image to text)

## 📝 Model Information

- **Model**: `openai/clip-vit-large-patch14`
- **Embedding Dimensions**: 768
- **Supported Images**: JPG, PNG, GIF, WebP
- **Max Image Size**: Recommended < 10MB

## ⚡ Performance

- **CPU**: ~2-5 seconds per image
- **GPU**: ~0.5-1 second per image (when available)
- **Batch Processing**: Supported for multiple requests

---

Built with ❤️ using [Transformers](https://huggingface.co/transformers) and [FastAPI](https://fastapi.tiangolo.com/)
````
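The README's Integration section mentions cross-modal search. A hedged client sketch of one way to do that with the two documented endpoints: rank candidate captions against an image by cosine similarity of the returned 768-dimensional vectors. The Space URL is a placeholder, and the response is assumed to carry the vector under the `embedding` key, as in the README examples.

```python
import numpy as np
import requests

BASE = "https://your-username-clip-service.hf.space"  # placeholder Space URL

def get_embedding(endpoint: str, payload: dict) -> np.ndarray:
    """Call an encode endpoint and return its embedding as a unit-length vector."""
    resp = requests.post(f"{BASE}{endpoint}", json=payload, timeout=60)
    resp.raise_for_status()
    vec = np.asarray(resp.json()["embedding"], dtype=np.float32)
    return vec / np.linalg.norm(vec)

image_vec = get_embedding("/encode/image", {"image_url": "https://example.com/image.jpg"})
captions = ["a beautiful sunset over mountains", "a bowl of ramen", "a city skyline at night"]
text_vecs = [get_embedding("/encode/text", {"text": t}) for t in captions]

# Cosine similarity of unit vectors is just the dot product; higher means a closer match.
scores = [float(image_vec @ v) for v in text_vecs]
for caption, score in sorted(zip(captions, scores), key=lambda pair: -pair[1]):
    print(f"{score:+.3f}  {caption}")
```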
main.py
CHANGED

This is the substantive change in the commit: audio support via LAION's CLAP model. It adds the CLAP imports and model loading, an `encode_audio` method (streamed download with size and duration limits, 48 kHz mono decoding via librosa, normalized CLAP audio features), an `encode_text_for_audio` method for text-to-audio search, an `AudioRequest` schema, and the `/encode/audio` and `/encode/text-audio` endpoints.

```diff
@@ -1,12 +1,14 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import CLIPProcessor, CLIPModel
+from transformers import CLIPProcessor, CLIPModel, ClapModel, ClapProcessor
 import torch
 from PIL import Image
 import requests
 import numpy as np
 import io
 import logging
+import librosa
+import soundfile as sf
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -20,6 +22,11 @@ class CLIPService:
         self.model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
         self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
         logger.info("CLIP model loaded successfully")
+
+        logger.info("Loading CLAP model for audio...")
+        self.clap_model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
+        self.clap_processor = ClapProcessor.from_pretrained("laion/clap-htsat-unfused")
+        logger.info("CLAP model loaded successfully")
 
     def encode_image(self, image_url: str) -> list:
         try:
@@ -84,6 +91,123 @@ class CLIPService:
         except Exception as e:
             logger.error(f"Error encoding text '{text}': {str(e)}")
             raise HTTPException(status_code=500, detail=f"Failed to encode text: {str(e)}")
+
+    def encode_audio(self, audio_url: str) -> list:
+        try:
+            # Enhanced headers for audio files with MIME whitelist
+            headers = {
+                'User-Agent': 'CLAP-Service/1.0 (Audio-Embedding-Service)',
+                'Accept': 'audio/mpeg, audio/wav, audio/mp4, audio/ogg, audio/flac',
+                'Cache-Control': 'no-cache'
+            }
+
+            logger.info(f"Fetching audio from URL: {audio_url}")
+
+            # Increase timeout for large files, but add streaming response
+            response = requests.get(audio_url, timeout=60, headers=headers, stream=True)
+            response.raise_for_status()
+
+            # Check content type before processing
+            content_type = response.headers.get('content-type', 'unknown')
+            if not content_type.startswith('audio/'):
+                raise ValueError(f"Invalid content type: {content_type}. Expected audio/*")
+
+            # Check file size before downloading (100MB limit)
+            content_length = response.headers.get('content-length')
+            if content_length and int(content_length) > 100 * 1024 * 1024:
+                raise ValueError(f"Audio file too large: {content_length} bytes. Maximum is 100MB")
+
+            # Stream content to BytesIO with size limit
+            audio_data = io.BytesIO()
+            total_size = 0
+            max_size = 100 * 1024 * 1024  # 100MB
+
+            for chunk in response.iter_content(chunk_size=8192):
+                total_size += len(chunk)
+                if total_size > max_size:
+                    raise ValueError("Audio file too large during download")
+                audio_data.write(chunk)
+
+            audio_data.seek(0)
+            logger.info(f"Successfully fetched audio: {content_type}, {total_size} bytes")
+
+            # Load audio with duration limit (10 minutes = 600 seconds)
+            MAX_DURATION = 600  # 10 minutes
+
+            try:
+                # First, get duration without loading full audio
+                duration = librosa.get_duration(path=audio_data)
+                audio_data.seek(0)  # Reset stream
+
+                if duration > MAX_DURATION:
+                    raise ValueError(f"Audio duration ({duration:.1f}s) exceeds maximum allowed ({MAX_DURATION}s)")
+
+                logger.info(f"Audio duration: {duration:.1f} seconds")
+
+                # Load only first 30 seconds for embedding (CLAP works well with shorter clips)
+                # This reduces memory usage significantly
+                duration_limit = min(30.0, duration)
+
+                # Load audio with librosa (48kHz is CLAP's expected sample rate)
+                waveform, sample_rate = librosa.load(
+                    audio_data,
+                    sr=48000,
+                    mono=True,
+                    duration=duration_limit,
+                    offset=0.0
+                )
+
+                logger.info(f"Processing audio: {len(waveform)} samples at {sample_rate}Hz ({duration_limit:.1f}s)")
+
+            except Exception as e:
+                logger.error(f"Error loading audio file: {str(e)}")
+                raise ValueError(f"Failed to load audio file: {str(e)}")
+
+            # Process audio through CLAP
+            inputs = self.clap_processor(audios=waveform, return_tensors="pt", sampling_rate=48000)
+
+            with torch.no_grad():
+                audio_features = self.clap_model.get_audio_features(**inputs)
+                # Normalize the features
+                audio_features = audio_features / audio_features.norm(dim=-1, keepdim=True)
+
+            embedding = audio_features.numpy().flatten().tolist()
+            logger.info(f"Generated audio embedding with {len(embedding)} dimensions")
+
+            return embedding
+
+        except ValueError as e:
+            # Handle validation errors (file too large, wrong format, etc.)
+            logger.error(f"Validation error for audio {audio_url}: {str(e)}")
+            raise HTTPException(status_code=400, detail=str(e))
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Network error fetching audio {audio_url}: {str(e)}")
+            if hasattr(e, 'response') and e.response is not None:
+                status_code = e.response.status_code
+                if status_code == 403:
+                    raise HTTPException(status_code=403, detail="Access denied to audio URL")
+                elif status_code == 404:
+                    raise HTTPException(status_code=404, detail="Audio not found at URL")
+                elif status_code >= 500:
+                    raise HTTPException(status_code=502, detail="Audio service temporarily unavailable")
+            raise HTTPException(status_code=500, detail=f"Failed to fetch audio: {str(e)}")
+        except Exception as e:
+            logger.error(f"Error encoding audio {audio_url}: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Failed to encode audio: {str(e)}")
+
+    def encode_text_for_audio(self, text: str) -> list:
+        """Encode text for cross-modal audio search"""
+        try:
+            inputs = self.clap_processor(text=[text], return_tensors="pt", padding=True)
+
+            with torch.no_grad():
+                text_features = self.clap_model.get_text_features(**inputs)
+                text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+
+            return text_features.numpy().flatten().tolist()
+        except Exception as e:
+            logger.error(f"Error encoding text for audio '{text}': {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Failed to encode text for audio: {str(e)}")
 
 # Initialize service
 clip_service = CLIPService()
@@ -94,6 +218,9 @@ class ImageRequest(BaseModel):
 class TextRequest(BaseModel):
     text: str
 
+class AudioRequest(BaseModel):
+    audio_url: str
+
 @app.post("/encode/image")
 async def encode_image(request: ImageRequest):
     embedding = clip_service.encode_image(request.image_url)
@@ -104,6 +231,18 @@ async def encode_text(request: TextRequest):
     embedding = clip_service.encode_text(request.text)
     return {"embedding": embedding}
 
+@app.post("/encode/audio")
+async def encode_audio(request: AudioRequest):
+    """Encode audio file to CLAP embedding vector"""
+    embedding = clip_service.encode_audio(request.audio_url)
+    return {"embedding": embedding}
+
+@app.post("/encode/text-audio")
+async def encode_text_for_audio(request: TextRequest):
+    """Encode text for audio similarity search"""
+    embedding = clip_service.encode_text_for_audio(request.text)
+    return {"embedding": embedding}
+
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "model": "clip-vit-large-patch14"}
```
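The new endpoints are not yet documented in the README. A hedged client sketch for them, mirroring the request shapes defined above (`AudioRequest.audio_url`, `TextRequest.text`); the Space URL and audio URL are placeholders:

```python
import numpy as np
import requests

BASE = "https://your-username-clip-service.hf.space"  # placeholder Space URL

def post_embedding(endpoint: str, payload: dict) -> np.ndarray:
    # Generous timeout: the service downloads and decodes the audio server-side.
    resp = requests.post(f"{BASE}{endpoint}", json=payload, timeout=120)
    resp.raise_for_status()
    return np.asarray(resp.json()["embedding"], dtype=np.float32)

# CLAP audio embedding for a publicly reachable audio file
audio_vec = post_embedding("/encode/audio", {"audio_url": "https://example.com/clip.mp3"})

# CLAP text embedding in the same space, for text-to-audio search
query_vec = post_embedding("/encode/text-audio", {"text": "acoustic guitar melody"})

# Both vectors are L2-normalized by the service, so the dot product is cosine similarity.
print("dims:", audio_vec.shape[0], "similarity:", float(audio_vec @ query_vec))
```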
requirements.txt
CHANGED

The nine existing pins are removed and re-added unchanged; `librosa` and `soundfile` are new, supporting the audio endpoints.

```diff
@@ -1,9 +1,11 @@
-torch==2.0.1
-transformers==4.30.0
-Pillow==9.5.0
-requests==2.31.0
-fastapi==0.104.1
-uvicorn==0.22.0
-python-multipart==0.0.6
-pydantic==2.5.0
-numpy<2.0.0
+torch==2.0.1
+transformers==4.30.0
+Pillow==9.5.0
+requests==2.31.0
+fastapi==0.104.1
+uvicorn==0.22.0
+python-multipart==0.0.6
+pydantic==2.5.0
+numpy<2.0.0
+librosa>=0.10.0
+soundfile>=0.12.1
```