Chittrarasu committed on
Commit
57d02cb
·
0 Parent(s):
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ PINECONE_API_KEY=pcsk_5NzQp8_MrApuQxQBU5P3YXYqipyVM4hm7BdA7tzB9tYPJQJSWySrtgW3KJHkS5gMYvLJZk
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official slim Python 3.10 image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Copy the requirements file first so the dependency layer stays cached
# when only application code changes
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code. The sources live at the repository root (there is no
# app/ directory in this repository), so each module/package is copied
# explicitly. This also keeps .env and __pycache__ out of the image;
# PINECONE_API_KEY must therefore be provided as an environment variable
# (e.g. a Space secret) at runtime.
COPY main.py .
COPY models/ models/
COPY routers/ routers/
COPY services/ services/

# Expose port 7860 (the port uvicorn is started on below)
EXPOSE 7860

# Start FastAPI with uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Image Search Engine FastAPI
3
+ emoji: 🐨
4
+ colorFrom: purple
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ ---
__pycache__/main.cpython-310.pyc ADDED
Binary file (491 Bytes). View file
 
main.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ from routers import search
3
+
4
# ASGI application object; "main:app" is what the uvicorn command points at.
app = FastAPI(
    title="Image Search API",
)

# Mount the routes declared in routers/search.py on the application.
app.include_router(search.router)
8
+
9
@app.get("/")
def root():
    # Fixed status payload served at the API root; takes no input.
    status = {"message": "Image Search API is running!"}
    return status
models/search_models.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Dict
3
+
4
class SearchResult(BaseModel):
    # A single search hit.
    # NOTE(review): services/pinecone_service.search_similar_images builds
    # dicts with a "url" key rather than "metadata" -- confirm this model
    # matches the intended response shape.

    # Identifier of the matched record.
    id: str
    # Similarity score reported for the match.
    score: float
    # String-to-string metadata attached to the match.
    metadata: Dict[str, str]
8
+
9
class SearchResponse(BaseModel):
    # Envelope for a search reply: the list of hits under "matches".
    matches: List[SearchResult]
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.95.0
2
+ uvicorn>=0.20.0
3
+ sentence-transformers>=2.2.0
4
+ pillow>=9.0.0
5
+ torch>=2.0.0
6
+ pinecone
7
+ python-dotenv>=1.0.0
8
+ numpy>=1.24.0
routers/__pycache__/search.cpython-310.pyc ADDED
Binary file (1.34 kB). View file
 
routers/search.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Query
2
+ from fastapi.responses import JSONResponse
3
+ from services.embedding import get_text_embedding, get_image_embedding
4
+ from services.pinecone_service import search_similar_images
5
+ from typing import List, Dict, Any
6
+
7
# Every route declared on this router is served under the /search prefix and
# tagged "Search".
router = APIRouter(
    prefix="/search",
    tags=["Search"],
)
8
+
9
@router.get("/text")
async def search_by_text(query: str = Query(..., description="Search query")) -> Dict[str, Any]:
    """Search the image index with a free-text query.

    Responds with ``{"matches": [...]}`` on success (an empty list when
    nothing matched) or ``{"error": "..."}`` with status 500 on failure.
    """
    # Function-scope import: fastapi is already a dependency of this module,
    # and this keeps the route self-contained.
    from fastapi.concurrency import run_in_threadpool

    try:
        # get_text_embedding and search_similar_images are synchronous
        # (model inference and a network call). Running them directly inside
        # an async route would block the event loop for every other request,
        # so they are dispatched to the worker threadpool instead.
        query_embedding = await run_in_threadpool(get_text_embedding, query)
        if not query_embedding or not isinstance(query_embedding, list):
            return JSONResponse(content={"error": "Failed to generate embedding"}, status_code=500)

        results = await run_in_threadpool(search_similar_images, query_embedding)

        # A falsy result is normalised to an empty list so the payload shape
        # is the same whether or not anything matched.
        return JSONResponse(content={"matches": results or []}, status_code=200)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
23
+
24
@router.post("/image")
async def search_by_image(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Search the image index with an uploaded image.

    Responds with ``{"matches": [...]}`` on success (an empty list when
    nothing matched) or ``{"error": "..."}`` with status 500 on failure.
    """
    # Function-scope import: fastapi is already a dependency of this module,
    # and this keeps the route self-contained.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Image decoding/encoding and the Pinecone query are synchronous.
        # Running them directly inside an async route would block the event
        # loop for every other request, so they are dispatched to the worker
        # threadpool instead.
        image_embedding = await run_in_threadpool(get_image_embedding, file)
        if not image_embedding or not isinstance(image_embedding, list):
            return JSONResponse(content={"error": "Failed to generate embedding"}, status_code=500)

        results = await run_in_threadpool(search_similar_images, image_embedding)

        # A falsy result is normalised to an empty list so the payload shape
        # is the same whether or not anything matched.
        return JSONResponse(content={"matches": results or []}, status_code=200)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
services/__pycache__/embedding.cpython-310.pyc ADDED
Binary file (1.18 kB). View file
 
services/__pycache__/pinecone_service.cpython-310.pyc ADDED
Binary file (1.28 kB). View file
 
services/embedding.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from PIL import Image
3
+ from fastapi import UploadFile
4
+ from typing import List, Optional
5
+ import torch
6
+
7
# Single module-level encoder instance, created once at import time and used
# by both the text and the image embedding helpers below.
_MODEL_NAME = "clip-ViT-B-32"
model = SentenceTransformer(_MODEL_NAME)
8
+
9
def get_text_embedding(text: str) -> Optional[List[float]]:
    """Encode *text* with the module-level model.

    Returns the embedding as a plain Python list, or ``None`` if encoding
    raised (the error is printed, not propagated).
    """
    try:
        tensor = model.encode(text, convert_to_tensor=True)
        # Move to CPU and convert tensor -> ndarray -> list so the result is
        # made of plain Python values.
        vector = tensor.cpu().numpy().tolist()
    except Exception as e:
        print(f"Error generating text embedding: {e}")
        return None
    return vector
16
+
17
def get_image_embedding(image_file: UploadFile) -> Optional[List[float]]:
    """Encode an uploaded image with the module-level model.

    The upload is opened with PIL, converted to RGB and resized to 224x224
    before encoding. Returns the embedding as a plain Python list, or
    ``None`` if any step raised (the error is printed, not propagated).
    """
    try:
        rgb_image = Image.open(image_file.file).convert("RGB")
        image = rgb_image.resize((224, 224))
        tensor = model.encode(image, convert_to_tensor=True)
        # Move to CPU and convert tensor -> ndarray -> list so the result is
        # made of plain Python values.
        vector = tensor.cpu().numpy().tolist()
    except Exception as e:
        print(f"Error generating image embedding: {e}")
        return None
    return vector
services/pinecone_service.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pinecone import Pinecone
3
+ from dotenv import load_dotenv
4
+ from typing import List, Dict, Any
5
+
6
# Read a local .env file, if there is one, into the process environment.
load_dotenv()

# Index and namespace that every query in this module targets.
INDEX_NAME = "unsplash-index-session"
NAMESPACE = "image-search-dataset"

# The API key is mandatory: stop at import time rather than on the first query.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is missing! Check your .env file.")

# Module-level client and index handle used by search_similar_images.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)
19
+
20
def search_similar_images(query_embedding: List[float], top_k: int = 10) -> List[Dict[str, Any]]:
    """Search for similar images in Pinecone using the given embedding.

    Args:
        query_embedding: Query vector passed to the index.
        top_k: Maximum number of matches to request.

    Returns:
        One dict per match with the keys ``"id"``, ``"score"`` (as a float)
        and ``"url"`` (empty string when the match carries no URL). An empty
        list is returned when there are no matches or when the query fails;
        failures are printed, not raised.
    """
    try:
        results = index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True,
            namespace=NAMESPACE,
        )

        return [
            {
                "id": match["id"],
                "score": float(match["score"]),
                # `or {}` covers a metadata value that is present but None;
                # without it one metadata-less match would raise and the
                # broad handler below would discard every result.
                "url": (match.get("metadata") or {}).get("url", ""),
            }
            for match in (results.get("matches") or [])
        ]

    except Exception as e:
        print(f"❌ Error querying Pinecone: {e}")
        return []