Spaces:
No application file
No application file
Commit
·
480e694
1
Parent(s):
d435341
Product Recommendation RestAPI
Browse files- .gitignore +29 -0
- README.md +52 -1
- backend/Dockerfile +8 -0
- backend/__init__.py +1 -0
- app.py → backend/app.py +36 -48
- backend/config/settings.py +26 -0
- backend/docker-compose.yml +16 -0
- backend/models/__init__.py +1 -0
- backend/models/schemas.py +18 -0
- backend/recommend.py +36 -0
- backend/requirements.txt +12 -0
- backend/server.py +101 -0
- backend/tests/conftest.py +19 -0
- backend/tests/test_recommend.py +0 -0
- backend/tests/test_server.py +0 -0
- backend/train.py +90 -0
- backend/upload_model.py +22 -0
- backend/utils/helpers.py +25 -0
- key/carteya-432304-f95ff2ea1148.json +13 -0
- requirements.txt +0 -3
- run.py +10 -0
- setup.py +19 -0
.gitignore
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
*.py[cod]
|
3 |
+
*$py.class
|
4 |
+
*.so
|
5 |
+
.Python
|
6 |
+
build/
|
7 |
+
develop-eggs/
|
8 |
+
dist/
|
9 |
+
downloads/
|
10 |
+
eggs/
|
11 |
+
.eggs/
|
12 |
+
lib/
|
13 |
+
lib64/
|
14 |
+
parts/
|
15 |
+
sdist/
|
16 |
+
var/
|
17 |
+
wheels/
|
18 |
+
*.egg-info/
|
19 |
+
.installed.cfg
|
20 |
+
*.egg
|
21 |
+
.env
|
22 |
+
.venv
|
23 |
+
venv/
|
24 |
+
ENV/
|
25 |
+
.idea/
|
26 |
+
.vscode/
|
27 |
+
*.pkl
|
28 |
+
models/*
|
29 |
+
!models/.gitkeep
|
README.md
CHANGED
@@ -11,4 +11,55 @@ license: apache-2.0
|
|
11 |
short_description: Testing for e-commerce
|
12 |
---
|
13 |
|
14 |
-
An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.25.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
short_description: Testing for e-commerce
|
12 |
---
|
13 |
|
14 |
+
An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.25.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
15 |
+
|
16 |
+
# README.md
|
17 |
+
# Product Recommender
|
18 |
+
|
19 |
+
A machine learning-powered product recommendation system that uses semantic search to find similar products based on user queries.
|
20 |
+
|
21 |
+
## Features
|
22 |
+
|
23 |
+
- Semantic search using sentence transformers
|
24 |
+
- FastAPI backend for quick recommendations
|
25 |
+
- BigQuery integration for training data
|
26 |
+
- HuggingFace model hosting
|
27 |
+
- Docker support for easy deployment
|
28 |
+
|
29 |
+
## Setup
|
30 |
+
|
31 |
+
1. Install dependencies:
|
32 |
+
```bash
|
33 |
+
pip install -e .
|
34 |
+
```
|
35 |
+
|
36 |
+
2. Copy and configure environment variables:
|
37 |
+
```bash
|
38 |
+
cp backend/.env.example backend/.env
|
39 |
+
# Edit .env with your credentials
|
40 |
+
```
|
41 |
+
|
42 |
+
3. Train the model:
|
43 |
+
```bash
|
44 |
+
python -m backend.train
|
45 |
+
```
|
46 |
+
|
47 |
+
4. Start the API:
|
48 |
+
```bash
|
49 |
+
python -m backend.server
|
50 |
+
```
|
51 |
+
|
52 |
+
## API Documentation
|
53 |
+
|
54 |
+
Once running, visit http://localhost:8000/docs for the OpenAPI documentation.
|
55 |
+
|
56 |
+
## Docker Usage
|
57 |
+
|
58 |
+
```bash
|
59 |
+
cd backend
|
60 |
+
docker-compose up --build
|
61 |
+
```
|
62 |
+
|
63 |
+
## License
|
64 |
+
|
65 |
+
Apache License 2.0 - See LICENSE file for details
|
backend/Dockerfile
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /app
|
4 |
+
COPY . /app
|
5 |
+
|
6 |
+
RUN pip install --no-cache-dir fastapi uvicorn transformers torch sentence-transformers scikit-learn python-dotenv google-cloud-bigquery
|
7 |
+
|
8 |
+
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
|
backend/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Empty file to make the directory a Python package
|
app.py → backend/app.py
RENAMED
@@ -1,74 +1,62 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
from huggingface_hub.utils import HfHubHTTPError
|
4 |
-
import os
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
-
# Load environment variables from .env file
|
8 |
-
|
|
|
9 |
|
10 |
-
"""
|
11 |
-
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
12 |
-
"""
|
13 |
api_token = os.getenv("HUGGINGFACE_API_TOKEN")
|
14 |
if not api_token:
|
15 |
-
raise ValueError("HUGGINGFACE_API_TOKEN environment variable not set")
|
16 |
|
17 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=api_token)
|
18 |
|
19 |
-
|
20 |
-
def respond(
|
21 |
-
message,
|
22 |
-
history: list[tuple[str, str]],
|
23 |
-
system_message,
|
24 |
-
max_tokens,
|
25 |
-
temperature,
|
26 |
-
top_p,
|
27 |
-
):
|
28 |
messages = [{"role": "system", "content": system_message}]
|
29 |
-
|
30 |
for val in history:
|
31 |
if val[0]:
|
32 |
messages.append({"role": "user", "content": val[0]})
|
33 |
if val[1]:
|
34 |
messages.append({"role": "assistant", "content": val[1]})
|
35 |
-
|
36 |
messages.append({"role": "user", "content": message})
|
37 |
-
|
38 |
response = ""
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
""
|
54 |
-
|
55 |
-
|
56 |
demo = gr.ChatInterface(
|
57 |
-
respond,
|
|
|
|
|
|
|
58 |
additional_inputs=[
|
59 |
-
gr.Textbox(value="You are a friendly
|
60 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
61 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
62 |
-
gr.Slider(
|
63 |
-
|
64 |
-
maximum=1.0,
|
65 |
-
value=0.95,
|
66 |
-
step=0.05,
|
67 |
-
label="Top-p (nucleus sampling)",
|
68 |
-
),
|
69 |
-
],
|
70 |
)
|
71 |
|
72 |
-
|
73 |
if __name__ == "__main__":
|
74 |
-
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
from huggingface_hub import InferenceClient
|
4 |
from huggingface_hub.utils import HfHubHTTPError
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
+
# Load environment variables from .env file in parent directory
|
8 |
+
env_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), '.env')
|
9 |
+
load_dotenv(dotenv_path=env_path)
|
10 |
|
|
|
|
|
|
|
11 |
api_token = os.getenv("HUGGINGFACE_API_TOKEN")
|
12 |
if not api_token:
|
13 |
+
raise ValueError("HUGGINGFACE_API_TOKEN environment variable not set. Please check your .env file.")
|
14 |
|
15 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=api_token)
|
16 |
|
17 |
+
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for ``message``, yielding the growing reply.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs from earlier
            turns; empty/falsy entries are skipped.
        system_message: Text sent as the system prompt.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each streamed token, or a single
        ``"Error: ..."`` string if the request fails.
    """
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})
    response = ""

    try:
        # Use a distinct loop name so the ``message`` parameter is not shadowed.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                # A chunk without choices carries no text; skip it instead of
                # failing with IndexError and discarding the partial answer.
                continue
            delta = getattr(chunk.choices[0], "delta", None)
            token = getattr(delta, "content", None)
            if token is None:
                # ``content`` can be present but None; concatenating it to a
                # str would raise TypeError.
                continue
            response += token
            yield response
    except Exception as e:
        yield f"Error: {str(e)}"
|
43 |
+
|
44 |
+
# Create the Gradio interface
|
45 |
demo = gr.ChatInterface(
|
46 |
+
fn=respond,
|
47 |
+
title="E-commerce Chatbot",
|
48 |
+
description="Ask me anything about products!",
|
49 |
+
examples=["Tell me about gaming laptops", "What are the best smartphones?"],
|
50 |
additional_inputs=[
|
51 |
+
gr.Textbox(value="You are a friendly E-commerce assistant.", label="System message"),
|
52 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
53 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
54 |
+
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
|
55 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
)
|
57 |
|
|
|
58 |
if __name__ == "__main__":
|
59 |
+
try:
|
60 |
+
demo.launch(server_name="0.0.0.0", server_port=8000, share=True)
|
61 |
+
except Exception as e:
|
62 |
+
print(f"Error launching the demo: {e}")
|
backend/config/settings.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic_settings import BaseSettings
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
class Settings(BaseSettings):
    # Application configuration. Fields without a default are required and
    # must be supplied through the environment or the ".env" file named in
    # ``model_config`` below.

    # API Settings
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 8000

    # Model Settings
    MODEL_NAME: str = "all-MiniLM-L6-v2"
    MODEL_PATH: str = "../models/product_embeddings.pkl"

    # HuggingFace Settings
    HUGGINGFACE_API_TOKEN: str
    HUGGINGFACE_USERNAME: Optional[str] = None

    # BigQuery Settings
    BIGQUERY_PROJECT_ID: str
    BIGQUERY_DATASET: str
    BIGQUERY_TABLE: str
    GOOGLE_APPLICATION_CREDENTIALS: str

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    model_config = {"env_file": ".env"}
|
25 |
+
|
26 |
+
settings = Settings()
|
backend/docker-compose.yml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: '3.8'
|
2 |
+
|
3 |
+
services:
|
4 |
+
api:
|
5 |
+
build: .
|
6 |
+
ports:
|
7 |
+
- "8000:8000"
|
8 |
+
volumes:
|
9 |
+
- ../models:/app/models
|
10 |
+
env_file:
|
11 |
+
- .env
|
12 |
+
healthcheck:
|
13 |
+
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
14 |
+
interval: 30s
|
15 |
+
timeout: 10s
|
16 |
+
retries: 3
|
backend/models/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Empty file to make the directory a Python package
|
backend/models/schemas.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
from typing import List, Optional
|
3 |
+
|
4 |
+
class RecommendationRequest(BaseModel):
    # Request body for a recommendation lookup.
    query: str  # free-text search string
    top_n: Optional[int] = 5  # number of results requested; defaults to 5


class ProductRecommendation(BaseModel):
    # One recommended product together with its similarity score.
    product_id: str
    product_name: str
    description: str
    brand: str
    price: float
    categories: List[str]
    score: float  # similarity between the query and this product


class RecommendationResponse(BaseModel):
    # Response envelope wrapping the list of recommended products.
    recommendations: List[ProductRecommendation]
|
backend/recommend.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import numpy as np
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
|
6 |
+
# Load model embeddings
|
7 |
+
with open("../models/product_embeddings.pkl", "rb") as f:
|
8 |
+
data = pickle.load(f)
|
9 |
+
|
10 |
+
# Load transformer model
|
11 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
12 |
+
|
13 |
+
def recommend_products(user_query, top_n=5):
    """Find similar products based on user search.

    Args:
        user_query: Free-text query, encoded with the module-level ``model``.
        top_n: Maximum number of matches to return.

    Returns:
        A list of dicts with keys ``search_query``, ``product`` and ``score``,
        ordered from most to least similar. ``search_query`` is ``None`` when
        the loaded embeddings carry no ``"search_queries"`` entry. An empty
        list is returned when ``top_n`` is ``None`` or not positive.
    """
    if top_n is None or top_n <= 0:
        # ``[-0:]`` would slice the whole array and return every product.
        return []

    query_embedding = model.encode(user_query).reshape(1, -1)

    # Compute similarity scores against every stored embedding.
    scores = cosine_similarity(query_embedding, data["embeddings"])[0]
    top_indices = np.argsort(scores)[-top_n:][::-1]  # best match first

    # NOTE(review): the embeddings written by train.py do not appear to include
    # "search_queries"; tolerate its absence rather than raising KeyError.
    search_queries = data.get("search_queries")

    return [
        {
            "search_query": search_queries[i] if search_queries is not None else None,
            "product": data["product_names"][i],
            "score": float(scores[i]),
        }
        for i in top_indices
    ]
|
30 |
+
|
31 |
+
# Example test
|
32 |
+
if __name__ == "__main__":
|
33 |
+
query = "gaming laptop"
|
34 |
+
results = recommend_products(query)
|
35 |
+
for r in results:
|
36 |
+
print(f"🔹 {r['product']} (Score: {r['score']:.2f})")
|
backend/requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
huggingface_hub==0.25.2
|
2 |
+
requests==2.31.0
|
3 |
+
python-dotenv==1.0.0
|
4 |
+
fastapi==0.103.1
|
5 |
+
uvicorn==0.23.2
|
6 |
+
torch>=2.2.0
|
7 |
+
transformers>=4.38.1
|
8 |
+
gradio>=4.19.2
|
9 |
+
sentence-transformers>=2.5.0
|
10 |
+
scikit-learn>=1.4.0
|
11 |
+
numpy>=1.26.0
|
12 |
+
pandas>=2.2.0
|
backend/server.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import warnings
|
3 |
+
import logging
|
4 |
+
from fastapi import FastAPI, HTTPException
|
5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
6 |
+
import uvicorn
|
7 |
+
|
8 |
+
# Suppress TensorFlow warnings
|
9 |
+
warnings.filterwarnings('ignore', category=UserWarning)
|
10 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow logging
|
11 |
+
logging.getLogger('tensorflow').setLevel(logging.ERROR)
|
12 |
+
|
13 |
+
from pydantic import BaseModel
|
14 |
+
import pickle
|
15 |
+
import numpy as np
|
16 |
+
from sentence_transformers import SentenceTransformer
|
17 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
18 |
+
from backend.models.schemas import RecommendationRequest, ProductRecommendation, RecommendationResponse
|
19 |
+
|
20 |
+
# Initialize FastAPI app with metadata
|
21 |
+
app = FastAPI(
|
22 |
+
title="Product Recommendation API",
|
23 |
+
description="API for getting product recommendations based on user queries",
|
24 |
+
version="1.0.0"
|
25 |
+
)
|
26 |
+
|
27 |
+
# Add CORS middleware
|
28 |
+
app.add_middleware(
|
29 |
+
CORSMiddleware,
|
30 |
+
allow_origins=["*"],
|
31 |
+
allow_credentials=True,
|
32 |
+
allow_methods=["*"],
|
33 |
+
allow_headers=["*"],
|
34 |
+
)
|
35 |
+
|
36 |
+
# Load embeddings from the correct path
|
37 |
+
models_dir = os.path.join(os.path.dirname(__file__), "models")
|
38 |
+
embeddings_path = os.path.join(models_dir, "product_embeddings.pkl")
|
39 |
+
|
40 |
+
# Ensure models directory exists
|
41 |
+
os.makedirs(models_dir, exist_ok=True)
|
42 |
+
|
43 |
+
try:
|
44 |
+
# Load embeddings
|
45 |
+
if os.path.exists(embeddings_path):
|
46 |
+
with open(embeddings_path, "rb") as f:
|
47 |
+
data = pickle.load(f)
|
48 |
+
else:
|
49 |
+
raise FileNotFoundError("Product embeddings file not found")
|
50 |
+
|
51 |
+
# Load model
|
52 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
53 |
+
except Exception as e:
|
54 |
+
print(f"Error initializing server: {e}")
|
55 |
+
raise
|
56 |
+
|
57 |
+
@app.get("/")
def read_root():
    """Return a short status payload including the number of loaded products."""
    return {
        "message": "Product Recommendation API",
        "status": "active",
        "total_products": len(data["product_ids"]) if "product_ids" in data else 0,
    }


@app.get("/health")
def health_check():
    """Liveness probe.

    The docker-compose healthcheck requests ``/health``; without this route
    the probe would receive a 404 and the container would be reported
    unhealthy.
    """
    return {"status": "ok"}
|
64 |
+
|
65 |
+
@app.post("/recommend", response_model=RecommendationResponse)
def recommend_products(request: RecommendationRequest):
    """Get product recommendations based on user search.

    Encodes ``request.query``, ranks the stored embeddings by cosine
    similarity, and returns the ``top_n`` best matches.

    Raises:
        HTTPException: 400 when ``top_n`` is less than 1; 500 when encoding,
            scoring or building the response fails.
    """
    import ast  # local import keeps this block self-contained

    # The schema allows ``top_n`` to be None; fall back to its declared default.
    top_n = 5 if request.top_n is None else request.top_n
    if top_n < 1:
        # ``[-0:]`` would return the whole catalogue; reject instead. Raised
        # outside the try block so it is not rewrapped as a 500.
        raise HTTPException(status_code=400, detail="top_n must be a positive integer")

    try:
        query_embedding = model.encode(request.query).reshape(1, -1)

        # Compute similarity scores
        scores = cosine_similarity(query_embedding, data["embeddings"])[0]
        top_indices = np.argsort(scores)[-top_n:][::-1]

        recommendations = []
        for i in top_indices:
            categories = data["categories"][i]
            if isinstance(categories, str):
                # Stored as a Python-literal string; literal_eval parses it
                # without executing arbitrary code the way eval() would.
                categories = ast.literal_eval(categories)

            recommendations.append(ProductRecommendation(
                product_id=data["product_ids"][i],
                product_name=data["product_names"][i],
                description=data["descriptions"][i],
                brand=data["brands"][i],
                price=float(data["prices"][i]),
                categories=categories,
                score=float(scores[i]),
            ))

        return RecommendationResponse(recommendations=recommendations)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
90 |
+
|
91 |
+
if __name__ == "__main__":
|
92 |
+
try:
|
93 |
+
uvicorn.run(
|
94 |
+
"backend.server:app",
|
95 |
+
host="0.0.0.0",
|
96 |
+
port=8000,
|
97 |
+
reload=True,
|
98 |
+
access_log=True
|
99 |
+
)
|
100 |
+
except Exception as e:
|
101 |
+
print(f"Error starting server: {e}")
|
backend/tests/conftest.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
from pathlib import Path
|
3 |
+
import pickle
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
@pytest.fixture
def mock_embeddings():
    """Return a fake embeddings payload with one name/query per embedding row.

    The lists are kept the same length as the embedding matrix so that any
    row index selected by a similarity ranking can be looked up safely.
    """
    n_items = 10
    return {
        "embeddings": np.random.rand(n_items, 384),
        "search_queries": [f"query{i}" for i in range(1, n_items + 1)],
        "product_names": [f"product{i}" for i in range(1, n_items + 1)],
    }
|
13 |
+
|
14 |
+
@pytest.fixture
def mock_model_path(tmp_path, mock_embeddings):
    """Pickle ``mock_embeddings`` into ``tmp_path`` and return the file path."""
    pickle_file = tmp_path / "product_embeddings.pkl"
    pickle_file.write_bytes(pickle.dumps(mock_embeddings))
    return pickle_file
|
backend/tests/test_recommend.py
ADDED
File without changes
|
backend/tests/test_server.py
ADDED
File without changes
|
backend/train.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
from google.cloud import bigquery
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
# Load environment variables
|
11 |
+
load_dotenv()
|
12 |
+
|
13 |
+
def setup_credentials():
    """Resolve and export the Google Cloud credentials file path.

    Reads ``GOOGLE_APPLICATION_CREDENTIALS`` from the environment, resolves a
    relative value against this module's directory, writes the resolved path
    back into the environment and returns it.

    Raises:
        ValueError: If the variable is unset or empty.
        FileNotFoundError: If the resolved path does not exist.
    """
    configured = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if not configured:
        raise ValueError("GOOGLE_APPLICATION_CREDENTIALS not set in .env file")

    # Relative paths are interpreted relative to this file, not the cwd.
    if os.path.isabs(configured):
        resolved = configured
    else:
        resolved = os.path.join(os.path.dirname(__file__), configured)

    if not os.path.exists(resolved):
        raise FileNotFoundError(f"Credentials file not found at: {resolved}")

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = resolved
    return resolved
|
28 |
+
|
29 |
+
def _parse_categories(raw):
    """Return ``raw`` as a list of category strings.

    Accepts a Python-literal string (e.g. ``"['a', 'b']"``), an existing
    sequence, or ``None``.
    """
    import ast  # local import keeps this block self-contained

    if raw is None:
        return []
    if isinstance(raw, str):
        # literal_eval parses the literal without executing code, unlike eval().
        return [str(item) for item in ast.literal_eval(raw)]
    return [str(item) for item in raw]


def main():
    """Build product embeddings from BigQuery and pickle them to disk.

    Queries the active products from the table named by the ``BIGQUERY_*``
    environment variables, encodes a combined text for each product with the
    ``all-MiniLM-L6-v2`` sentence-transformer, and writes the result to
    ``models/product_embeddings.pkl`` next to this file.
    """
    # Setup credentials
    creds_path = setup_credentials()
    print(f"Using credentials from: {creds_path}")

    table_parts = [
        os.getenv(name)
        for name in ("BIGQUERY_PROJECT_ID", "BIGQUERY_DATASET", "BIGQUERY_TABLE")
    ]
    if not all(table_parts):
        # Otherwise the query would target the literal table `None.None.None`.
        print("Error querying BigQuery: BIGQUERY_PROJECT_ID, BIGQUERY_DATASET and BIGQUERY_TABLE must be set")
        return
    table = ".".join(table_parts)

    # BigQuery setup
    client = bigquery.Client()
    query = f"""
    SELECT
        product_id,
        product_name,
        description,
        category,
        brand,
        price
    FROM `{table}`
    WHERE status = 'ACTIVE'
    """

    try:
        df = client.query(query).to_dataframe()
    except Exception as e:
        print(f"Error querying BigQuery: {e}")
        return

    if df.empty:
        # Nothing to encode; stacking an empty embedding list would fail.
        print("Total products processed: 0")
        return

    # Prepare product descriptions for embedding
    df['combined_text'] = df.apply(
        lambda x: f"{x['product_name']} {x['description']} {x['brand']} {' '.join(_parse_categories(x['category']))}",
        axis=1
    )

    # Load pre-trained NLP model
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Generate embeddings in one batched call instead of one call per row.
    print("Generating embeddings...")
    embeddings = np.asarray(model.encode(df["combined_text"].tolist()))

    # Save embeddings
    embeddings_dict = {
        "product_ids": df["product_id"].tolist(),
        "product_names": df["product_name"].tolist(),
        "descriptions": df["description"].tolist(),
        "brands": df["brand"].tolist(),
        "prices": df["price"].tolist(),
        "categories": df["category"].tolist(),
        "embeddings": embeddings,
    }

    # Save model embeddings
    output_dir = os.path.join(os.path.dirname(__file__), "models")
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, "product_embeddings.pkl")
    with open(output_path, "wb") as f:
        pickle.dump(embeddings_dict, f)

    print(f"✅ Model trained and embeddings saved to: {output_path}")
    print(f"Total products processed: {len(df)}")
|
88 |
+
|
89 |
+
if __name__ == "__main__":
|
90 |
+
main()
|
backend/upload_model.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import shutil
|
2 |
+
from huggingface_hub import HfApi
|
3 |
+
|
4 |
+
# Hugging Face setup
HF_USERNAME = "your-hf-username"
REPO_NAME = f"{HF_USERNAME}/product-recommendation"
api = HfApi()

# Make sure the target repository exists before uploading.
api.create_repo(REPO_NAME, exist_ok=True)

# Stage a copy of the embeddings file in the current working directory.
shutil.copy("../models/product_embeddings.pkl", "product_embeddings.pkl")

# Push the staged copy to the repository root.
api.upload_file(
    path_or_fileobj="product_embeddings.pkl",
    path_in_repo="product_embeddings.pkl",
    repo_id=REPO_NAME,
)

print(f"✅ Model uploaded to Hugging Face: https://huggingface.co/{REPO_NAME}")
|
backend/utils/helpers.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from pathlib import Path
|
3 |
+
from typing import Optional
|
4 |
+
|
5 |
+
def setup_logging(name: str, level: str = "INFO") -> logging.Logger:
    """Return the logger called ``name``, set to ``level``.

    A stream handler with a timestamped format is attached only when the
    logger has no handlers yet, so repeated calls do not duplicate output.
    """
    log = logging.getLogger(name)
    log.setLevel(level)

    if log.handlers:
        # Already configured on an earlier call.
        return log

    stream = logging.StreamHandler()
    stream.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )
    log.addHandler(stream)
    return log
|
18 |
+
|
19 |
+
def ensure_model_path(path: Optional[str] = None) -> Path:
    """Create the model directory if needed and return it as a ``Path``.

    When ``path`` is ``None`` the relative directory ``"../models"`` is used.
    Missing parent directories are created; an existing directory is left
    untouched.
    """
    target = Path("../models" if path is None else path)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
key/carteya-432304-f95ff2ea1148.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"type": "service_account",
|
3 |
+
"project_id": "carteya-432304",
|
4 |
+
"private_key_id": "f95ff2ea1148613b8c8c5957d7abda01b59a6ba0",
|
5 |
+
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDGFYbpV4yF2Li3\ncijUkelpIe8Tr6/akeqyBwLD/zaypyHjHK12j53ZC3I7mFNgMC5Iz+HLD/QYtIET\nxp33fH5GFIC2d7USeco67Ob9T9SAIQZFVTf0yhmixYO+uCvOobwUwhZqXJVEAdxz\nvRxSWvhaWEb+SQHfIaGlDZskwQgP6UJ0/dYa/I0kOrjDIaoID3p54Wz5+un0AmU3\nD/hWCSiLQNENt2y1Zl+X4GZMQ6N8Cx3OFAzgmFjcxNM8y+lA7vlY/YNhVWpt1Rnx\nIksxs9tGm0VuMOYR1poX5fUCAi+xjBzPAFlCd4YKNPpiR7Z33eiYsYFNwBFn7akI\nS1Uy2TE7AgMBAAECggEAD+ZVdHBzFqFNPmha76kgp7AyHQ3qmyxAPvcCGVJohRGW\nTkEUiWJAxNW+3xq8gomLhYuuL5sBuGPwwnIcZuumxSBLLppQPgHpoi424llgkMJV\n2wXannsEcMTS7OGinozr6Gyl2PbCgnl1xRKGbfPgYHwagrnAfTUZYQLFE7+s0n3+\nC0gzYMc0KxZlBZRRLoDXP+QpaGg8x+C0LKG2VFi2n7LMgPJKxGLJGDBZzCWcxIdh\nWEus1FyYCZfBAtT7Z8sUqYAEZDfuE8huMyXZM+ChMsi1C6T5H30CtpWgfHwZhoRA\nyAopPxAMR1VYKIZQ8aNE0qeE5D8SzboPfKtfeIw0jQKBgQDpjFbb38e00aAvPULP\n8FKvUwIKvIMufpfExNXLpgt3GUZwr3nUJpibn3J2Wr+E2vESBqd56F7/47Uog8Ba\n0OjoCUVofQJsKWeBXz+FSmKuYwXgFxr5JTEd7Q7m6CTJrxODbOcOw6Mgb9vUGtWA\nkdeedBdTfon7PjG6UaXg7AU89QKBgQDZIGjyWUC4xhsgxhEiUdeFvq+hFkkgdNVS\nQA9BGXHuD63qICeQl6637Vxk9R8lEguGmY6JL65Xk62/qzNNxfdM0wHo/83VIqeO\na2PKRbwKAC25alsdDjfuZx5xTF/eIetPtxHnhm5tkLsi1IzhWJhPQamm7DFW307p\n1fgBv5rXbwKBgCKxFQnuMNUcOmYp/G207iNa+jesDvRuG49ACd4JWTSkzYDZoAgy\nf4LGaZNJIZ/TgfHCiUgji1EJexR8lasn0yD8l5HQ1/ZHJDfkdSQQi54J2YqTBA+0\nRZqDZy5Hl4dZ+VURbTUVPHsKAqZ8IfDKOTQxXHGrn0Vx1KMvk6tYg+KtAoGBAL8Q\nHAmWrLNr8JdIc+9lYWgZwmbYO4VKCiWUpVe90lmQhpPNs8MzFtZXEsTsRnnShT3u\nhlUGFj6Of4h4WG8J03JRBA9KepAhLJzQt0FZV/zc51+PzZml12X8a/d7I6lO48iD\n6kd4LlhP0bz2mPn7ghvMRkPS+B/f5YszdEflmZYVAoGANuiMZ2GbCiImxltDYDfz\nDKdi7FZOUXUKpL8gQVXS6bi2B3hq+EyVIfObYauabmeGVUJq8tpUnOEiN1+qvxQv\nkMwjYQz/0lXXQVPczHVZQvBNccP80S+drQP4JlMfRE2riP2EXTOKgWcViR28W1a8\nBCbXn78RmEYNohssQER3/ss=\n-----END PRIVATE KEY-----\n",
|
6 |
+
"client_email": "[email protected]",
|
7 |
+
"client_id": "104264257508211128690",
|
8 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
9 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
10 |
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
11 |
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/bronze-layer%40carteya-432304.iam.gserviceaccount.com",
|
12 |
+
"universe_domain": "googleapis.com"
|
13 |
+
}
|
requirements.txt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
huggingface_hub==0.25.2
|
2 |
-
requests==2.31.0
|
3 |
-
python-dotenv==1.0.0
|
|
|
|
|
|
|
|
run.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uvicorn
|
2 |
+
|
3 |
+
if __name__ == "__main__":
|
4 |
+
uvicorn.run(
|
5 |
+
"backend.server:app",
|
6 |
+
host="0.0.0.0",
|
7 |
+
port=8000,
|
8 |
+
reload=True,
|
9 |
+
access_log=True
|
10 |
+
)
|
setup.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from setuptools import setup, find_packages
|
2 |
+
|
3 |
+
setup(
|
4 |
+
name="product_recommender",
|
5 |
+
version="0.1.0",
|
6 |
+
packages=find_packages(),
|
7 |
+
install_requires=[
|
8 |
+
"fastapi>=0.103.1",
|
9 |
+
"uvicorn>=0.23.2",
|
10 |
+
"sentence-transformers>=2.5.0",
|
11 |
+
"scikit-learn>=1.4.0",
|
12 |
+
"python-dotenv>=1.0.0",
|
13 |
+
"google-cloud-bigquery>=3.13.0",
|
14 |
+
"huggingface-hub>=0.25.2",
|
15 |
+
"pydantic>=2.5.0",
|
16 |
+
"pydantic-settings>=2.1.0",
|
17 |
+
],
|
18 |
+
python_requires=">=3.9",
|
19 |
+
)
|