Spaces:
Running
Running
amaye15
committed on
Commit
·
e0b1978
1
Parent(s):
bc82930
Feat - Progress updated
Browse files
src/api/services/embedding_service.py
CHANGED
@@ -87,6 +87,8 @@ class EmbeddingService:
|
|
87 |
self.model = model
|
88 |
self.batch_size = batch_size
|
89 |
self.semaphore = asyncio.Semaphore(max_concurrent_requests) # Rate limiter
|
|
|
|
|
90 |
|
91 |
async def get_embedding(self, text: str) -> List[float]:
|
92 |
"""Generate embeddings for the given text using OpenAI."""
|
@@ -96,6 +98,8 @@ class EmbeddingService:
|
|
96 |
response = await self.client.embeddings.create(
|
97 |
input=[text], model=self.model
|
98 |
)
|
|
|
|
|
99 |
return response.data[0].embedding
|
100 |
except Exception as e:
|
101 |
logger.error(f"Failed to generate embedding: {e}")
|
@@ -106,6 +110,9 @@ class EmbeddingService:
|
|
106 |
) -> pd.DataFrame:
|
107 |
"""Create embeddings for the target column in the dataset."""
|
108 |
logger.info("Generating embeddings...")
|
|
|
|
|
|
|
109 |
batches = [
|
110 |
df[i : i + self.batch_size] for i in range(0, len(df), self.batch_size)
|
111 |
]
|
@@ -126,3 +133,10 @@ class EmbeddingService:
|
|
126 |
)
|
127 |
df_batch[output_column] = embeddings
|
128 |
return df_batch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
self.model = model
|
88 |
self.batch_size = batch_size
|
89 |
self.semaphore = asyncio.Semaphore(max_concurrent_requests) # Rate limiter
|
90 |
+
self.total_requests = 0 # Total number of requests to process
|
91 |
+
self.completed_requests = 0 # Number of completed requests
|
92 |
|
93 |
async def get_embedding(self, text: str) -> List[float]:
|
94 |
"""Generate embeddings for the given text using OpenAI."""
|
|
|
98 |
response = await self.client.embeddings.create(
|
99 |
input=[text], model=self.model
|
100 |
)
|
101 |
+
self.completed_requests += 1 # Increment completed requests
|
102 |
+
self._log_progress() # Log progress
|
103 |
return response.data[0].embedding
|
104 |
except Exception as e:
|
105 |
logger.error(f"Failed to generate embedding: {e}")
|
|
|
110 |
) -> pd.DataFrame:
|
111 |
"""Create embeddings for the target column in the dataset."""
|
112 |
logger.info("Generating embeddings...")
|
113 |
+
self.total_requests = len(df) # Set total number of requests
|
114 |
+
self.completed_requests = 0 # Reset completed requests counter
|
115 |
+
|
116 |
batches = [
|
117 |
df[i : i + self.batch_size] for i in range(0, len(df), self.batch_size)
|
118 |
]
|
|
|
133 |
)
|
134 |
df_batch[output_column] = embeddings
|
135 |
return df_batch
|
136 |
+
|
137 |
+
def _log_progress(self):
    """Log the progress of embedding generation.

    Reads ``self.completed_requests`` and ``self.total_requests`` and emits
    one info-level log line. When a total is known the line includes the
    completion percentage; otherwise only the completed count is reported.
    """
    completed = self.completed_requests
    total = self.total_requests

    # ``total_requests`` is initialised to 0 and only populated by the
    # batch entry point. Without this guard a call made before a total is
    # set (or with an empty dataset) would raise ZeroDivisionError here,
    # and the caller would report a successful request as a failure.
    if not total:
        logger.info(f"Progress: {completed} request(s) completed")
        return

    progress = (completed / total) * 100
    logger.info(f"Progress: {completed}/{total} ({progress:.2f}%)")
|