amaye15 committed on
Commit
e0b1978
·
1 Parent(s): bc82930

Feat - Progress updated

Browse files
src/api/services/embedding_service.py CHANGED
@@ -87,6 +87,8 @@ class EmbeddingService:
87
  self.model = model
88
  self.batch_size = batch_size
89
  self.semaphore = asyncio.Semaphore(max_concurrent_requests) # Rate limiter
 
 
90
 
91
  async def get_embedding(self, text: str) -> List[float]:
92
  """Generate embeddings for the given text using OpenAI."""
@@ -96,6 +98,8 @@ class EmbeddingService:
96
  response = await self.client.embeddings.create(
97
  input=[text], model=self.model
98
  )
 
 
99
  return response.data[0].embedding
100
  except Exception as e:
101
  logger.error(f"Failed to generate embedding: {e}")
@@ -106,6 +110,9 @@ class EmbeddingService:
106
  ) -> pd.DataFrame:
107
  """Create embeddings for the target column in the dataset."""
108
  logger.info("Generating embeddings...")
 
 
 
109
  batches = [
110
  df[i : i + self.batch_size] for i in range(0, len(df), self.batch_size)
111
  ]
@@ -126,3 +133,10 @@ class EmbeddingService:
126
  )
127
  df_batch[output_column] = embeddings
128
  return df_batch
 
 
 
 
 
 
 
 
87
  self.model = model
88
  self.batch_size = batch_size
89
  self.semaphore = asyncio.Semaphore(max_concurrent_requests) # Rate limiter
90
+ self.total_requests = 0 # Total number of requests to process
91
+ self.completed_requests = 0 # Number of completed requests
92
 
93
  async def get_embedding(self, text: str) -> List[float]:
94
  """Generate embeddings for the given text using OpenAI."""
 
98
  response = await self.client.embeddings.create(
99
  input=[text], model=self.model
100
  )
101
+ self.completed_requests += 1 # Increment completed requests
102
+ self._log_progress() # Log progress
103
  return response.data[0].embedding
104
  except Exception as e:
105
  logger.error(f"Failed to generate embedding: {e}")
 
110
  ) -> pd.DataFrame:
111
  """Create embeddings for the target column in the dataset."""
112
  logger.info("Generating embeddings...")
113
+ self.total_requests = len(df) # Set total number of requests
114
+ self.completed_requests = 0 # Reset completed requests counter
115
+
116
  batches = [
117
  df[i : i + self.batch_size] for i in range(0, len(df), self.batch_size)
118
  ]
 
133
  )
134
  df_batch[output_column] = embeddings
135
  return df_batch
136
+
137
+ def _log_progress(self):
138
+ """Log the progress of embedding generation."""
139
+ progress = (self.completed_requests / self.total_requests) * 100
140
+ logger.info(
141
+ f"Progress: {self.completed_requests}/{self.total_requests} ({progress:.2f}%)"
142
+ )