amaye15 committed on
Commit
192ee60
·
1 Parent(s): a4faf54

Debug - Read Endpoint

Browse files
src/api/database.py CHANGED
@@ -1,384 +1,3 @@
1
- # import logging
2
- # from typing import Dict, List, Optional, AsyncGenerator
3
- # from pydantic import BaseSettings, PostgresDsn
4
- # import pg8000
5
- # from pg8000 import Connection, Cursor
6
- # from pg8000.exceptions import DatabaseError
7
- # import asyncio
8
- # from contextlib import asynccontextmanager
9
- # from dataclasses import dataclass
10
- # from threading import Lock
11
-
12
- # # Set up structured logging
13
- # logging.basicConfig(
14
- # level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
15
- # )
16
- # logger = logging.getLogger(__name__)
17
-
18
-
19
- # class DatabaseSettings(BaseSettings):
20
- # db_url: PostgresDsn
21
- # pool_size: int = 5
22
-
23
- # class Config:
24
- # env_file = ".env"
25
-
26
-
27
- # @dataclass
28
- # class DatabaseConfig:
29
- # username: str
30
- # password: str
31
- # hostname: str
32
- # port: int
33
- # database: str
34
-
35
-
36
- # class DatabaseError(Exception):
37
- # """Custom exception for database errors."""
38
-
39
- # pass
40
-
41
-
42
- # class Database:
43
- # def __init__(self, db_url: str, pool_size: int):
44
- # self.db_url = db_url
45
- # self.pool_size = pool_size
46
- # self.pool: List[Connection] = []
47
- # self.lock = Lock()
48
- # self.config = self._parse_db_url()
49
-
50
- # def _parse_db_url(self) -> DatabaseConfig:
51
- # """Parse the database URL into components."""
52
- # result = urlparse(self.db_url)
53
- # return DatabaseConfig(
54
- # username=result.username,
55
- # password=result.password,
56
- # hostname=result.hostname,
57
- # port=result.port or 5432,
58
- # database=result.path.lstrip("/"),
59
- # )
60
-
61
- # async def connect(self) -> None:
62
- # """Create a connection pool."""
63
- # try:
64
- # for _ in range(self.pool_size):
65
- # conn = await self._create_connection()
66
- # self.pool.append(conn)
67
- # logger.info(
68
- # f"Database connection pool created with {self.pool_size} connections."
69
- # )
70
- # except DatabaseError as e:
71
- # logger.error(f"Failed to create database connection pool: {e}")
72
- # raise
73
-
74
- # async def _create_connection(self) -> Connection:
75
- # """Create a single database connection."""
76
- # try:
77
- # conn = pg8000.connect(
78
- # user=self.config.username,
79
- # password=self.config.password,
80
- # host=self.config.hostname,
81
- # port=self.config.port,
82
- # database=self.config.database,
83
- # )
84
- # return conn
85
- # except DatabaseError as e:
86
- # logger.error(f"Failed to create database connection: {e}")
87
- # raise DatabaseError("Failed to create database connection.")
88
-
89
- # async def disconnect(self) -> None:
90
- # """Close all connections in the pool."""
91
- # with self.lock:
92
- # for conn in self.pool:
93
- # conn.close()
94
- # self.pool.clear()
95
- # logger.info("Database connection pool closed.")
96
-
97
- # @asynccontextmanager
98
- # async def get_connection(self) -> AsyncGenerator[Connection, None]:
99
- # """Acquire a connection from the pool."""
100
- # with self.lock:
101
- # if not self.pool:
102
- # raise DatabaseError("Database connection pool is empty.")
103
- # conn = self.pool.pop()
104
- # try:
105
- # yield conn
106
- # finally:
107
- # with self.lock:
108
- # self.pool.append(conn)
109
-
110
- # async def fetch(self, query: str, *args) -> List[Dict]:
111
- # """
112
- # Execute a SELECT query and return the results as a list of dictionaries.
113
-
114
- # Args:
115
- # query (str): The SQL query to execute.
116
- # *args: Query parameters.
117
-
118
- # Returns:
119
- # List[Dict]: A list of dictionaries where keys are column names and values are column values.
120
- # """
121
- # try:
122
- # async with self.get_connection() as conn:
123
- # cursor: Cursor = conn.cursor()
124
- # cursor.execute(query, args)
125
- # rows = cursor.fetchall()
126
- # columns = [desc[0] for desc in cursor.description]
127
- # return [dict(zip(columns, row)) for row in rows]
128
- # except DatabaseError as e:
129
- # logger.error(f"Error executing query: {query}. Error: {e}")
130
- # raise DatabaseError(f"Failed to execute query: {query}")
131
-
132
- # async def execute(self, query: str, *args) -> None:
133
- # """
134
- # Execute an INSERT, UPDATE, or DELETE query.
135
-
136
- # Args:
137
- # query (str): The SQL query to execute.
138
- # *args: Query parameters.
139
- # """
140
- # try:
141
- # async with self.get_connection() as conn:
142
- # cursor: Cursor = conn.cursor()
143
- # cursor.execute(query, args)
144
- # conn.commit()
145
- # except DatabaseError as e:
146
- # logger.error(f"Error executing query: {query}. Error: {e}")
147
- # raise DatabaseError(f"Failed to execute query: {query}")
148
-
149
-
150
- # # Dependency to get the database instance
151
- # async def get_db() -> AsyncGenerator[Database, None]:
152
- # settings = DatabaseSettings()
153
- # db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
154
- # await db.connect()
155
- # try:
156
- # yield db
157
- # finally:
158
- # await db.disconnect()
159
-
160
-
161
- # # Example usage
162
- # if __name__ == "__main__":
163
-
164
- # async def main():
165
- # settings = DatabaseSettings()
166
- # db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
167
- # await db.connect()
168
-
169
- # try:
170
- # # Example query
171
- # query = """
172
- # SELECT
173
- # ppt.type AS product_type,
174
- # pc.name AS product_category
175
- # FROM
176
- # product_producttype ppt
177
- # INNER JOIN
178
- # product_category pc
179
- # ON
180
- # ppt.category_id = pc.id
181
- # """
182
- # result = await db.fetch(query)
183
- # print(result)
184
- # finally:
185
- # await db.disconnect()
186
-
187
- # asyncio.run(main())
188
-
189
- # import logging
190
- # from urllib.parse import urlparse
191
- # from typing import Dict, List, Optional, AsyncGenerator
192
- # from pydantic_settings import BaseSettings
193
- # from pydantic import PostgresDsn
194
- # import pg8000
195
- # from pg8000 import Connection, Cursor
196
- # from pg8000.exceptions import DatabaseError
197
- # import asyncio
198
- # from contextlib import asynccontextmanager
199
- # from dataclasses import dataclass
200
- # from threading import Lock
201
-
202
- # # Set up structured logging
203
- # logging.basicConfig(
204
- # level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
205
- # )
206
- # logger = logging.getLogger(__name__)
207
-
208
-
209
- # class DatabaseSettings(BaseSettings):
210
- # db_url: PostgresDsn
211
- # pool_size: int = 5
212
-
213
- # class Config:
214
- # env_file = ".env"
215
-
216
-
217
- # @dataclass
218
- # class DatabaseConfig:
219
- # username: str
220
- # password: str
221
- # hostname: str
222
- # port: int
223
- # database: str
224
-
225
-
226
- # class DatabaseError(Exception):
227
- # """Custom exception for database errors."""
228
-
229
- # pass
230
-
231
-
232
- # class Database:
233
- # def __init__(self, db_url: str, pool_size: int):
234
- # self.db_url = db_url
235
- # self.pool_size = pool_size
236
- # self.pool: List[Connection] = []
237
- # self.lock = Lock()
238
- # self.config = self._parse_db_url()
239
-
240
- # def _parse_db_url(self) -> DatabaseConfig:
241
- # """Parse the database URL into components."""
242
- # # Convert PostgresDsn to a string
243
- # db_url_str = str(self.db_url)
244
- # result = urlparse(db_url_str)
245
- # return DatabaseConfig(
246
- # username=result.username,
247
- # password=result.password,
248
- # hostname=result.hostname,
249
- # port=result.port or 5432,
250
- # database=result.path.lstrip("/"),
251
- # )
252
-
253
- # async def connect(self) -> None:
254
- # """Create a connection pool."""
255
- # try:
256
- # for _ in range(self.pool_size):
257
- # conn = await self._create_connection()
258
- # self.pool.append(conn)
259
- # logger.info(
260
- # f"Database connection pool created with {self.pool_size} connections."
261
- # )
262
- # except DatabaseError as e:
263
- # logger.error(f"Failed to create database connection pool: {e}")
264
- # raise
265
-
266
- # async def _create_connection(self) -> Connection:
267
- # """Create a single database connection."""
268
- # try:
269
- # conn = pg8000.connect(
270
- # user=self.config.username,
271
- # password=self.config.password,
272
- # host=self.config.hostname,
273
- # port=self.config.port,
274
- # database=self.config.database,
275
- # )
276
- # return conn
277
- # except DatabaseError as e:
278
- # logger.error(f"Failed to create database connection: {e}")
279
- # raise DatabaseError("Failed to create database connection.")
280
-
281
- # async def disconnect(self) -> None:
282
- # """Close all connections in the pool."""
283
- # with self.lock:
284
- # for conn in self.pool:
285
- # conn.close()
286
- # self.pool.clear()
287
- # logger.info("Database connection pool closed.")
288
-
289
- # @asynccontextmanager
290
- # async def get_connection(self) -> AsyncGenerator[Connection, None]:
291
- # """Acquire a connection from the pool."""
292
- # with self.lock:
293
- # if not self.pool:
294
- # raise DatabaseError("Database connection pool is empty.")
295
- # conn = self.pool.pop()
296
- # try:
297
- # yield conn
298
- # finally:
299
- # with self.lock:
300
- # self.pool.append(conn)
301
-
302
- # async def fetch(self, query: str, *args) -> List[Dict]:
303
- # """
304
- # Execute a SELECT query and return the results as a list of dictionaries.
305
-
306
- # Args:
307
- # query (str): The SQL query to execute.
308
- # *args: Query parameters.
309
-
310
- # Returns:
311
- # List[Dict]: A list of dictionaries where keys are column names and values are column values.
312
- # """
313
- # try:
314
- # async with self.get_connection() as conn:
315
- # cursor: Cursor = conn.cursor()
316
- # cursor.execute(query, args)
317
- # rows = cursor.fetchall()
318
- # columns = [desc[0] for desc in cursor.description]
319
- # return [dict(zip(columns, row)) for row in rows]
320
- # except DatabaseError as e:
321
- # logger.error(f"Error executing query: {query}. Error: {e}")
322
- # raise DatabaseError(f"Failed to execute query: {query}")
323
-
324
- # async def execute(self, query: str, *args) -> None:
325
- # """
326
- # Execute an INSERT, UPDATE, or DELETE query.
327
-
328
- # Args:
329
- # query (str): The SQL query to execute.
330
- # *args: Query parameters.
331
- # """
332
- # try:
333
- # async with self.get_connection() as conn:
334
- # cursor: Cursor = conn.cursor()
335
- # cursor.execute(query, args)
336
- # conn.commit()
337
- # except DatabaseError as e:
338
- # logger.error(f"Error executing query: {query}. Error: {e}")
339
- # raise DatabaseError(f"Failed to execute query: {query}")
340
-
341
-
342
- # # Dependency to get the database instance
343
- # async def get_db() -> AsyncGenerator[Database, None]:
344
- # settings = DatabaseSettings()
345
- # db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
346
- # await db.connect()
347
- # try:
348
- # yield db
349
- # finally:
350
- # await db.disconnect()
351
-
352
-
353
- # # Example usage
354
- # if __name__ == "__main__":
355
-
356
- # async def main():
357
- # settings = DatabaseSettings()
358
- # db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
359
- # await db.connect()
360
-
361
- # try:
362
- # # Example query
363
- # query = "SELECT * FROM your_table LIMIT 10"
364
- # query = """
365
- # SELECT
366
- # ppt.type AS product_type,
367
- # pc.name AS product_category
368
- # FROM
369
- # product_producttype ppt
370
- # INNER JOIN
371
- # product_category pc
372
- # ON
373
- # ppt.category_id = pc.id
374
- # """
375
- # result = await db.fetch(query)
376
- # print(result)
377
- # finally:
378
- # await db.disconnect()
379
-
380
- # asyncio.run(main())
381
-
382
  import logging
383
  from typing import AsyncGenerator, List, Optional, Dict
384
  from pydantic_settings import BaseSettings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import logging
2
  from typing import AsyncGenerator, List, Optional, Dict
3
  from pydantic_settings import BaseSettings
src/api/models/embedding_models.py CHANGED
@@ -13,6 +13,10 @@ class CreateEmbeddingRequest(BaseModel):
13
  dataset_name: str = "re-mind/product_type_embedding"
14
 
15
 
 
 
 
 
16
  class UpdateEmbeddingRequest(BaseModel):
17
  dataset_name: str
18
  updates: Dict[str, List] # Column name -> List of values
 
13
  dataset_name: str = "re-mind/product_type_embedding"
14
 
15
 
16
+ class ReadEmbeddingRequest(BaseModel):
17
+ dataset_name: str
18
+
19
+
20
  class UpdateEmbeddingRequest(BaseModel):
21
  dataset_name: str
22
  updates: Dict[str, List] # Column name -> List of values
src/api/services/huggingface_service.py CHANGED
@@ -1,73 +1,3 @@
1
- # from datasets import Dataset, load_dataset
2
- # import logging
3
- # from typing import Optional, Dict, List
4
- # import pandas as pd
5
- # from src.api.exceptions import DatasetNotFoundError, DatasetPushError
6
-
7
- # # Set up structured logging
8
- # logging.basicConfig(
9
- # level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
10
- # )
11
- # logger = logging.getLogger(__name__)
12
-
13
-
14
- # class HuggingFaceService:
15
- # async def push_to_hub(self, df: pd.DataFrame, dataset_name: str) -> None:
16
- # """Push the dataset to Hugging Face Hub."""
17
- # try:
18
- # logger.info(f"Creating Hugging Face Dataset: {dataset_name}...")
19
- # ds = Dataset.from_pandas(df)
20
- # ds.push_to_hub(dataset_name)
21
- # logger.info(f"Dataset pushed to Hugging Face Hub: {dataset_name}")
22
- # except Exception as e:
23
- # logger.error(f"Failed to push dataset to Hugging Face Hub: {e}")
24
- # raise DatasetPushError(f"Failed to push dataset: {e}")
25
-
26
- # async def read_dataset(self, dataset_name: str) -> Optional[pd.DataFrame]:
27
- # """Read a dataset from Hugging Face Hub."""
28
- # try:
29
- # logger.info(f"Loading dataset from Hugging Face Hub: {dataset_name}...")
30
- # ds = load_dataset(dataset_name)
31
- # df = ds["train"].to_pandas()
32
- # return df
33
- # except Exception as e:
34
- # logger.error(f"Failed to read dataset: {e}")
35
- # raise DatasetNotFoundError(f"Dataset not found: {e}")
36
-
37
- # async def update_dataset(
38
- # self, dataset_name: str, updates: Dict[str, List]
39
- # ) -> Optional[pd.DataFrame]:
40
- # """Update a dataset on Hugging Face Hub."""
41
- # try:
42
- # df = await self.read_dataset(dataset_name)
43
- # for column, values in updates.items():
44
- # if column in df.columns:
45
- # df[column] = values
46
- # else:
47
- # logger.warning(f"Column '{column}' not found in dataset.")
48
- # await self.push_to_hub(df, dataset_name)
49
- # return df
50
- # except Exception as e:
51
- # logger.error(f"Failed to update dataset: {e}")
52
- # raise DatasetPushError(f"Failed to update dataset: {e}")
53
-
54
- # async def delete_columns(
55
- # self, dataset_name: str, columns: List[str]
56
- # ) -> Optional[pd.DataFrame]:
57
- # """Delete columns from a dataset on Hugging Face Hub."""
58
- # try:
59
- # df = await self.read_dataset(dataset_name)
60
- # for column in columns:
61
- # if column in df.columns:
62
- # df.drop(column, axis=1, inplace=True)
63
- # else:
64
- # logger.warning(f"Column '{column}' not found in dataset.")
65
- # await self.push_to_hub(df, dataset_name)
66
- # return df
67
- # except Exception as e:
68
- # logger.error(f"Failed to delete columns: {e}")
69
- # raise DatasetPushError(f"Failed to delete columns: {e}")
70
-
71
  from datasets import Dataset, load_dataset
72
  from huggingface_hub import HfApi, HfFolder
73
  import logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from datasets import Dataset, load_dataset
2
  from huggingface_hub import HfApi, HfFolder
3
  import logging
src/main.py CHANGED
@@ -5,6 +5,7 @@ from pydantic import BaseModel
5
  from typing import List, Dict
6
  from src.api.models.embedding_models import (
7
  CreateEmbeddingRequest,
 
8
  UpdateEmbeddingRequest,
9
  DeleteEmbeddingRequest,
10
  )
@@ -114,16 +115,17 @@ async def create_embedding(
114
 
115
 
116
  # Endpoint to read embeddings
117
- @app.get("/read_embeddings/{dataset_name}")
 
118
  async def read_embeddings(
119
- dataset_name: str,
120
  huggingface_service: HuggingFaceService = Depends(get_huggingface_service),
121
  ):
122
  """
123
  Read embeddings from a Hugging Face dataset.
124
  """
125
  try:
126
- df = await huggingface_service.read_dataset(dataset_name)
127
  return df.to_dict(orient="records")
128
  except DatasetNotFoundError as e:
129
  logger.error(f"Dataset not found: {e}")
@@ -168,9 +170,7 @@ async def delete_embeddings(
168
  Delete embeddings from a Hugging Face dataset.
169
  """
170
  try:
171
- await huggingface_service.delete_dataset(
172
- request.dataset_name
173
- )
174
  return {
175
  "message": "Embeddings deleted successfully.",
176
  "dataset_name": request.dataset_name,
 
5
  from typing import List, Dict
6
  from src.api.models.embedding_models import (
7
  CreateEmbeddingRequest,
8
+ ReadEmbeddingRequest,
9
  UpdateEmbeddingRequest,
10
  DeleteEmbeddingRequest,
11
  )
 
115
 
116
 
117
  # Endpoint to read embeddings
118
+ # @app.get("/read_embeddings/{dataset_name}")
119
+ @app.get("/read_embeddings")
120
  async def read_embeddings(
121
+ request: ReadEmbeddingRequest,
122
  huggingface_service: HuggingFaceService = Depends(get_huggingface_service),
123
  ):
124
  """
125
  Read embeddings from a Hugging Face dataset.
126
  """
127
  try:
128
+ df = await huggingface_service.read_dataset(request.dataset_name)
129
  return df.to_dict(orient="records")
130
  except DatasetNotFoundError as e:
131
  logger.error(f"Dataset not found: {e}")
 
170
  Delete embeddings from a Hugging Face dataset.
171
  """
172
  try:
173
+ await huggingface_service.delete_dataset(request.dataset_name)
 
 
174
  return {
175
  "message": "Embeddings deleted successfully.",
176
  "dataset_name": request.dataset_name,