Spaces:
Running
Running
amaye15
committed on
Commit
·
192ee60
1
Parent(s):
a4faf54
Debug - Read Endpoint
Browse files
- src/api/database.py +0 -381
- src/api/models/embedding_models.py +4 -0
- src/api/services/huggingface_service.py +0 -70
- src/main.py +6 -6
src/api/database.py
CHANGED
@@ -1,384 +1,3 @@
|
|
1 |
-
# import logging
|
2 |
-
# from typing import Dict, List, Optional, AsyncGenerator
|
3 |
-
# from pydantic import BaseSettings, PostgresDsn
|
4 |
-
# import pg8000
|
5 |
-
# from pg8000 import Connection, Cursor
|
6 |
-
# from pg8000.exceptions import DatabaseError
|
7 |
-
# import asyncio
|
8 |
-
# from contextlib import asynccontextmanager
|
9 |
-
# from dataclasses import dataclass
|
10 |
-
# from threading import Lock
|
11 |
-
|
12 |
-
# # Set up structured logging
|
13 |
-
# logging.basicConfig(
|
14 |
-
# level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
15 |
-
# )
|
16 |
-
# logger = logging.getLogger(__name__)
|
17 |
-
|
18 |
-
|
19 |
-
# class DatabaseSettings(BaseSettings):
|
20 |
-
# db_url: PostgresDsn
|
21 |
-
# pool_size: int = 5
|
22 |
-
|
23 |
-
# class Config:
|
24 |
-
# env_file = ".env"
|
25 |
-
|
26 |
-
|
27 |
-
# @dataclass
|
28 |
-
# class DatabaseConfig:
|
29 |
-
# username: str
|
30 |
-
# password: str
|
31 |
-
# hostname: str
|
32 |
-
# port: int
|
33 |
-
# database: str
|
34 |
-
|
35 |
-
|
36 |
-
# class DatabaseError(Exception):
|
37 |
-
# """Custom exception for database errors."""
|
38 |
-
|
39 |
-
# pass
|
40 |
-
|
41 |
-
|
42 |
-
# class Database:
|
43 |
-
# def __init__(self, db_url: str, pool_size: int):
|
44 |
-
# self.db_url = db_url
|
45 |
-
# self.pool_size = pool_size
|
46 |
-
# self.pool: List[Connection] = []
|
47 |
-
# self.lock = Lock()
|
48 |
-
# self.config = self._parse_db_url()
|
49 |
-
|
50 |
-
# def _parse_db_url(self) -> DatabaseConfig:
|
51 |
-
# """Parse the database URL into components."""
|
52 |
-
# result = urlparse(self.db_url)
|
53 |
-
# return DatabaseConfig(
|
54 |
-
# username=result.username,
|
55 |
-
# password=result.password,
|
56 |
-
# hostname=result.hostname,
|
57 |
-
# port=result.port or 5432,
|
58 |
-
# database=result.path.lstrip("/"),
|
59 |
-
# )
|
60 |
-
|
61 |
-
# async def connect(self) -> None:
|
62 |
-
# """Create a connection pool."""
|
63 |
-
# try:
|
64 |
-
# for _ in range(self.pool_size):
|
65 |
-
# conn = await self._create_connection()
|
66 |
-
# self.pool.append(conn)
|
67 |
-
# logger.info(
|
68 |
-
# f"Database connection pool created with {self.pool_size} connections."
|
69 |
-
# )
|
70 |
-
# except DatabaseError as e:
|
71 |
-
# logger.error(f"Failed to create database connection pool: {e}")
|
72 |
-
# raise
|
73 |
-
|
74 |
-
# async def _create_connection(self) -> Connection:
|
75 |
-
# """Create a single database connection."""
|
76 |
-
# try:
|
77 |
-
# conn = pg8000.connect(
|
78 |
-
# user=self.config.username,
|
79 |
-
# password=self.config.password,
|
80 |
-
# host=self.config.hostname,
|
81 |
-
# port=self.config.port,
|
82 |
-
# database=self.config.database,
|
83 |
-
# )
|
84 |
-
# return conn
|
85 |
-
# except DatabaseError as e:
|
86 |
-
# logger.error(f"Failed to create database connection: {e}")
|
87 |
-
# raise DatabaseError("Failed to create database connection.")
|
88 |
-
|
89 |
-
# async def disconnect(self) -> None:
|
90 |
-
# """Close all connections in the pool."""
|
91 |
-
# with self.lock:
|
92 |
-
# for conn in self.pool:
|
93 |
-
# conn.close()
|
94 |
-
# self.pool.clear()
|
95 |
-
# logger.info("Database connection pool closed.")
|
96 |
-
|
97 |
-
# @asynccontextmanager
|
98 |
-
# async def get_connection(self) -> AsyncGenerator[Connection, None]:
|
99 |
-
# """Acquire a connection from the pool."""
|
100 |
-
# with self.lock:
|
101 |
-
# if not self.pool:
|
102 |
-
# raise DatabaseError("Database connection pool is empty.")
|
103 |
-
# conn = self.pool.pop()
|
104 |
-
# try:
|
105 |
-
# yield conn
|
106 |
-
# finally:
|
107 |
-
# with self.lock:
|
108 |
-
# self.pool.append(conn)
|
109 |
-
|
110 |
-
# async def fetch(self, query: str, *args) -> List[Dict]:
|
111 |
-
# """
|
112 |
-
# Execute a SELECT query and return the results as a list of dictionaries.
|
113 |
-
|
114 |
-
# Args:
|
115 |
-
# query (str): The SQL query to execute.
|
116 |
-
# *args: Query parameters.
|
117 |
-
|
118 |
-
# Returns:
|
119 |
-
# List[Dict]: A list of dictionaries where keys are column names and values are column values.
|
120 |
-
# """
|
121 |
-
# try:
|
122 |
-
# async with self.get_connection() as conn:
|
123 |
-
# cursor: Cursor = conn.cursor()
|
124 |
-
# cursor.execute(query, args)
|
125 |
-
# rows = cursor.fetchall()
|
126 |
-
# columns = [desc[0] for desc in cursor.description]
|
127 |
-
# return [dict(zip(columns, row)) for row in rows]
|
128 |
-
# except DatabaseError as e:
|
129 |
-
# logger.error(f"Error executing query: {query}. Error: {e}")
|
130 |
-
# raise DatabaseError(f"Failed to execute query: {query}")
|
131 |
-
|
132 |
-
# async def execute(self, query: str, *args) -> None:
|
133 |
-
# """
|
134 |
-
# Execute an INSERT, UPDATE, or DELETE query.
|
135 |
-
|
136 |
-
# Args:
|
137 |
-
# query (str): The SQL query to execute.
|
138 |
-
# *args: Query parameters.
|
139 |
-
# """
|
140 |
-
# try:
|
141 |
-
# async with self.get_connection() as conn:
|
142 |
-
# cursor: Cursor = conn.cursor()
|
143 |
-
# cursor.execute(query, args)
|
144 |
-
# conn.commit()
|
145 |
-
# except DatabaseError as e:
|
146 |
-
# logger.error(f"Error executing query: {query}. Error: {e}")
|
147 |
-
# raise DatabaseError(f"Failed to execute query: {query}")
|
148 |
-
|
149 |
-
|
150 |
-
# # Dependency to get the database instance
|
151 |
-
# async def get_db() -> AsyncGenerator[Database, None]:
|
152 |
-
# settings = DatabaseSettings()
|
153 |
-
# db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
|
154 |
-
# await db.connect()
|
155 |
-
# try:
|
156 |
-
# yield db
|
157 |
-
# finally:
|
158 |
-
# await db.disconnect()
|
159 |
-
|
160 |
-
|
161 |
-
# # Example usage
|
162 |
-
# if __name__ == "__main__":
|
163 |
-
|
164 |
-
# async def main():
|
165 |
-
# settings = DatabaseSettings()
|
166 |
-
# db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
|
167 |
-
# await db.connect()
|
168 |
-
|
169 |
-
# try:
|
170 |
-
# # Example query
|
171 |
-
# query = """
|
172 |
-
# SELECT
|
173 |
-
# ppt.type AS product_type,
|
174 |
-
# pc.name AS product_category
|
175 |
-
# FROM
|
176 |
-
# product_producttype ppt
|
177 |
-
# INNER JOIN
|
178 |
-
# product_category pc
|
179 |
-
# ON
|
180 |
-
# ppt.category_id = pc.id
|
181 |
-
# """
|
182 |
-
# result = await db.fetch(query)
|
183 |
-
# print(result)
|
184 |
-
# finally:
|
185 |
-
# await db.disconnect()
|
186 |
-
|
187 |
-
# asyncio.run(main())
|
188 |
-
|
189 |
-
# import logging
|
190 |
-
# from urllib.parse import urlparse
|
191 |
-
# from typing import Dict, List, Optional, AsyncGenerator
|
192 |
-
# from pydantic_settings import BaseSettings
|
193 |
-
# from pydantic import PostgresDsn
|
194 |
-
# import pg8000
|
195 |
-
# from pg8000 import Connection, Cursor
|
196 |
-
# from pg8000.exceptions import DatabaseError
|
197 |
-
# import asyncio
|
198 |
-
# from contextlib import asynccontextmanager
|
199 |
-
# from dataclasses import dataclass
|
200 |
-
# from threading import Lock
|
201 |
-
|
202 |
-
# # Set up structured logging
|
203 |
-
# logging.basicConfig(
|
204 |
-
# level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
205 |
-
# )
|
206 |
-
# logger = logging.getLogger(__name__)
|
207 |
-
|
208 |
-
|
209 |
-
# class DatabaseSettings(BaseSettings):
|
210 |
-
# db_url: PostgresDsn
|
211 |
-
# pool_size: int = 5
|
212 |
-
|
213 |
-
# class Config:
|
214 |
-
# env_file = ".env"
|
215 |
-
|
216 |
-
|
217 |
-
# @dataclass
|
218 |
-
# class DatabaseConfig:
|
219 |
-
# username: str
|
220 |
-
# password: str
|
221 |
-
# hostname: str
|
222 |
-
# port: int
|
223 |
-
# database: str
|
224 |
-
|
225 |
-
|
226 |
-
# class DatabaseError(Exception):
|
227 |
-
# """Custom exception for database errors."""
|
228 |
-
|
229 |
-
# pass
|
230 |
-
|
231 |
-
|
232 |
-
# class Database:
|
233 |
-
# def __init__(self, db_url: str, pool_size: int):
|
234 |
-
# self.db_url = db_url
|
235 |
-
# self.pool_size = pool_size
|
236 |
-
# self.pool: List[Connection] = []
|
237 |
-
# self.lock = Lock()
|
238 |
-
# self.config = self._parse_db_url()
|
239 |
-
|
240 |
-
# def _parse_db_url(self) -> DatabaseConfig:
|
241 |
-
# """Parse the database URL into components."""
|
242 |
-
# # Convert PostgresDsn to a string
|
243 |
-
# db_url_str = str(self.db_url)
|
244 |
-
# result = urlparse(db_url_str)
|
245 |
-
# return DatabaseConfig(
|
246 |
-
# username=result.username,
|
247 |
-
# password=result.password,
|
248 |
-
# hostname=result.hostname,
|
249 |
-
# port=result.port or 5432,
|
250 |
-
# database=result.path.lstrip("/"),
|
251 |
-
# )
|
252 |
-
|
253 |
-
# async def connect(self) -> None:
|
254 |
-
# """Create a connection pool."""
|
255 |
-
# try:
|
256 |
-
# for _ in range(self.pool_size):
|
257 |
-
# conn = await self._create_connection()
|
258 |
-
# self.pool.append(conn)
|
259 |
-
# logger.info(
|
260 |
-
# f"Database connection pool created with {self.pool_size} connections."
|
261 |
-
# )
|
262 |
-
# except DatabaseError as e:
|
263 |
-
# logger.error(f"Failed to create database connection pool: {e}")
|
264 |
-
# raise
|
265 |
-
|
266 |
-
# async def _create_connection(self) -> Connection:
|
267 |
-
# """Create a single database connection."""
|
268 |
-
# try:
|
269 |
-
# conn = pg8000.connect(
|
270 |
-
# user=self.config.username,
|
271 |
-
# password=self.config.password,
|
272 |
-
# host=self.config.hostname,
|
273 |
-
# port=self.config.port,
|
274 |
-
# database=self.config.database,
|
275 |
-
# )
|
276 |
-
# return conn
|
277 |
-
# except DatabaseError as e:
|
278 |
-
# logger.error(f"Failed to create database connection: {e}")
|
279 |
-
# raise DatabaseError("Failed to create database connection.")
|
280 |
-
|
281 |
-
# async def disconnect(self) -> None:
|
282 |
-
# """Close all connections in the pool."""
|
283 |
-
# with self.lock:
|
284 |
-
# for conn in self.pool:
|
285 |
-
# conn.close()
|
286 |
-
# self.pool.clear()
|
287 |
-
# logger.info("Database connection pool closed.")
|
288 |
-
|
289 |
-
# @asynccontextmanager
|
290 |
-
# async def get_connection(self) -> AsyncGenerator[Connection, None]:
|
291 |
-
# """Acquire a connection from the pool."""
|
292 |
-
# with self.lock:
|
293 |
-
# if not self.pool:
|
294 |
-
# raise DatabaseError("Database connection pool is empty.")
|
295 |
-
# conn = self.pool.pop()
|
296 |
-
# try:
|
297 |
-
# yield conn
|
298 |
-
# finally:
|
299 |
-
# with self.lock:
|
300 |
-
# self.pool.append(conn)
|
301 |
-
|
302 |
-
# async def fetch(self, query: str, *args) -> List[Dict]:
|
303 |
-
# """
|
304 |
-
# Execute a SELECT query and return the results as a list of dictionaries.
|
305 |
-
|
306 |
-
# Args:
|
307 |
-
# query (str): The SQL query to execute.
|
308 |
-
# *args: Query parameters.
|
309 |
-
|
310 |
-
# Returns:
|
311 |
-
# List[Dict]: A list of dictionaries where keys are column names and values are column values.
|
312 |
-
# """
|
313 |
-
# try:
|
314 |
-
# async with self.get_connection() as conn:
|
315 |
-
# cursor: Cursor = conn.cursor()
|
316 |
-
# cursor.execute(query, args)
|
317 |
-
# rows = cursor.fetchall()
|
318 |
-
# columns = [desc[0] for desc in cursor.description]
|
319 |
-
# return [dict(zip(columns, row)) for row in rows]
|
320 |
-
# except DatabaseError as e:
|
321 |
-
# logger.error(f"Error executing query: {query}. Error: {e}")
|
322 |
-
# raise DatabaseError(f"Failed to execute query: {query}")
|
323 |
-
|
324 |
-
# async def execute(self, query: str, *args) -> None:
|
325 |
-
# """
|
326 |
-
# Execute an INSERT, UPDATE, or DELETE query.
|
327 |
-
|
328 |
-
# Args:
|
329 |
-
# query (str): The SQL query to execute.
|
330 |
-
# *args: Query parameters.
|
331 |
-
# """
|
332 |
-
# try:
|
333 |
-
# async with self.get_connection() as conn:
|
334 |
-
# cursor: Cursor = conn.cursor()
|
335 |
-
# cursor.execute(query, args)
|
336 |
-
# conn.commit()
|
337 |
-
# except DatabaseError as e:
|
338 |
-
# logger.error(f"Error executing query: {query}. Error: {e}")
|
339 |
-
# raise DatabaseError(f"Failed to execute query: {query}")
|
340 |
-
|
341 |
-
|
342 |
-
# # Dependency to get the database instance
|
343 |
-
# async def get_db() -> AsyncGenerator[Database, None]:
|
344 |
-
# settings = DatabaseSettings()
|
345 |
-
# db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
|
346 |
-
# await db.connect()
|
347 |
-
# try:
|
348 |
-
# yield db
|
349 |
-
# finally:
|
350 |
-
# await db.disconnect()
|
351 |
-
|
352 |
-
|
353 |
-
# # Example usage
|
354 |
-
# if __name__ == "__main__":
|
355 |
-
|
356 |
-
# async def main():
|
357 |
-
# settings = DatabaseSettings()
|
358 |
-
# db = Database(db_url=settings.db_url, pool_size=settings.pool_size)
|
359 |
-
# await db.connect()
|
360 |
-
|
361 |
-
# try:
|
362 |
-
# # Example query
|
363 |
-
# query = "SELECT * FROM your_table LIMIT 10"
|
364 |
-
# query = """
|
365 |
-
# SELECT
|
366 |
-
# ppt.type AS product_type,
|
367 |
-
# pc.name AS product_category
|
368 |
-
# FROM
|
369 |
-
# product_producttype ppt
|
370 |
-
# INNER JOIN
|
371 |
-
# product_category pc
|
372 |
-
# ON
|
373 |
-
# ppt.category_id = pc.id
|
374 |
-
# """
|
375 |
-
# result = await db.fetch(query)
|
376 |
-
# print(result)
|
377 |
-
# finally:
|
378 |
-
# await db.disconnect()
|
379 |
-
|
380 |
-
# asyncio.run(main())
|
381 |
-
|
382 |
import logging
|
383 |
from typing import AsyncGenerator, List, Optional, Dict
|
384 |
from pydantic_settings import BaseSettings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import logging
|
2 |
from typing import AsyncGenerator, List, Optional, Dict
|
3 |
from pydantic_settings import BaseSettings
|
src/api/models/embedding_models.py
CHANGED
@@ -13,6 +13,10 @@ class CreateEmbeddingRequest(BaseModel):
|
|
13 |
dataset_name: str = "re-mind/product_type_embedding"
|
14 |
|
15 |
|
|
|
|
|
|
|
|
|
16 |
class UpdateEmbeddingRequest(BaseModel):
|
17 |
dataset_name: str
|
18 |
updates: Dict[str, List] # Column name -> List of values
|
|
|
13 |
dataset_name: str = "re-mind/product_type_embedding"
|
14 |
|
15 |
|
16 |
+
class ReadEmbeddingRequest(BaseModel):
|
17 |
+
dataset_name: str
|
18 |
+
|
19 |
+
|
20 |
class UpdateEmbeddingRequest(BaseModel):
|
21 |
dataset_name: str
|
22 |
updates: Dict[str, List] # Column name -> List of values
|
src/api/services/huggingface_service.py
CHANGED
@@ -1,73 +1,3 @@
|
|
1 |
-
# from datasets import Dataset, load_dataset
|
2 |
-
# import logging
|
3 |
-
# from typing import Optional, Dict, List
|
4 |
-
# import pandas as pd
|
5 |
-
# from src.api.exceptions import DatasetNotFoundError, DatasetPushError
|
6 |
-
|
7 |
-
# # Set up structured logging
|
8 |
-
# logging.basicConfig(
|
9 |
-
# level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
10 |
-
# )
|
11 |
-
# logger = logging.getLogger(__name__)
|
12 |
-
|
13 |
-
|
14 |
-
# class HuggingFaceService:
|
15 |
-
# async def push_to_hub(self, df: pd.DataFrame, dataset_name: str) -> None:
|
16 |
-
# """Push the dataset to Hugging Face Hub."""
|
17 |
-
# try:
|
18 |
-
# logger.info(f"Creating Hugging Face Dataset: {dataset_name}...")
|
19 |
-
# ds = Dataset.from_pandas(df)
|
20 |
-
# ds.push_to_hub(dataset_name)
|
21 |
-
# logger.info(f"Dataset pushed to Hugging Face Hub: {dataset_name}")
|
22 |
-
# except Exception as e:
|
23 |
-
# logger.error(f"Failed to push dataset to Hugging Face Hub: {e}")
|
24 |
-
# raise DatasetPushError(f"Failed to push dataset: {e}")
|
25 |
-
|
26 |
-
# async def read_dataset(self, dataset_name: str) -> Optional[pd.DataFrame]:
|
27 |
-
# """Read a dataset from Hugging Face Hub."""
|
28 |
-
# try:
|
29 |
-
# logger.info(f"Loading dataset from Hugging Face Hub: {dataset_name}...")
|
30 |
-
# ds = load_dataset(dataset_name)
|
31 |
-
# df = ds["train"].to_pandas()
|
32 |
-
# return df
|
33 |
-
# except Exception as e:
|
34 |
-
# logger.error(f"Failed to read dataset: {e}")
|
35 |
-
# raise DatasetNotFoundError(f"Dataset not found: {e}")
|
36 |
-
|
37 |
-
# async def update_dataset(
|
38 |
-
# self, dataset_name: str, updates: Dict[str, List]
|
39 |
-
# ) -> Optional[pd.DataFrame]:
|
40 |
-
# """Update a dataset on Hugging Face Hub."""
|
41 |
-
# try:
|
42 |
-
# df = await self.read_dataset(dataset_name)
|
43 |
-
# for column, values in updates.items():
|
44 |
-
# if column in df.columns:
|
45 |
-
# df[column] = values
|
46 |
-
# else:
|
47 |
-
# logger.warning(f"Column '{column}' not found in dataset.")
|
48 |
-
# await self.push_to_hub(df, dataset_name)
|
49 |
-
# return df
|
50 |
-
# except Exception as e:
|
51 |
-
# logger.error(f"Failed to update dataset: {e}")
|
52 |
-
# raise DatasetPushError(f"Failed to update dataset: {e}")
|
53 |
-
|
54 |
-
# async def delete_columns(
|
55 |
-
# self, dataset_name: str, columns: List[str]
|
56 |
-
# ) -> Optional[pd.DataFrame]:
|
57 |
-
# """Delete columns from a dataset on Hugging Face Hub."""
|
58 |
-
# try:
|
59 |
-
# df = await self.read_dataset(dataset_name)
|
60 |
-
# for column in columns:
|
61 |
-
# if column in df.columns:
|
62 |
-
# df.drop(column, axis=1, inplace=True)
|
63 |
-
# else:
|
64 |
-
# logger.warning(f"Column '{column}' not found in dataset.")
|
65 |
-
# await self.push_to_hub(df, dataset_name)
|
66 |
-
# return df
|
67 |
-
# except Exception as e:
|
68 |
-
# logger.error(f"Failed to delete columns: {e}")
|
69 |
-
# raise DatasetPushError(f"Failed to delete columns: {e}")
|
70 |
-
|
71 |
from datasets import Dataset, load_dataset
|
72 |
from huggingface_hub import HfApi, HfFolder
|
73 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from datasets import Dataset, load_dataset
|
2 |
from huggingface_hub import HfApi, HfFolder
|
3 |
import logging
|
src/main.py
CHANGED
@@ -5,6 +5,7 @@ from pydantic import BaseModel
|
|
5 |
from typing import List, Dict
|
6 |
from src.api.models.embedding_models import (
|
7 |
CreateEmbeddingRequest,
|
|
|
8 |
UpdateEmbeddingRequest,
|
9 |
DeleteEmbeddingRequest,
|
10 |
)
|
@@ -114,16 +115,17 @@ async def create_embedding(
|
|
114 |
|
115 |
|
116 |
# Endpoint to read embeddings
|
117 |
-
@app.get("/read_embeddings/{dataset_name}")
|
|
|
118 |
async def read_embeddings(
|
119 |
-
|
120 |
huggingface_service: HuggingFaceService = Depends(get_huggingface_service),
|
121 |
):
|
122 |
"""
|
123 |
Read embeddings from a Hugging Face dataset.
|
124 |
"""
|
125 |
try:
|
126 |
-
df = await huggingface_service.read_dataset(dataset_name)
|
127 |
return df.to_dict(orient="records")
|
128 |
except DatasetNotFoundError as e:
|
129 |
logger.error(f"Dataset not found: {e}")
|
@@ -168,9 +170,7 @@ async def delete_embeddings(
|
|
168 |
Delete embeddings from a Hugging Face dataset.
|
169 |
"""
|
170 |
try:
|
171 |
-
await huggingface_service.delete_dataset(
|
172 |
-
request.dataset_name
|
173 |
-
)
|
174 |
return {
|
175 |
"message": "Embeddings deleted successfully.",
|
176 |
"dataset_name": request.dataset_name,
|
|
|
5 |
from typing import List, Dict
|
6 |
from src.api.models.embedding_models import (
|
7 |
CreateEmbeddingRequest,
|
8 |
+
ReadEmbeddingRequest,
|
9 |
UpdateEmbeddingRequest,
|
10 |
DeleteEmbeddingRequest,
|
11 |
)
|
|
|
115 |
|
116 |
|
117 |
# Endpoint to read embeddings
|
118 |
+
# @app.get("/read_embeddings/{dataset_name}")
|
119 |
+
@app.get("/read_embeddings")
|
120 |
async def read_embeddings(
|
121 |
+
request: ReadEmbeddingRequest,
|
122 |
huggingface_service: HuggingFaceService = Depends(get_huggingface_service),
|
123 |
):
|
124 |
"""
|
125 |
Read embeddings from a Hugging Face dataset.
|
126 |
"""
|
127 |
try:
|
128 |
+
df = await huggingface_service.read_dataset(request.dataset_name)
|
129 |
return df.to_dict(orient="records")
|
130 |
except DatasetNotFoundError as e:
|
131 |
logger.error(f"Dataset not found: {e}")
|
|
|
170 |
Delete embeddings from a Hugging Face dataset.
|
171 |
"""
|
172 |
try:
|
173 |
+
await huggingface_service.delete_dataset(request.dataset_name)
|
|
|
|
|
174 |
return {
|
175 |
"message": "Embeddings deleted successfully.",
|
176 |
"dataset_name": request.dataset_name,
|