Spaces:

rai-institute-testbed
/

Datasets

Sleeping

App Files Files Community

rhea2809 committed on Oct 11, 2023

Commit

5db8214

1 Parent(s): adbc9a0

Update data_list.py

Browse files

Files changed (1) hide show

data_list.py +77 -0

data_list.py CHANGED Viewed

	@@ -0,0 +1,77 @@

+from __future__ import annotations
+
+import html
+
+import numpy as np
+import pandas as pd
+import requests
+from huggingface_hub.hf_api import SpaceInfo
+# Identifier of the Google Sheets document, interpolated into the URL path below.
+SHEET_ID = '1BWKw2ygYQUUPcNdSJhW9OkXILWO5i-dCa5Uahn9dHNo'
+# Name of the worksheet, passed as the `sheet` query parameter below.
+SHEET_NAME = 'Datasets'
+# Google Sheets gviz endpoint for that worksheet; `tqx=out:csv` requests CSV output.
+csv_url = f'https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'
+class DataList:
+    def __init__(self):
+        self.table = pd.read_csv(csv_url)
+        self.table = self.table.astype({'Year':'string'})
+        self._preprocess_table()
+        self.table_header = '''
+            <tr>
+                <td width="15%">Name</td>
+                <td width="10%">URL</td>
+                <td width="30%">About</td>
+                <td width="15%">Publisher</td>
+                <td width="10%">Year Updated</td>
+                <td width="10%">Type</td>
+                <td width="10%">Tag</td>
+            </tr>'''
+    def _preprocess_table(self) -> None:
+        self.table['name_lowercase'] = self.table['Name'].str.lower()
+        rows = []
+        for row in self.table.itertuples():
+            source = f'<a href="{row.URL}" target="_blank">Link</a>' if isinstance(
+                row.URL, str) else ''
+            row = f'''
+                <tr>
+                    <td>{row.Name}</td>
+                    <td>{source}</td>
+                    <td>{row.About}</td>
+                    <td>{row.Publisher}</td>
+                    <td>{row.Year}</td>
+                    <td>{row.Type}</td>
+                    <td>{row.Tags}</td>
+                </tr>'''
+            rows.append(row)
+        self.table['html_table_content'] = rows
+    def render(self, search_query: str,
+            case_sensitive: bool,
+            filter_names: list[str],
+            data_types: list[str]) -> tuple[int, str]:
+        df = self.table
+        if search_query:
+            if case_sensitive:
+                df = df[df.name.str.contains(search_query)]
+            else:
+                df = df[df.name_lowercase.str.contains(search_query.lower())]
+        df = self.filter_table(df, filter_names, data_types)
+        result = self.to_html(df, self.table_header)
+        return result
+    @staticmethod
+    def filter_table(df: pd.DataFrame, filter_names: list[str], data_types: list[str]) -> pd.DataFrame:
+        df = df.loc[df.Type.isin(set(filter_names))]
+        df = df.loc[df.Tags.isin(set(data_types))]
+        return df
+    @staticmethod
+    def to_html(df: pd.DataFrame, table_header: str) -> str:
+        table_data = ''.join(df.html_table_content)
+        html = f'''
+        <table>
+            {table_header}
+            {table_data}
+        </table>'''
+        return html