fatmacankara committed on
Commit
6877db6
·
1 Parent(s): d3aafa8

Update ASCARIS.py

Browse files
Files changed (1) hide show
  1. ASCARIS.py +31 -4
ASCARIS.py CHANGED
@@ -11,10 +11,37 @@ import argparse
11
  from st_aggrid import AgGrid, GridOptionsBuilder, JsCode,GridUpdateMode
12
  import base64
13
  showWarningOnDirectExecution = False
14
- from datasets import load_dataset
15
- alphafold_path = load_dataset("HUBioDataLab/AlphafoldStructures")
16
- st.write('path')
17
- st.write(alphafold_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def convert_df(df):
19
  return df.to_csv(index=False).encode('utf-8')
20
 
 
11
  from st_aggrid import AgGrid, GridOptionsBuilder, JsCode,GridUpdateMode
12
  import base64
13
  showWarningOnDirectExecution = False
14
+
15
+
16
# Stream examples into fixed-size chunks saved on disk, reload every chunk,
# and combine them into a single dataset that is shown in the Streamlit page.
from datasets import Dataset, concatenate_datasets

# Number of rows accumulated in `dset` before it is flushed to disk.
MAX_SAMPLES_IN_MEMORY = 1000
samples_in_dset = 0
# NOTE(review): "col1"/"col2" look like placeholder column names -- confirm
# they match the keys of the dicts yielded by the streamer.
dset = Dataset.from_dict({"col1": [], "col2": []})  # empty dataset
# Each chunk directory is this prefix with the chunk index appended.
path_to_save_dir = "HUBioData/input_files"
num_chunks = 0

# NOTE(review): custom_example_dict_streamer is not defined in the visible
# part of this file; it has to be provided elsewhere before this code runs.
for example_dict in custom_example_dict_streamer("HUBioData/AlphafoldStructures"):
    dset = dset.add_item(example_dict)
    samples_in_dset += 1
    if samples_in_dset == MAX_SAMPLES_IN_MEMORY:
        samples_in_dset = 0
        dset.save_to_disk(f"{path_to_save_dir}{num_chunks}")
        # Must be `+=`: `=+ 1` would reset the counter to 1 on every flush,
        # so later chunks would overwrite each other and never be reloaded.
        num_chunks += 1
        dset = Dataset.from_dict({"col1": [], "col2": []})  # empty dataset

# Flush the last, partially filled chunk.
if samples_in_dset > 0:
    dset.save_to_disk(f"{path_to_save_dir}{num_chunks}")
    num_chunks += 1

loaded_dsets = []  # memory-mapped
for chunk_num in range(num_chunks):
    dset = Dataset.load_from_disk(f"{path_to_save_dir}{chunk_num}")
    loaded_dsets.append(dset)

# Concatenate the list of reloaded chunks (not just the last one). When the
# streamer yielded nothing, no chunk exists and `dset` is still the empty
# dataset created above, so fall back to it instead of concatenating nothing.
final_dset = concatenate_datasets(loaded_dsets) if loaded_dsets else dset
st.write('FİNAL DSET')
st.write(final_dset)
40
+
41
+
42
+
43
+
44
+
45
  def convert_df(df):
46
  return df.to_csv(index=False).encode('utf-8')
47