fatmacankara committed
Commit dc66b2b · 1 Parent(s): 6877db6

Update ASCARIS.py

Files changed (1)
  1. ASCARIS.py +6 -24
ASCARIS.py CHANGED
@@ -12,35 +12,17 @@ from st_aggrid import AgGrid, GridOptionsBuilder, JsCode,GridUpdateMode
 import base64
 showWarningOnDirectExecution = False
 
+from datasets import load_dataset
 
-from datasets import Dataset, concatenate_datasets
-MAX_SAMPLES_IN_MEMORY = 1000
-samples_in_dset = 0
-dset = Dataset.from_dict({"col1": [], "col2": []})  # empty dataset
-path_to_save_dir = "HUBioData/input_files"
-num_chunks = 0
-for example_dict in custom_example_dict_streamer("HUBioData/AlphafoldStructures"):
-    dset = dset.add_item(example_dict)
-    samples_in_dset += 1
-    if samples_in_dset == MAX_SAMPLES_IN_MEMORY:
-        samples_in_dset = 0
-        dset.save_to_disk(f"{path_to_save_dir}{num_chunks}")
-        num_chunks += 1
-        dset = Dataset.from_dict({"col1": [], "col2": []})  # empty dataset
-if samples_in_dset > 0:
-    dset.save_to_disk(f"{path_to_save_dir}{num_chunks}")
-    num_chunks += 1
-loaded_dsets = []  # memory-mapped
-for chunk_num in range(num_chunks):
-    dset = Dataset.load_from_disk(f"{path_to_save_dir}{chunk_num}")
-    loaded_dsets.append(dset)
-final_dset = concatenate_datasets(loaded_dsets)
-st.write('FİNAL DSET')
-st.write(final_dset)
+# Replace 'dataset_name' with the name of the dataset you want to use
 
+dataset = load_dataset('HUBioDataLab/AlphafoldStructures')
 
 
+file_path = 'AF-A0A075B6Y9-F1-model_v4.cif.gz'
 
+# Access the file content
+st.write(file_path)
 
 def convert_df(df):
     return df.to_csv(index=False).encode('utf-8')
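
Note that the added code only writes the file name to the Streamlit page; it does not open the structure itself. Below is a minimal sketch of how the compressed AlphaFold model could actually be fetched and read, assuming the .cif.gz files sit at the top level of the HUBioDataLab/AlphafoldStructures dataset repository (the read_structure_text helper is illustrative, not part of ASCARIS.py):

import gzip

from huggingface_hub import hf_hub_download

def read_structure_text(repo_id: str, filename: str) -> str:
    """Download a gzipped mmCIF file from a Hub dataset repo and return its text."""
    # hf_hub_download caches the file locally and returns the cached path
    local_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset")
    with gzip.open(local_path, "rt") as handle:
        return handle.read()

# Example usage with the file referenced in the commit
cif_text = read_structure_text("HUBioDataLab/AlphafoldStructures",
                               "AF-A0A075B6Y9-F1-model_v4.cif.gz")
print(cif_text[:200])  # first characters of the mmCIF record

Depending on how the repository is laid out, load_dataset may need extra configuration to parse raw .cif.gz files, so fetching individual structures with hf_hub_download can be the lighter-weight option.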