File size: 808 Bytes
9219965
 
 
 
 
 
 
141feee
 
 
9219965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141feee
36929a8
9219965
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env bash
#
# Usage: <script> INPUT OUTPUT FIELDS [NLINES]
#   INPUT   file redirected to the stdin of the first pipeline stage
#   OUTPUT  file the final pipeline stage is redirected into
#   FIELDS  text spliced verbatim between `[[` and `]]` in a pandas column
#           selection, so it must already be valid Python list contents
#           (for example '"id","SMILES"')
#   NLINES  value passed to pandas `.sample(...)`; defaults to 1000

# Fail early with a usage hint instead of a confusing redirect error later on.
if (( $# < 3 )); then
    >&2 echo "usage: ${0##*/} INPUT OUTPUT FIELDS [NLINES]"
    exit 2
fi

INPUT="$1"
OUTPUT="$2"
FIELDS="$3"
NLINES="${4:-1000}"

# Create a virtual environment in ./.schemist, install the Python
# dependencies into it and activate it for the rest of the script.
# The chain is checked explicitly: a failure in an `&&` list is not caught by
# `set -e`, and carrying on would silently use whatever `python` is on PATH.
if ! {
    python -m venv .schemist \
    && .schemist/bin/pip install "pandas" "schemist>=0.0.4" \
    && source .schemist/bin/activate
}; then
    >&2 echo "error: could not set up the .schemist virtual environment"
    exit 1
fi

# Some functions for convenience 
# Write one log line of the form "<prefix> :: <message>" to stderr.
# Arguments:
#   $1 - message text
#   $2 - optional prefix; when unset or empty the output of `date` is used
# Outputs:
#   a single line on stderr, nothing on stdout
# The body is a subshell `( ... )`, so no state leaks back to the caller.
logger () (
    local message="$1"
    local prefix="${2:-}"
    if [[ -z "$prefix" ]]; then
        # Only spawn `date` when no prefix was supplied. A failing `date`
        # is tolerated on purpose: the message should still be logged.
        prefix=$(date) || true
    fi
    # printf prints the text literally regardless of shell echo options.
    printf '%s\n' "$prefix :: $message" >&2
)

# Pipe CSV data from stdin through a pandas expression and back to stdout.
# Arguments:
#   $1 - Python text appended directly after the `pd.read_csv(...)` call,
#        e.g. '.sort_values(["id"])'
#   $2 - separator used for reading (default ",")
#   $3 - value for `to_csv(index=...)` (default False)
#   $4 - separator used for writing (default: same as $2)
# All arguments are spliced unescaped into the Python source, so they must
# already be valid in that position.
# The body is a subshell `( ... )`, so no state leaks back to the caller.
pandas () (
    local expr="$1"
    local in_sep="${2:-,}"
    local with_index="${3:-False}"
    local out_sep="${4:-$in_sep}"
    local program
    # Assemble the one-liner first, then hand it to the interpreter.
    printf -v program \
        'import sys; import pandas as pd; df = pd.read_csv(sys.stdin, sep="%s", low_memory=False)%s.to_csv(sys.stdout, index=%s, sep="%s")' \
        "$in_sep" "$expr" "$with_index" "$out_sep"
    python -c "$program"
)

# Abort on the first failing command. `pipefail` is required as well: without
# it a pipeline reports only the status of its last stage, so a failure while
# sampling or inside `schemist convert` would go unnoticed.
set -e
set -o pipefail
# Trace every command to stderr as it runs.
set -x

# 1. Keep only the columns named in FIELDS and sample NLINES rows from INPUT.
# 2. Pass the sample through `schemist convert` (presumably derives an `id`
#    column from the SMILES column; confirm against the schemist docs).
# 3. Sort the result by `id` and write it to OUTPUT.
pandas '[['"$FIELDS"']].sample('"$NLINES"')' \
< "$INPUT" \
| schemist convert -c SMILES -2 id -f CSV \
| pandas '.sort_values(["id"])' \
> "$OUTPUT"