Spaces:
Running
Running
File size: 808 Bytes
9219965 141feee 9219965 141feee 36929a8 9219965 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
#!/usr/bin/env bash
# Positional arguments:
#   $1  INPUT   - path of the delimited file fed to the pipeline on stdin
#   $2  OUTPUT  - path the final CSV is written to
#   $3  FIELDS  - column list spliced verbatim into a pandas `[[...]]` selector
#   $4  NLINES  - number of rows to sample (defaults to 1000)
# The first three are mandatory; `:?` aborts with a usage message when one is
# missing or empty instead of failing later with an obscure redirect error.
INPUT="${1:?usage: $0 INPUT OUTPUT FIELDS [NLINES]}"
OUTPUT="${2:?usage: $0 INPUT OUTPUT FIELDS [NLINES]}"
FIELDS="${3:?usage: $0 INPUT OUTPUT FIELDS [NLINES]}"
NLINES="${4:-1000}"

# Create a private virtualenv in ./.schemist, install the Python dependencies
# into it and activate it for the rest of the script.
# errexit never applies to `&&` chains, so a failure at any step is handled
# explicitly here rather than letting the script continue with whatever
# `python` happens to be on PATH. The brace group keeps `source` in the
# current shell so the activation persists.
if ! { python -m venv .schemist \
  && .schemist/bin/pip install "pandas" "schemist>=0.0.4" \
  && source .schemist/bin/activate; }; then
  >&2 echo "failed to set up the .schemist virtual environment"
  exit 1
fi
# Some functions for convenience
#######################################
# Write a prefixed log line to stderr.
# Arguments:
#   $1 - message text
#   $2 - prefix; when unset or empty the current `date` output is used
# Outputs:
#   "<prefix> :: <message>" on stderr; nothing on stdout
#######################################
logger () (
  local stamp="$(date)"
  local text="$1"
  local tag="${2:-$stamp}"
  printf '%s :: %s\n' "$tag" "$text" >&2
)
#######################################
# Run a pandas expression over delimited text: stdin is loaded with
# pd.read_csv, the given expression is appended directly to that call, and
# the result is written to stdout with .to_csv.
# Arguments:
#   $1 - Python code appended after `pd.read_csv(...)`, e.g. '.head()'
#   $2 - input separator (default ",")
#   $3 - Python literal passed as to_csv's `index=` (default False)
#   $4 - output separator (default: same as $2)
# All arguments are spliced into the Python source verbatim, so separators
# are interpreted as the contents of a Python string literal.
#######################################
pandas () (
  local expr="$1"
  local in_sep="${2:-,}"
  local keep_index="${3:-False}"
  local out_sep="${4:-$in_sep}"

  # Assemble the one-line program piece by piece.
  local program='import sys; import pandas as pd; '
  program+='df = pd.read_csv(sys.stdin, sep="'"$in_sep"'", low_memory=False)'
  program+="$expr"
  program+='.to_csv(sys.stdout, index='"$keep_index"', sep="'"$out_sep"'")'

  python -c "$program"
)
# Abort on the first failing command. pipefail makes the pipeline below fail
# when ANY stage fails; without it only the last stage's exit status counts,
# so a crash while sampling or converting could still leave the script
# exiting 0 with a truncated or empty $OUTPUT.
set -eo pipefail
# Trace every command on stderr.
set -x

# Pipeline:
#   1. read $INPUT, keep the columns named in $FIELDS and draw $NLINES random
#      rows (pandas raises if the input has fewer rows than requested);
#   2. pass the sample through `schemist convert`
#      NOTE(review): presumably this derives an "id" column from the SMILES
#      column - confirm the meaning of -c / -2 / -f against the schemist docs;
#   3. sort the result by its "id" column and write it to $OUTPUT.
pandas '[['"$FIELDS"']].sample('"$NLINES"')' \
  < "$INPUT" \
  | schemist convert -c SMILES -2 id -f CSV \
  | pandas '.sort_values(["id"])' \
  > "$OUTPUT"
|