cat abuse with split
# cat gets used all the time just to pipe the contents of a
# file to stdout.
# But it is actually for concatenating files.
# It has a partner in crime called split.
# Working together they are very powerful for parallel processing.
# split - do work in parallel - cat
# split then cat will produce an output file which is identical to the input:
# Make a 1 gig file of random bytes on my external ssd.
time head -c $(( 1024 * 1024 * 1024 )) /dev/urandom > $(mktemp '/DataSwap/big.XXXXXXXX')
real 0m5.738s
user 0m0.057s
sys 0m5.680s
# Yey - that was fast!
[aturner@Alexanders-MBP ~]$ split -b $(( 1024 * 1024)) /DataSwap/big.* '/DataSwap/parts'
[aturner@Alexanders-MBP ~]$ ls /DataSwap/parts*
/DataSwap/partsaa /DataSwap/partsgp /DataSwap/partsne /DataSwap/partstt /DataSwap/partszabi /DataSwap/partszahx
/DataSwap/partsab /DataSwap/partsgq /DataSwap/partsnf /DataSwap/partstu /DataSwap/partszabj /DataSwap/partszahy
/DataSwap/partsac /DataSwap/partsgr /DataSwap/partsng /DataSwap/partstv /DataSwap/partszabk /DataSwap/partszahz
# ... stuff not shown as there are 1024 files!
# cat them back together - note how split names the parts with suffixes
# that sort lexically in split order, so ls (or a plain glob) lists them
# in the correct order automatically.
time cat $(ls /DataSwap/parts*) > /DataSwap/Rebuild
real 0m1.949s
user 0m0.017s
sys 0m0.802s
# Again - time is not too shabby (over 500 megabytes per second)!
cmp /DataSwap/big.oTFSbZnD /DataSwap/Rebuild
[[ $? == 0 ]] && print Yey
Yey
# cat gets used all the time just to pipe the contents of a
# file to stdout.
# But it is actually for concatenating files.
# It has a partner in crime called split.
# Working together they are very powerful for parallel processing.
# split - do work in parallel - cat
# split then cat will produce an output file which is identical to the input:
# Make a 1 gig file of random bytes on my external ssd.
time head -c $(( 1024 * 1024 * 1024 )) /dev/urandom > $(mktemp '/DataSwap/big.XXXXXXXX')
real 0m5.738s
user 0m0.057s
sys 0m5.680s
# Yey - that was fast!
[aturner@Alexanders-MBP ~]$ split -b $(( 1024 * 1024)) /DataSwap/big.* '/DataSwap/parts'
[aturner@Alexanders-MBP ~]$ ls /DataSwap/parts*
/DataSwap/partsaa /DataSwap/partsgp /DataSwap/partsne /DataSwap/partstt /DataSwap/partszabi /DataSwap/partszahx
/DataSwap/partsab /DataSwap/partsgq /DataSwap/partsnf /DataSwap/partstu /DataSwap/partszabj /DataSwap/partszahy
/DataSwap/partsac /DataSwap/partsgr /DataSwap/partsng /DataSwap/partstv /DataSwap/partszabk /DataSwap/partszahz
# ... stuff not shown as there are 1024 files!
# cat them back together - note how split names the parts with suffixes
# that sort lexically in split order, so ls (or a plain glob) lists them
# in the correct order automatically.
time cat $(ls /DataSwap/parts*) > /DataSwap/Rebuild
real 0m1.949s
user 0m0.017s
sys 0m0.802s
# Again - time is not too shabby (over 500 megabytes per second)!
cmp /DataSwap/big.oTFSbZnD /DataSwap/Rebuild
[[ $? == 0 ]] && print Yey
Yey
Comments
Post a Comment