From ad46462a4c98445333fea1bfec689a79044be7cc Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Fri, 21 Feb 2025 22:31:22 -0500 Subject: [PATCH] parallelism, utility commands --- backupper.tcl | 285 +++++++++++++++++++++++++++++--------------------- 1 file changed, 168 insertions(+), 117 deletions(-) diff --git a/backupper.tcl b/backupper.tcl index 82dd8191cfc06719b903ccd1659ee7e6f37b0077..ae786632288f55f531add2c59e1a9e5225e20224 100755 --- a/backupper.tcl +++ b/backupper.tcl @@ -1,139 +1,146 @@ #!/bin/env tclsh -package require sha256 package require sqlite3 +package require Thread -proc readbak {path} { - set fd [open $path rb] - set magic [read $fd 6] - if {$magic != "AMEBAK"} { - close $fd - return -code error "invalid magic string: \"$magic\"" - } +set init { + package require sha256 - set ver [read $fd 1] - binary scan $ver c ver - if {$ver != 0} { - close $fd - return -code error "archive file too new: $ver (self: 0)" - } - - set mode [read $fd 1] - read $fd 8; # skip filesize - switch -- $mode { - r {} - F { - zlib push inflate $fd -level 9 - } - default { + proc readbak {path} { + set fd [open $path rb] + set magic [read $fd 6] + if {$magic != "AMEBAK"} { close $fd - return -code error "unknown compression mode: $mode" + return -code error "invalid magic string: \"$magic\"" } - } - set ret [read $fd] - close $fd - return $ret -} -proc readstore {store hash {hex 0}} { - if {!$hex} { set hexhash [binary encode hex $hash] } - set path $store - append path "/[string range $hexhash 0 1]" - append path "/[string range $hexhash 2 3]" - append path "/[string range $hexhash 4 5]/" - append path "/[string range $hexhash 6 end]" - if {[file exists $path]} { - return [readbak $path] - } else { - return -code error "missing file" - } -} + set ver [read $fd 1] + binary scan $ver c ver + if {$ver != 0} { + close $fd + return -code error "archive file too new: $ver (self: 0)" + } -proc writebak {path data} { - binary scan $data c _ - set bench [string range $data 0 10000000] - binary scan $bench c _ - set Fbench [zlib deflate $bench 9] + set mode [read $fd 1] + read $fd 8; # skip filesize + switch -- $mode { + r {} + F { + zlib push inflate $fd -level 9 + } + default { + close $fd + return -code error "unknown compression mode: $mode" + } + } - set fd [open $path wb] - puts -nonewline $fd "AMEBAK\0" + set ret [read $fd] + close $fd + return $ret + } + proc readstore {store hash {hex 0}} { + set hexhash $hash + if {!$hex} { set hexhash [binary encode hex $hash] } + set path $store + append path "/[string range $hexhash 0 1]" + append path "/[string range $hexhash 2 3]" + append path "/[string range $hexhash 4 5]/" + append path "/[string range $hexhash 6 end]" + if {[file exists $path]} { + return [readbak $path] + } else { + return -code error "missing file" + } + } - set compress 0 - if {[string length $Fbench] < ([string length $bench] * 95 / 100)} { - puts -nonewline $fd "F" - set compress 1 - } else { puts -nonewline $fd "r" } + proc writebak {path data} { + binary scan $data c _ + set bench [string range $data 0 10000000] + binary scan $bench c _ + set Fbench [zlib deflate $bench 9] - puts -nonewline $fd [binary format W [string length $data]] + set fd [open $path wb] + puts -nonewline $fd "AMEBAK\0" - if {$compress} { zlib push deflate $fd -level 9 } + set compress 0 + if {[string length $Fbench] < ([string length $bench] * 95 / 100)} { + puts -nonewline $fd "F" + set compress 1 + } else { puts -nonewline $fd "r" } - puts -nonewline $fd $data - close $fd -} -proc writestore {store data {hex 0}} { - set hash [::sha2::sha256 -bin -- $data] - set hexhash [binary encode hex $hash] - set path $store - append path "/[string range $hexhash 0 1]" - append path "/[string range $hexhash 2 3]" - append path "/[string range $hexhash 4 5]/" - file mkdir $path - set path "$path/[string range $hexhash 6 end]" - - if {![file exists $path]} { - writebak $path $data - } + puts -nonewline $fd [binary format W [string length $data]] - if {$hex} { - return $hexhash - } else { - return $hash - } -} + if {$compress} { zlib push deflate $fd -level 9 } -proc statbak {path} { - set fd [open $path rb] - set magic [read $fd 6] - if {$magic != "AMEBAK"} { + puts -nonewline $fd $data close $fd - return -code error "invalid magic string: \"$magic\"" } + proc writestore {store data {hex 0}} { + set hash [::sha2::sha256 -bin -- $data] + set hexhash [binary encode hex $hash] + set path $store + append path "/[string range $hexhash 0 1]" + append path "/[string range $hexhash 2 3]" + append path "/[string range $hexhash 4 5]/" + file mkdir $path + set path "$path/[string range $hexhash 6 end]" + + if {![file exists $path]} { + writebak $path $data + } - set ver [read $fd 1] - binary scan $ver c ver - if {$ver != 0} { - close $fd - return -code error "archive file too new: $ver (self: 0)" + if {$hex} { + return $hexhash + } else { + return $hash + } } - set mode [read $fd 1] - switch -- $mode { - r { set mode raw } - F { set mode deflate } - default { + proc statbak {path} { + set fd [open $path rb] + set magic [read $fd 6] + if {$magic != "AMEBAK"} { close $fd - return -code error "unknown compression mode: $mode" + return -code error "invalid magic string: \"$magic\"" } - } - set disksize [file size $path] - binary scan [read $fd 8] W datasize + set ver [read $fd 1] + binary scan $ver c ver + if {$ver != 0} { + close $fd + return -code error "archive file too new: $ver (self: 0)" + } - close $fd - return [list ver $ver mode $mode disksize $disksize datasize $datasize] -} -proc statstore {store hash {hex 0}} { - if {$$hex} { set hexhash [binary encode hex $hash] } - set path $store - append path "/[string range $hexhash 0 1]" - append path "/[string range $hexhash 2 3]" - append path "/[string range $hexhash 4 5]/" - append path "/[string range $hexhash 6 end]" - if {![file exists $path]} return - - statbak $path + set mode [read $fd 1] + switch -- $mode { + r { set mode raw } + F { set mode deflate } + default { + close $fd + return -code error "unknown compression mode: $mode" + } + } + + set disksize [file size $path] + binary scan [read $fd 8] W datasize + + close $fd + return [list ver $ver mode $mode disksize $disksize datasize $datasize] + } + proc statstore {store hash {hex 0}} { + set hexhash $hash + if {!$hex} { set hexhash [binary encode hex $hash] } + set path $store + append path "/[string range $hexhash 0 1]" + append path "/[string range $hexhash 2 3]" + append path "/[string range $hexhash 4 5]/" + append path "/[string range $hexhash 6 end]" + if {![file exists $path]} return + + dict replace [statbak $path] path $path + } } +eval $init proc usage {} { global argv0 @@ -141,6 +148,8 @@ proc usage {} { puts stderr "commands:" puts stderr " init - initialize a store in store" puts stderr " importlist ?listfile? - if listfile is ommitted, reads stdin" + puts stderr " print hash - prints the contents of on stdout" + puts stderr " stat hash - prints metadata about " exit -1 } proc die {reason} { @@ -170,6 +179,11 @@ if {$command == "init"} { if {[catch {sqlite3 db $dbpath -create false}]} { die "can't find a store at \"$store\"." } +set fd [open /proc/cpuinfo r] +set nproc [llength [regexp -all -lineanchor -inline "^processor" [read $fd]]] +close $fd +set pool [tpool::create -maxworkers $nproc -initcmd [concat $init "; " [list set arpath $arpath]]] + switch -- $command { importlist { if {[llength $argv] > 1} usage @@ -182,8 +196,9 @@ switch -- $command { } set files [split [string trim [read $fd]] "\n"] + set joblist {} set filelist {} - close $fd + if {$fd != "stdin"} { close $fd } foreach filename $files { if {![file exists $filename]} { @@ -191,20 +206,56 @@ switch -- $command { continue } - set fd [open $filename rb] - set data [read $fd] - close $fd + set script [concat [list set filename $filename] "; " { + set fd [open $filename rb] + set data [read $fd] + close $fd - set hash [writestore $arpath $data 1] - lappend filelist [string trimleft $filename "/"] $hash + set hash [writestore $arpath $data 1] + return [list [string trimleft $filename "/"] $hash] + }] + + lappend joblist [tpool::post $pool $script] + } + + foreach job $joblist { + tpool::wait $pool $job + append filelist " [tpool::get $pool $job]" } + set filelist [lsort -stride 2 $filelist] + set ref [writestore $arpath $filelist 1] set date [clock seconds] set rowid [db eval { INSERT INTO tarballs VALUES ($fname, "list", $ref, "", $date) RETURNING rowid; }] - puts stderr "inserted as archive $rowid" + puts stderr "inserted as archive $rowid with hash $ref" } + + print { + if {[llength $argv] != 1} usage + + set stat [statstore $arpath [lindex $argv 0] 1] + if {$stat == {}} { die "unknown hash" } + + puts [readstore $arpath [lindex $argv 0] 1] + } + stat { + if {[llength $argv] != 1} usage + + set stat [statstore $arpath [lindex $argv 0] 1] + if {$stat == {}} { die "unknown hash" } + + puts "file [lindex $argv 0]:" + puts " version: [dict get $stat ver]" + puts " compression: [dict get $stat mode]" + if {[dict get $stat mode] != "raw"} { + puts " compressed size (bytes): [dict get $stat disksize]" + } + puts " data size (bytes): [dict get $stat datasize]" + } + + default usage }