From e78900d0aef21345d04c2cfb1904127ace9dc453 Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Sun, 23 Feb 2025 22:50:32 -0500 Subject: [PATCH] tar import/export --- .gitignore | 1 + backupper.tcl | 207 +++++++++++++++++++++++++++++++++++++++++++++++--- naivetar.tcl | 67 ++++++++++++++++ 3 files changed, 263 insertions(+), 12 deletions(-) create mode 100644 naivetar.tcl diff --git a/.gitignore b/.gitignore index 0a11896adf299181be31eb4e82830cd821b7b5c1..cf300cd9ec407b7775f4dbf55dec82c0494e16a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *~ store/ *.png +*.tar diff --git a/backupper.tcl b/backupper.tcl index ae786632288f55f531add2c59e1a9e5225e20224..7c698814729dc5289a0c4b7ccfca40a8d98ba677 100755 --- a/backupper.tcl +++ b/backupper.tcl @@ -2,6 +2,7 @@ package require sqlite3 package require Thread +package require tcl::chan::string set init { package require sha256 @@ -142,14 +143,73 @@ set init { } eval $init +namespace eval ::tar {} +proc ::tar::nextrec {fd} { + set hdr [read $fd 512] + binary scan $hdr c _ + if {[string length $hdr] != 512} { die "got partial header: [string length $hdr] b" } + + binary scan $hdr a100c24a11c1c12c8a1c* name blind1 size _ blind2 _ type blind3 + set name [lindex [split $name "\0"] 0] + + if {$name == {}} return + + set data [read $fd [expr {$size}]] + if {$size % 512 != 0} { + read $fd [expr {512 - ($size % 512)}] + } + + return [list name $name type $type data [binary encode hex $data] blind1 $blind1 blind2 $blind2 blind3 $blind3] +} +proc ::tar::fmtrec {rec} { + set name [dict get $rec name] + set type [dict get $rec type] + set size [expr {[string length [dict get $rec data]] / 2}] + set size [format %011o $size] + + set blind1 [dict get $rec blind1] + set blind2 [dict get $rec blind2] + set blind3 [dict get $rec blind3] + + set hdr [binary format a100c24a12c12A8a1c* $name $blind1 $size $blind2 "" $type $blind3] + binary scan $hdr c* bytes + set checksum 0 + foreach byte $bytes { + incr checksum [expr {$byte & 0xFF}] + } + set checksum [format %07o $checksum] + set hdr [binary format a100c24a12c12a8a1c* $name $blind1 $size $blind2 $checksum $type $blind3] + + append hdr [binary decode hex [dict get $rec data]] + if {$size % 512 != 0} { + append hdr [binary format a[expr {512 - ($size % 512)}] ""] + } + + return $hdr +} + + proc usage {} { global argv0 - puts stderr "usage: $argv0 command store ?args ...?" - puts stderr "commands:" - puts stderr " init - initialize a store in store" - puts stderr " importlist ?listfile? - if listfile is ommitted, reads stdin" - puts stderr " print hash - prints the contents of on stdout" - puts stderr " stat hash - prints metadata about " + puts stderr {usage: $argv0 command store ?args ...? +commands: + init + - initialize a store in store + importlist name ?listfile? + - imports the contents of listfile under name, treating each line as a filepath + - if listfile is ommitted, reads stdin + importtar name ?tarfile? + - imports the contents of tarfile under name. will not auto-decompress. + - if tarfile is ommitted, reads stdin + import ?filepath? + - stores the contents of filepath, returning the hash + - if filepath is ommitted, reads stdin + export id + - exports the contents of backup id to a file named like NAME_DATE + print hash + - prints the contents of the file named by hash on stdout + stat hash + - prints metadata about the file named by hash} exit -1 } proc die {reason} { @@ -186,13 +246,13 @@ set pool [tpool::create -maxworkers $nproc -initcmd [concat $init "; " [list set switch -- $command { importlist { - if {[llength $argv] > 1} usage - if {[llength $argv] == 1} { - set fd [open [lindex $argv 0]] - set fname [lindex $argv 0] + if {[llength $argv] ni {1 2}} usage + set fname [lindex $argv 0] + + if {[llength $argv] == 2} { + set fd [open [lindex $argv 1]] } else { set fd stdin - set fname "pipeline" } set files [split [string trim [read $fd]] "\n"] @@ -212,7 +272,7 @@ switch -- $command { close $fd set hash [writestore $arpath $data 1] - return [list [string trimleft $filename "/"] $hash] + return [list [string trimleft [file normalize $filename] "/"] $hash] }] lappend joblist [tpool::post $pool $script] @@ -234,6 +294,115 @@ switch -- $command { puts stderr "inserted as archive $rowid with hash $ref" } + importtar { + if {[llength $argv] ni {1 2}} usage + set fname [lindex $argv 0] + + if {[llength $argv] == 2} { + set fd [open [lindex $argv 1]] rb + } else { + set fd stdin + fconfigure stdin -translation binary + } + + set joblist {} + set result {} + + while {[set record [::tar::nextrec $fd]] != {}} { + set script [concat [list set record $record] "; " { + set hash [writestore $arpath [binary decode hex [dict get $record data]] 1] + dict set record data $hash + return $record + }] + + lappend joblist [tpool::post $pool $script] + } + + if {$fd != "stdin"} { close $fd } + + foreach job $joblist { + tpool::wait $pool $job + append result [::tar::fmtrec [tpool::get $pool $job]] + } + + append result [binary format a[expr {16384 - ([string length $result] % 16384) + 512}] ""] + + set ref [writestore $arpath $result 1] + set date [clock seconds] + set rowid [db eval { + INSERT INTO tarballs VALUES ($fname, "tar", $ref, "", $date) RETURNING rowid; + }] + + puts stderr "inserted as archive $rowid with hash $ref" + } + + export { + if {[llength $argv] != 1} usage + + set rowid [lindex $argv 0] + set record [db eval { + SELECT ref,type,name,date FROM tarballs WHERE rowid = $rowid; + }] + if {[llength $record] == 0} { die "unknown archive id" } + lassign $record ref type name date + + set exportname [regsub "/" "${name}_[clock format $date -format {%Y-%m-%d_%H.%M.%S}]" "-"] + set archive [readstore $arpath $ref 1] + + switch -- $type { + list { + file mkdir $exportname + + set joblist {} + foreach {path hash} $archive { + set script [concat [list set path "$exportname/$path"] "; " [list set hash $hash] "; " { + puts "$hash -> $path" + file mkdir [file dirname $path] + set fd [open "$path" wb] + puts -nonewline $fd [readstore $arpath $hash 1] + close $fd + }] + + lappend joblist [tpool::post $pool $script] + } + + foreach job $joblist { + tpool::wait $pool $job + } + } + tar { + set exportname "$exportname.tar" + set exportfd [open $exportname wb] + + set importfd [::tcl::chan::string $archive] + fconfigure $importfd -translation binary + + set joblist {} + while {[set record [::tar::nextrec $importfd]] != {}} { + set script [concat [list set record $record] "; " { + dict set record data [binary encode hex [readstore $arpath [dict get $record data] 1]] + return $record + }] + + lappend joblist [tpool::post -nowait $pool $script] + } + + foreach job $joblist { + tpool::wait $pool $job + puts -nonewline $exportfd [::tar::fmtrec [tpool::get $pool $job]] + } + puts -nonewline $exportfd [binary format a[expr {16384 - ([tell $exportfd] % 16384) + 512}] ""] + + close $exportfd + } + default { + die "database corrupt: unknown archive type \"$type\"" + } + } + + puts "exported to $exportname" + } + print { if {[llength $argv] != 1} usage @@ -257,5 +426,19 @@ switch -- $command { puts " data size (bytes): [dict get $stat datasize]" } + import { + if {[llength $argv] > 1} usage + if {[llength $argv] == 1} { + set fd [open [lindex $argv 0]] + } else { + set fd stdin + } + + set data [read $fd] + + set hash [writestore $arpath $data 1] + puts "imported as $hash" + } + default usage } diff --git a/naivetar.tcl b/naivetar.tcl new file mode 100644 index 0000000000000000000000000000000000000000..4114748e142c2484841bd157347462b91ddd99c8 --- /dev/null +++ b/naivetar.tcl @@ -0,0 +1,67 @@ + +proc die {reason} { + global argv0 + puts stderr "$argv0: fatal: $reason" + exit -2 +} + +proc nextrec {fd} { + set hdr [read $fd 512] + binary scan $hdr c _ + if {[string length $hdr] != 512} { die "got partial header: [string length $hdr]b" } + + binary scan $hdr a100c24a11c1c12c8a1c* name blind1 size _ blind2 _ type blind3 + set name [lindex [split $name "\0"] 0] + + if {$name == {}} return + + set data [read $fd [expr {$size}]] + if {$size % 512 != 0} { + read $fd [expr {512 - ($size % 512)}] + } + + return [list name $name type $type data [binary encode hex $data] blind1 $blind1 blind2 $blind2 blind3 $blind3] +} + +proc putrec {fd rec} { + set name [dict get $rec name] + set type [dict get $rec type] + set size [expr {[string length [dict get $rec data]] / 2}] + set size [format %011o $size] + +# puts "$name - $size" + + set blind1 [dict get $rec blind1] + set blind2 [dict get $rec blind2] + set blind3 [dict get $rec blind3] + + set hdr [binary format a100c24a12c12A8a1c* $name $blind1 $size $blind2 "" $type $blind3] + binary scan $hdr c* bytes + set checksum 0 + foreach byte $bytes { + incr checksum [expr {$byte & 0xFF}] + } + set checksum [format %07o $checksum] + set hdr [binary format a100c24a12c12a8a1c* $name $blind1 $size $blind2 $checksum $type $blind3] + + puts -nonewline $fd $hdr + + puts -nonewline $fd [binary decode hex [dict get $rec data]] + if {$size % 512 != 0} { + puts -nonewline $fd [binary format a[expr {512 - ($size % 512)}] ""] + } +} + +set fd [open [lindex $argv 0] rb] +set fd2 [open [lindex $argv 1] wb] + +while {[set record [nextrec $fd]] != {}} { + if {[dict get $record type] == "0"} { + puts "type of [dict get $record name] is 0, overwriting" + dict set record data [binary encode hex "Hello, World!"] +# puts $record + } + putrec $fd2 $record +} + +puts -nonewline $fd2 [binary format a[expr {16384 - ([tell $fd2] % 16384)}] ""]