~aleteoryx/backupper

e78900d0aef21345d04c2cfb1904127ace9dc453 — Aleteoryx 9 months ago ad46462
tar import/export
3 files changed, 263 insertions(+), 12 deletions(-)

M .gitignore
M backupper.tcl
A naivetar.tcl
M .gitignore => .gitignore +1 -0
@@ 1,3 1,4 @@
*~
store/
*.png
*.tar

M backupper.tcl => backupper.tcl +195 -12
@@ 2,6 2,7 @@

package require sqlite3
package require Thread
package require tcl::chan::string

set init {
  package require sha256


@@ 142,14 143,73 @@ set init {
}
eval $init

namespace eval ::tar {}
proc ::tar::nextrec {fd} {
  ## Read one 512-byte ustar record from channel $fd.
  ## Returns a dict {name type data blind1 blind2 blind3} with the payload
  ## hex-encoded in `data`, or the empty string on the all-zero
  ## end-of-archive header.  Unparsed header regions are captured verbatim
  ## in blind1..blind3 so a record round-trips through ::tar::fmtrec.
  set hdr [read $fd 512]
  # NOTE(review): the result of this scan is discarded — looks like a
  # leftover no-op; confirm it can be removed.
  binary scan $hdr c _
  if {[string length $hdr] != 512} { die "got partial header: [string length $hdr] b" }

  # Header layout: name (100 bytes), skipped mode/uid/gid (blind1), size as
  # an 11-char octal string (a11, final field byte ignored), skipped
  # mtime/checksum span (blind2), typeflag (1 byte), remainder (blind3).
  binary scan $hdr a100c24a11c1c12c8a1c* name blind1 size _ blind2 _ type blind3
  set name [lindex [split $name "\0"] 0]

  # An empty name means an all-zero block: the end-of-archive marker.
  if {$name == {}} return

  # $size is a leading-zero octal string; expr's legacy leading-zero octal
  # parsing converts it to the byte count (Tcl 8.x behavior — NOTE(review):
  # this breaks under Tcl 9, where leading-zero octal was removed).
  set data [read $fd [expr {$size}]]
  if {$size % 512 != 0} {
    # Skip the zero padding up to the next 512-byte boundary.
    read $fd [expr {512 - ($size % 512)}]
  }

  return [list name $name type $type data [binary encode hex $data]  blind1 $blind1 blind2 $blind2 blind3 $blind3]
}
proc ::tar::fmtrec {rec} {
  ## Serialize a record dict (as produced by ::tar::nextrec) back into raw
  ## tar bytes: a 512-byte header with a freshly computed checksum, the
  ## payload, then zero padding to the next 512-byte boundary.
  set name [dict get $rec name]
  set type [dict get $rec type]
  # data is hex-encoded, so the payload byte count is half its length.
  set size [expr {[string length [dict get $rec data]] / 2}]
  set size [format %011o $size]

  set blind1 [dict get $rec blind1]
  set blind2 [dict get $rec blind2]
  set blind3 [dict get $rec blind3]

  # First pass: build the header with the checksum field as 8 spaces ("A8"
  # space-pads) — the ustar checksum is defined over exactly that layout.
  set hdr [binary format a100c24a12c12A8a1c* $name $blind1 $size $blind2 "" $type $blind3]
  binary scan $hdr c* bytes
  set checksum 0
  foreach byte $bytes {
    incr checksum [expr {$byte & 0xFF}]
  }
  set checksum [format %07o $checksum]
  # Second pass: rewrite the header with the real checksum ("a8" NUL-pads
  # the 7-digit value to the 8-byte field).
  set hdr [binary format a100c24a12c12a8a1c* $name $blind1 $size $blind2 $checksum $type $blind3]

  append hdr [binary decode hex [dict get $rec data]]
  # $size is the octal string at this point; expr's legacy leading-zero
  # octal parsing recovers the byte count (Tcl 8.x behavior).
  if {$size % 512 != 0} {
    append hdr [binary format a[expr {512 - ($size % 512)}] ""]
  }

  return $hdr
}


proc usage {} {
  ## Print command-line help to stderr and exit with status -1.
  ## Fixes vs. the previous version: the stale short usage banner that was
  ## printed above the full help is gone; the help text is now a
  ## double-quoted string so $argv0 is actually substituted (the braced
  ## literal printed "$argv0" verbatim); "ommitted" typos corrected.
  global argv0
  puts stderr "usage: $argv0 command store ?args ...?
commands:
  init
    - initialize a store in store
  importlist name ?listfile?
    - imports the contents of listfile under name, treating each line as a filepath
    - if listfile is omitted, reads stdin
  importtar name ?tarfile?
    - imports the contents of tarfile under name. will not auto-decompress.
    - if tarfile is omitted, reads stdin
  import ?filepath?
    - stores the contents of filepath, returning the hash
    - if filepath is omitted, reads stdin
  export id
    - exports the contents of backup id to a file named like NAME_DATE
  print hash
    - prints the contents of the file named by hash on stdout
  stat hash
    - prints metadata about the file named by hash"
  exit -1
}
proc die {reason} {


@@ 186,13 246,13 @@ set pool [tpool::create -maxworkers $nproc -initcmd [concat $init "; " [list set

switch -- $command {
  importlist {
    if {[llength $argv] > 1} usage
    if {[llength $argv] == 1} {
      set fd [open [lindex $argv 0]]
      set fname [lindex $argv 0]
    if {[llength $argv] ni {1 2}} usage
    set fname [lindex $argv 0]

    if {[llength $argv] == 2} {
      set fd [open [lindex $argv 1]]
    } else {
      set fd stdin
      set fname "pipeline"
    }

    set files [split [string trim [read $fd]] "\n"]


@@ 212,7 272,7 @@ switch -- $command {
        close $fd

        set hash [writestore $arpath $data 1]
        return [list [string trimleft $filename "/"] $hash]
        return [list [string trimleft [file normalize $filename] "/"] $hash]
      }]

      lappend joblist [tpool::post $pool $script]


@@ 234,6 294,115 @@ switch -- $command {
    puts stderr "inserted as archive $rowid with hash $ref"
  }

  importtar {
    ## importtar name ?tarfile? — store every tar member's payload in the
    ## content store and record a "tar" archive whose per-record data
    ## fields hold the resulting hashes.  Reads stdin when tarfile is
    ## omitted.  Will not auto-decompress.
    if {[llength $argv] ni {1 2}} usage
    set fname [lindex $argv 0]

    if {[llength $argv] == 2} {
      # Fix: "rb" belongs inside the open call — the original
      # `[open [lindex $argv 1]] rb` made this a three-argument `set`,
      # which is a runtime error, and dropped the binary open mode.
      set fd [open [lindex $argv 1] rb]
    } else {
      set fd stdin
      fconfigure stdin -translation binary
    }

    set joblist {}
    set result {}

    # One worker job per tar member: write the payload into the store and
    # replace the record's data field with the returned hash.
    while {[set record [::tar::nextrec $fd]] != {}} {
      set script [concat [list set record $record] "; " {
        set hash [writestore $arpath [binary decode hex [dict get $record data]] 1]
        dict set record data $hash
        return $record
      }]

      lappend joblist [tpool::post $pool $script]
    }

    if {$fd != "stdin"} { close $fd }

    # Reassemble the (hash-bearing) tar stream in submission order.
    foreach job $joblist {
      tpool::wait $pool $job
      append result [::tar::fmtrec [tpool::get $pool $job]]
    }

    # Pad to the 16 KiB blocking factor.
    # NOTE(review): the extra +512 overshoots a multiple of 16384 — confirm
    # the intended end-of-archive layout (matches the export branch).
    append result [binary format a[expr {16384 - ([string length $result] % 16384) + 512}] ""]

    set ref [writestore $arpath $result 1]
    set date [clock seconds]
    set rowid [db eval {
      INSERT INTO tarballs VALUES ($fname, "tar", $ref, "", $date) RETURNING rowid;
    }]

    puts stderr "inserted as archive $rowid with hash $ref"
  }

  export {
    ## export id — materialize archive row <id> from the store into either
    ## a directory tree ("list" type) or a .tar file ("tar" type), named
    ## NAME_DATE in the current directory.
    if {[llength $argv] != 1} usage

    set rowid [lindex $argv 0]
    # db eval returns the selected columns as one flat list.
    set record [db eval {
      SELECT ref,type,name,date FROM tarballs WHERE rowid = $rowid;
    }]
    if {[llength $record] == 0} { die "unknown archive id" }
    lassign $record ref type name date

    # Build NAME_DATE and sanitize it into a single path component.
    # Fix: -all added — without it only the FIRST "/" in the archive name
    # was replaced, so multi-slash names produced broken export paths.
    set exportname [regsub -all "/" "${name}_[clock format $date -format {%Y-%m-%d_%H.%M.%S}]" "-"]
    set archive [readstore $arpath $ref 1]

    switch -- $type {
      list {
        # "list" archives are {path hash ...} pairs: recreate the tree,
        # fetching each blob from the store on a worker thread.
        file mkdir $exportname

        set joblist {}
        foreach {path hash} $archive {
          set script [concat [list set path "$exportname/$path"] "; " [list set hash $hash] "; " {
            puts "$hash -> $path"
            file mkdir [file dirname $path]
            set fd [open "$path" wb]
            puts -nonewline $fd [readstore $arpath $hash 1]
            close $fd
          }]

          lappend joblist [tpool::post $pool $script]
        }

        foreach job $joblist {
          tpool::wait $pool $job
        }
      }
      tar {
        # "tar" archives are a tar stream whose per-record data fields hold
        # store hashes; rehydrate each payload and re-emit the stream.
        set exportname "$exportname.tar"
        set exportfd [open $exportname wb]

        set importfd [::tcl::chan::string $archive]
        fconfigure $importfd -translation binary

        set joblist {}
        while {[set record [::tar::nextrec $importfd]] != {}} {
          set script [concat [list set record $record] "; " {
            dict set record data [binary encode hex [readstore $arpath [dict get $record data] 1]]
            return $record
          }]

          lappend joblist [tpool::post -nowait $pool $script]
        }

        # Records are re-emitted in submission order, so the output tar
        # matches the stored stream's member order.
        foreach job $joblist {
          tpool::wait $pool $job
          puts -nonewline $exportfd [::tar::fmtrec [tpool::get $pool $job]]
        }
        # Pad to the 16 KiB blocking factor.
        # NOTE(review): the extra +512 overshoots a multiple of 16384 —
        # confirm the intended end-of-archive layout (matches importtar).
        puts -nonewline $exportfd [binary format a[expr {16384 - ([tell $exportfd] % 16384) + 512}] ""]

        close $exportfd
      }
      default {
        die "database corrupt: unknown archive type \"$type\""
      }
    }

    puts "exported to $exportname"
  }

  print {
    if {[llength $argv] != 1} usage



@@ 257,5 426,19 @@ switch -- $command {
    puts "  data size (bytes): [dict get $stat datasize]"
  }

  import {
    ## import ?filepath? — store the contents of filepath (or stdin) and
    ## print the resulting hash.
    if {[llength $argv] > 1} usage
    if {[llength $argv] == 1} {
      # NOTE(review): opened in default (text) mode; binary payloads may be
      # altered by newline/encoding translation — importtar opens with rb.
      # Confirm whether text mode is intentional here.
      set fd [open [lindex $argv 0]]
    } else {
      set fd stdin
    }

    set data [read $fd]
    # Fix: the opened channel was never closed (resource leak); close it,
    # matching importtar's handling.
    if {$fd != "stdin"} { close $fd }

    set hash [writestore $arpath $data 1]
    puts "imported as $hash"
  }

  default usage
}

A naivetar.tcl => naivetar.tcl +67 -0
@@ 0,0 1,67 @@

proc die {reason} {
  ## Report a fatal error on stderr, prefixed with the script name, and
  ## abort with exit status -2.
  global argv0
  puts stderr [format {%s: fatal: %s} $argv0 $reason]
  exit -2
}

proc nextrec {fd} {
  ## Read one 512-byte ustar record from channel $fd.
  ## Returns a dict {name type data blind1 blind2 blind3} with the payload
  ## hex-encoded in `data`, or the empty string on the all-zero
  ## end-of-archive header.  Unparsed header regions are captured verbatim
  ## in blind1..blind3 so a record round-trips through putrec.
  set hdr [read $fd 512]
  # NOTE(review): the result of this scan is discarded — looks like a
  # leftover no-op; confirm it can be removed.
  binary scan $hdr c _
  if {[string length $hdr] != 512} { die "got partial header: [string length $hdr]b" }

  # Header layout: name (100 bytes), skipped mode/uid/gid (blind1), size as
  # an 11-char octal string (a11, final field byte ignored), skipped
  # mtime/checksum span (blind2), typeflag (1 byte), remainder (blind3).
  binary scan $hdr a100c24a11c1c12c8a1c* name blind1 size _ blind2 _ type blind3
  set name [lindex [split $name "\0"] 0]

  # An empty name means an all-zero block: the end-of-archive marker.
  if {$name == {}} return

  # $size is a leading-zero octal string; expr's legacy leading-zero octal
  # parsing converts it to the byte count (Tcl 8.x behavior — NOTE(review):
  # this breaks under Tcl 9, where leading-zero octal was removed).
  set data [read $fd [expr {$size}]]
  if {$size % 512 != 0} {
    # Skip the zero padding up to the next 512-byte boundary.
    read $fd [expr {512 - ($size % 512)}]
  }

  return [list name $name type $type data [binary encode hex $data]  blind1 $blind1 blind2 $blind2 blind3 $blind3]
}

proc putrec {fd rec} {
  ## Write one record dict (as produced by nextrec) to $fd as raw tar
  ## bytes: a 512-byte header with a freshly computed checksum, the
  ## payload, then zero padding to the next 512-byte boundary.
  set name [dict get $rec name]
  set type [dict get $rec type]
  # data is hex-encoded, so the payload byte count is half its length.
  set size [expr {[string length [dict get $rec data]] / 2}]
  set size [format %011o $size]

#  puts "$name - $size"

  set blind1 [dict get $rec blind1]
  set blind2 [dict get $rec blind2]
  set blind3 [dict get $rec blind3]

  # First pass: build the header with the checksum field as 8 spaces ("A8"
  # space-pads) — the ustar checksum is defined over exactly that layout.
  set hdr [binary format a100c24a12c12A8a1c* $name $blind1 $size $blind2 "" $type $blind3]
  binary scan $hdr c* bytes
  set checksum 0
  foreach byte $bytes {
    incr checksum [expr {$byte & 0xFF}]
  }
  set checksum [format %07o $checksum]
  # Second pass: rewrite the header with the real checksum ("a8" NUL-pads
  # the 7-digit value to the 8-byte field).
  set hdr [binary format a100c24a12c12a8a1c* $name $blind1 $size $blind2 $checksum $type $blind3]

  puts -nonewline $fd $hdr

  puts -nonewline $fd [binary decode hex [dict get $rec data]]
  # $size is the octal string at this point; expr's legacy leading-zero
  # octal parsing recovers the byte count (Tcl 8.x behavior).
  if {$size % 512 != 0} {
    puts -nonewline $fd [binary format a[expr {512 - ($size % 512)}] ""]
  }
}

# Test driver: copy the tar at argv[0] to argv[1], overwriting the payload
# of every regular-file ("0"-typed) member with a fixed test string.
set fd [open [lindex $argv 0] rb]
set fd2 [open [lindex $argv 1] wb]

while {[set record [nextrec $fd]] != {}} {
  if {[dict get $record type] == "0"} {
    puts "type of [dict get $record name] is 0, overwriting"
    dict set record data [binary encode hex "Hello, World!"]
#    puts $record
  }
  putrec $fd2 $record
}

# Pad the output to the 16 KiB blocking factor.
# NOTE(review): tar's end-of-archive marker is two zero blocks; if the data
# already ends exactly 512 bytes short of a boundary only one zero block is
# written — confirm downstream readers tolerate this.
puts -nonewline $fd2 [binary format a[expr {16384 - ([tell $fd2] % 16384)}] ""]

# Fix: close both channels explicitly so the output is fully flushed and
# the descriptors released, rather than relying on interpreter teardown.
close $fd
close $fd2