~aleteoryx/backupper

ad46462a4c98445333fea1bfec689a79044be7cc — Aleteoryx 9 months ago 1db63d2
parallelism, utility commands
1 files changed, 168 insertions(+), 117 deletions(-)

M backupper.tcl
M backupper.tcl => backupper.tcl +168 -117
@@ 1,139 1,146 @@
#!/bin/env tclsh

package require sha256
package require sqlite3
package require Thread

proc readbak {path} {
  set fd [open $path rb]
  set magic [read $fd 6]
  if {$magic != "AMEBAK"} {
    close $fd
    return -code error "invalid magic string: \"$magic\""
  }
set init {
  package require sha256

  set ver [read $fd 1]
  binary scan $ver c ver
  if {$ver != 0} {
    close $fd
    return -code error "archive file too new: $ver (self: 0)"
  }

  set mode [read $fd 1]
  read $fd 8; # skip filesize
  switch -- $mode {
    r {}
    F {
      zlib push inflate $fd -level 9
    }
    default {
  proc readbak {path} {
    set fd [open $path rb]
    set magic [read $fd 6]
    if {$magic != "AMEBAK"} {
      close $fd
      return -code error "unknown compression mode: $mode"
      return -code error "invalid magic string: \"$magic\""
    }
  }

  set ret [read $fd]
  close $fd
  return $ret
}
proc readstore {store hash {hex 0}} {
  if {!$hex} { set hexhash [binary encode hex $hash] }
  set path $store
  append path "/[string range $hexhash 0 1]"
  append path "/[string range $hexhash 2 3]"
  append path "/[string range $hexhash 4 5]/"
  append path "/[string range $hexhash 6 end]"
  if {[file exists $path]} {
    return [readbak $path]
  } else {
    return -code error "missing file"
  }
}
    set ver [read $fd 1]
    binary scan $ver c ver
    if {$ver != 0} {
      close $fd
      return -code error "archive file too new: $ver (self: 0)"
    }

proc writebak {path data} {
  binary scan $data c _
  set bench [string range $data 0 10000000]
  binary scan $bench c _
  set Fbench [zlib deflate $bench 9]
    set mode [read $fd 1]
    read $fd 8; # skip filesize
    switch -- $mode {
      r {}
      F {
        zlib push inflate $fd -level 9
      }
      default {
        close $fd
        return -code error "unknown compression mode: $mode"
      }
    }

  set fd [open $path wb]
  puts -nonewline $fd "AMEBAK\0"
    set ret [read $fd]
    close $fd
    return $ret
  }
  proc readstore {store hash {hex 0}} {
    set hexhash $hash
    if {!$hex} { set hexhash [binary encode hex $hash] }
    set path $store
    append path "/[string range $hexhash 0 1]"
    append path "/[string range $hexhash 2 3]"
    append path "/[string range $hexhash 4 5]/"
    append path "/[string range $hexhash 6 end]"
    if {[file exists $path]} {
      return [readbak $path]
    } else {
      return -code error "missing file"
    }
  }

  set compress 0
  if {[string length $Fbench] < ([string length $bench] * 95 / 100)} {
    puts -nonewline $fd "F"
    set compress 1
  } else { puts -nonewline $fd "r" }
  proc writebak {path data} {
    binary scan $data c _
    set bench [string range $data 0 10000000]
    binary scan $bench c _
    set Fbench [zlib deflate $bench 9]

  puts -nonewline $fd [binary format W [string length $data]]
    set fd [open $path wb]
    puts -nonewline $fd "AMEBAK\0"

  if {$compress} { zlib push deflate $fd -level 9 }
    set compress 0
    if {[string length $Fbench] < ([string length $bench] * 95 / 100)} {
      puts -nonewline $fd "F"
      set compress 1
    } else { puts -nonewline $fd "r" }

  puts -nonewline $fd $data
  close $fd
}
proc writestore {store data {hex 0}} {
  set hash [::sha2::sha256 -bin -- $data]
  set hexhash [binary encode hex $hash]
  set path $store
  append path "/[string range $hexhash 0 1]"
  append path "/[string range $hexhash 2 3]"
  append path "/[string range $hexhash 4 5]/"
  file mkdir $path
  set path "$path/[string range $hexhash 6 end]"

  if {![file exists $path]} {
    writebak $path $data
  }
    puts -nonewline $fd [binary format W [string length $data]]

  if {$hex} {
    return $hexhash
  } else {
    return $hash
  }
}
    if {$compress} { zlib push deflate $fd -level 9 }

proc statbak {path} {
  set fd [open $path rb]
  set magic [read $fd 6]
  if {$magic != "AMEBAK"} {
    puts -nonewline $fd $data
    close $fd
    return -code error "invalid magic string: \"$magic\""
  }
  proc writestore {store data {hex 0}} {
    set hash [::sha2::sha256 -bin -- $data]
    set hexhash [binary encode hex $hash]
    set path $store
    append path "/[string range $hexhash 0 1]"
    append path "/[string range $hexhash 2 3]"
    append path "/[string range $hexhash 4 5]/"
    file mkdir $path
    set path "$path/[string range $hexhash 6 end]"

    if {![file exists $path]} {
      writebak $path $data
    }

  set ver [read $fd 1]
  binary scan $ver c ver
  if {$ver != 0} {
    close $fd
    return -code error "archive file too new: $ver (self: 0)"
    if {$hex} {
      return $hexhash
    } else {
      return $hash
    }
  }

  set mode [read $fd 1]
  switch -- $mode {
    r { set mode raw }
    F { set mode deflate }
    default {
  proc statbak {path} {
    set fd [open $path rb]
    set magic [read $fd 6]
    if {$magic != "AMEBAK"} {
      close $fd
      return -code error "unknown compression mode: $mode"
      return -code error "invalid magic string: \"$magic\""
    }
  }

  set disksize [file size $path]
  binary scan [read $fd 8] W datasize
    set ver [read $fd 1]
    binary scan $ver c ver
    if {$ver != 0} {
      close $fd
      return -code error "archive file too new: $ver (self: 0)"
    }

  close $fd
  return [list ver $ver mode $mode disksize $disksize datasize $datasize]
}
proc statstore {store hash {hex 0}} {
  if {$$hex} { set hexhash [binary encode hex $hash] }
  set path $store
  append path "/[string range $hexhash 0 1]"
  append path "/[string range $hexhash 2 3]"
  append path "/[string range $hexhash 4 5]/"
  append path "/[string range $hexhash 6 end]"
  if {![file exists $path]} return

  statbak $path
    set mode [read $fd 1]
    switch -- $mode {
      r { set mode raw }
      F { set mode deflate }
      default {
        close $fd
        return -code error "unknown compression mode: $mode"
      }
    }

    set disksize [file size $path]
    binary scan [read $fd 8] W datasize

    close $fd
    return [list ver $ver mode $mode disksize $disksize datasize $datasize]
  }
  proc statstore {store hash {hex 0}} {
    set hexhash $hash
    if {!$hex} { set hexhash [binary encode hex $hash] }
    set path $store
    append path "/[string range $hexhash 0 1]"
    append path "/[string range $hexhash 2 3]"
    append path "/[string range $hexhash 4 5]/"
    append path "/[string range $hexhash 6 end]"
    if {![file exists $path]} return

    dict replace [statbak $path] path $path
  }
}
eval $init

proc usage {} {
  global argv0


@@ 141,6 148,8 @@ proc usage {} {
  puts stderr "commands:"
  puts stderr "  init - initialize a store in store"
  puts stderr "  importlist ?listfile? - if listfile is ommitted, reads stdin"
  puts stderr "  print hash - prints the contents of <hash> on stdout"
  puts stderr "  stat hash - prints metadata about <hash>"
  exit -1
}
proc die {reason} {


@@ 170,6 179,11 @@ if {$command == "init"} {

if {[catch {sqlite3 db $dbpath -create false}]} { die "can't find a store at \"$store\"." }

set fd [open /proc/cpuinfo r]
set nproc [llength [regexp -all -lineanchor -inline "^processor" [read $fd]]]
close $fd
set pool [tpool::create -maxworkers $nproc -initcmd [concat $init "; " [list set arpath $arpath]]]

switch -- $command {
  importlist {
    if {[llength $argv] > 1} usage


@@ 182,8 196,9 @@ switch -- $command {
    }

    set files [split [string trim [read $fd]] "\n"]
    set joblist {}
    set filelist {}
    close $fd
    if {$fd != "stdin"} { close $fd }

    foreach filename $files {
      if {![file exists $filename]} {


@@ 191,20 206,56 @@ switch -- $command {
        continue
      }

      set fd [open $filename rb]
      set data [read $fd]
      close $fd
      set script [concat [list set filename $filename] "; " {
        set fd [open $filename rb]
        set data [read $fd]
        close $fd

      set hash [writestore $arpath $data 1]
      lappend filelist [string trimleft $filename "/"] $hash
        set hash [writestore $arpath $data 1]
        return [list [string trimleft $filename "/"] $hash]
      }]

      lappend joblist [tpool::post $pool $script]
    }

    foreach job $joblist {
      tpool::wait $pool $job
      append filelist " [tpool::get $pool $job]"
    }

    set filelist [lsort -stride 2 $filelist]

    set ref [writestore $arpath $filelist 1]
    set date [clock seconds]
    set rowid [db eval {
      INSERT INTO tarballs VALUES ($fname, "list", $ref, "", $date) RETURNING rowid;
    }]

    puts stderr "inserted as archive $rowid"
    puts stderr "inserted as archive $rowid with hash $ref"
  }

  print {
    if {[llength $argv] != 1} usage

    set stat [statstore $arpath [lindex $argv 0] 1]
    if {$stat == {}} { die "unknown hash" }

    puts [readstore $arpath [lindex $argv 0] 1]
  }
  stat {
    if {[llength $argv] != 1} usage

    set stat [statstore $arpath [lindex $argv 0] 1]
    if {$stat == {}} { die "unknown hash" }

    puts "file [lindex $argv 0]:"
    puts "  version: [dict get $stat ver]"
    puts "  compression: [dict get $stat mode]"
    if {[dict get $stat mode] != "raw"} {
      puts "  compressed size (bytes): [dict get $stat disksize]"
    }
    puts "  data size (bytes): [dict get $stat datasize]"
  }

  default usage
}