~aleteoryx/backupper

adc92ee610f06452696bc02cf8027a3832de79ba — Aleteoryx 9 months ago
storage
3 files changed, 110 insertions(+), 0 deletions(-)

A .gitignore
A PLAN
A backupper.tcl
A  => .gitignore +3 -0
@@ 1,3 @@
*~
store/
*.png

A  => PLAN +22 -0
@@ 1,22 @@
store consists of md5 content-addressed files

store file contents:
"AMEBAK" [version byte = 0] [compression mode byte] [file contents]

compression mode:
  'r' - uncompressed
  'F' - deFlate

compression should be attempted for all files, algorithms tested with
the first 10MB of data. if the reduction is <5%, do not compress. this
should make large high-entropy files have less overhead.

directory contents:
  store/ - see above
  index.db - list of backup files

backup files are stored in the store, and indexed in index.db. they are
archive files, with the contents of each file replaced with a binary
hash.



A  => backupper.tcl +85 -0
@@ 1,85 @@
#!/bin/env tclsh

package require sha256
package require sqlite3

proc readbak {path} {
  set fd [open $path rb]
  set magic [read $fd 6]
  if {$magic != "AMEBAK"} {
    close $fd
    return -code error "invalid magic string: \"$magic\""
  }

  set ver [read $fd 1]
  binary scan $ver c ver
  if {$ver != 0} {
    close $fd
    return -code error "archive file too new: $ver (self: 0)"
  }

  set mode [read $fd 1]
  switch -- $mode {
    r {}
    F {
      zlib push inflate $fd -level 9
    }
    default {
      close $fd
      return -code error "unknown compression mode: $mode"
    }
  }

  set ret [read $fd]
  close $fd
  return $ret
}

proc readstore {store hash} {
  set hexhash [binary encode hex $hash]
  set path $store
  append path "/[string range $hexhash 0 1]"
  append path "/[string range $hexhash 2 3]"
  append path "/[string range $hexhash 4 5]/"
  append path "/[string range $hexhash 6 end]"
  if {[file exists $path]} {
    return [readbak $path]
  } else {
    return -code error "missing file"
  }
}

proc writebak {path data} {
  set bench [string range $data 0 10000000]
  binary scan $bench c _
  set Fbench [zlib deflate $bench 9]

  set fd [open $path wb]
  puts -nonewline $fd "AMEBAK\0"

  if {[string length $Fbench] < ([string length $bench] * 95 / 100)} {
    puts -nonewline $fd "F"
    zlib push deflate $fd -level 9
  } else { puts -nonewline $fd "r" }

  puts -nonewline $fd $data
  close $fd
}

proc writestore {store data} {
  set hash [::sha2::sha256 -bin -- $data]
  set hexhash [binary encode hex $hash]
  set path $store
  append path "/[string range $hexhash 0 1]"
  append path "/[string range $hexhash 2 3]"
  append path "/[string range $hexhash 4 5]/"
  file mkdir $path
  set path "$path/[string range $hexhash 6 end]"

  if {![file exists $path]} {
    writebak $path $data
  }

  return $hash
}