From adc92ee610f06452696bc02cf8027a3832de79ba Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Fri, 21 Feb 2025 02:02:05 -0500 Subject: [PATCH] storage --- .gitignore | 3 ++ PLAN | 22 +++++++++++++ backupper.tcl | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 .gitignore create mode 100644 PLAN create mode 100755 backupper.tcl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..0a11896adf299181be31eb4e82830cd821b7b5c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +store/ +*.png diff --git a/PLAN b/PLAN new file mode 100644 index 0000000000000000000000000000000000000000..d3c6f25b8f0c4ef90c796f4216459978bd590e1c --- /dev/null +++ b/PLAN @@ -0,0 +1,22 @@ +store consists of sha256 content-addressed files + +store file contents: +"AMEBAK" [version byte = 0] [compression mode byte] [file contents] + +compression mode: + 'r' - uncompressed + 'F' - deFlate + +compression should be attempted for all files, algorithms tested with +the first 10MB of data. if the reduction is <5%, do not compress. this +should make large high-entropy files have less overhead. + +directory contents: + store/ - see above + index.db - list of backup files + +backup files are stored in the store, and indexed in index.db. they are +archive files, with the contents of each file replaced with a binary +hash.
+
+
diff --git a/backupper.tcl b/backupper.tcl
new file mode 100755
index 0000000000000000000000000000000000000000..82f38db5e91c0f0387b22e85953fb74453245e1a
--- /dev/null
+++ b/backupper.tcl
@@ -0,0 +1,138 @@
+#!/usr/bin/env tclsh
+
+package require sha256
+package require sqlite3
+
+# storepath --
+#   Map a binary content hash to the path of its store file.
+#   The hex form of the hash is split into three two-character
+#   directory levels; the remaining digits are the file name.
+#
+#   store - root directory of the store
+#   hash  - binary digest identifying the content
+proc storepath {store hash} {
+    set hex [binary encode hex $hash]
+    return [file join $store \
+        [string range $hex 0 1] \
+        [string range $hex 2 3] \
+        [string range $hex 4 5] \
+        [string range $hex 6 end]]
+}
+
+# readbak --
+#   Read an AMEBAK file and return its decoded payload.
+#   Layout: "AMEBAK", one version byte (0), one compression
+#   mode byte ("r" = raw, "F" = deflate), then the payload.
+#
+#   path - file to read
+#
+#   Raises an error on a bad magic string, a missing or
+#   unsupported version byte, or an unknown compression mode.
+proc readbak {path} {
+    set fd [open $path rb]
+    # try/finally closes the channel on every exit path,
+    # including read and inflate failures.
+    try {
+        set magic [read $fd 6]
+        if {$magic ne "AMEBAK"} {
+            return -code error "invalid magic string: \"$magic\""
+        }
+
+        # binary scan converts nothing when the file ends here.
+        if {[binary scan [read $fd 1] c ver] != 1} {
+            return -code error "truncated archive file: no version byte"
+        }
+        if {$ver != 0} {
+            return -code error "archive file too new: $ver (self: 0)"
+        }
+
+        set mode [read $fd 1]
+        switch -- $mode {
+            r {}
+            F {
+                zlib push inflate $fd
+            }
+            default {
+                return -code error "unknown compression mode: $mode"
+            }
+        }
+
+        return [read $fd]
+    } finally {
+        close $fd
+    }
+}
+
+# readstore --
+#   Return the decoded contents stored under a hash.
+#
+#   store - root directory of the store
+#   hash  - binary digest, as returned by writestore
+#
+#   Raises "missing file" when the store has no such entry.
+proc readstore {store hash} {
+    set path [storepath $store $hash]
+    if {![file exists $path]} {
+        return -code error "missing file"
+    }
+    return [readbak $path]
+}
+
+# writebak --
+#   Write data to path as an AMEBAK file. Deflate is used when
+#   it shrinks the first 10 MB of data by more than 5%;
+#   otherwise the payload is stored raw.
+#
+#   path - destination file
+#   data - binary payload
+proc writebak {path data} {
+    # Trial-compress a sample to decide on the mode.
+    set bench [string range $data 0 9999999]
+    set benchlen [string length $bench]
+    set compress 0
+    if {$benchlen > 0} {
+        set packed [zlib deflate $bench 9]
+        set compress [expr {[string length $packed] < ($benchlen * 95 / 100)}]
+    }
+
+    # Write to a temporary sibling and rename it into place, so
+    # a failed write never leaves a truncated file at path.
+    set tmp "$path.tmp.[pid]"
+    set fd [open $tmp wb]
+    try {
+        puts -nonewline $fd "AMEBAK\0"
+        if {$compress} {
+            puts -nonewline $fd "F"
+            zlib push deflate $fd -level 9
+        } else {
+            puts -nonewline $fd "r"
+        }
+        puts -nonewline $fd $data
+        close $fd
+        set fd ""
+        file rename -force -- $tmp $path
+    } on error {msg opts} {
+        if {$fd ne ""} {
+            catch {close $fd}
+        }
+        file delete -force -- $tmp
+        return -options $opts $msg
+    }
+    return
+}
+
+# writestore --
+#   Store data under its SHA-256 digest, unless an entry for
+#   that digest already exists, and return the binary digest.
+#
+#   store - root directory of the store
+#   data  - binary payload
+proc writestore {store data} {
+    set hash [::sha2::sha256 -bin -- $data]
+    set path [storepath $store $hash]
+    file mkdir [file dirname $path]
+    if {![file exists $path]} {
+        writebak $path $data
+    }
+    return $hash
+}