ADDED artifacts/README Index: artifacts/README ================================================================== --- /dev/null +++ artifacts/README @@ -0,0 +1,1 @@ +NOTE: keep megatest/artifacts/ in sync with datastore/artifacts ADDED artifacts/artifacts.meta Index: artifacts/artifacts.meta ================================================================== --- /dev/null +++ artifacts/artifacts.meta @@ -0,0 +1,21 @@ +;; -*- scheme -*- +( +; Your egg's license: +(license "BSD") + +; Pick one from the list of categories (see below) for your egg and enter it +; here. +(category db) + +; A list of eggs pkts depends on. If none, you can omit this declaration +; altogether. If you are making an egg for chicken 3 and you need to use +; procedures from the `files' unit, be sure to include the `files' egg in the +; `needs' section (chicken versions < 3.4.0 don't provide the `files' unit). +; `depends' is an alias to `needs'. +;; (needs (autoload "3.0")) + +; A list of eggs required for TESTING ONLY. See the `Tests' section. +(test-depends test) + +(author "Matt Welland") +(synopsis "A sha1-chain based datastore similar to the data format in fossil scm, consisting of artifacts of single line cards.")) ADDED artifacts/artifacts.release-info Index: artifacts/artifacts.release-info ================================================================== --- /dev/null +++ artifacts/artifacts.release-info @@ -0,0 +1,3 @@ +(repo fossil "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}") +(uri zip "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}/zip/{egg-name}.zip?uuid={egg-release}") +(release "1.0") ADDED artifacts/artifacts.scm Index: artifacts/artifacts.scm ================================================================== --- /dev/null +++ artifacts/artifacts.scm @@ -0,0 +1,1624 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of artifacts +;; +;; Pkts is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Pkts is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Pkts. If not, see . +;; + +;; CARDS: +;; +;; A card is a line of text, the first two characters are a letter followed by a +;; space. The letter is the card type. +;; +;; artifact: +;; +;; An artifact is a sorted list of cards with a final card Z that contains the shar1 hash +;; of all of the preceding cards. +;; +;; AARTIFACT: +;; +;; An alist mapping card types to card data +;; '((T . "artifacttype") +;; (a . "some content")) +;; +;; EARTIFACT: +;; +;; Extended packet using friendly keys. Must use a artifactspec to convert to/from eartifacts +;; '((ptype . "artifacttype") +;; (adata . "some content)) +;; +;; DARTIFACT: +;; +;; artifacts pulled from the database have this format: +;; +;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist +;; (t . "v1.63/tip/dev") +;; (c . "QUICKPATT") +;; (T . "runstart") +;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd") +;; (D . "1488995096.0")) +;; (id . 8) +;; (group-id . 0) +;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b") +;; (parent . "") +;; (artifact-type . "runstart") +;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b")) +;; +;; artifactspec is alist of alists mapping types and nicekeys to keys +;; +;; '((posting . ((title . t) +;; (url . u) +;; (blurb . b))) +;; (comment . ((comment . c) +;; (score . s)))) + +;; Reserved cards: +;; P : artifact parent +;; R : reference artifact containing mapping of short string -> sha1sum strings +;; T : artifact type +;; D : current time from (current-time), unless provided +;; Z : shar1 hash of the packet + +;; Example usage: +;; +;; Create a artifact: +;; +;; (use artifacts) +;; (define-values (uuid artifact) +;; (alist->artifact +;; '((fruit . "apple") (meat . "beef")) ;; this is the data to convert +;; '((foods (fruit . f) (meat . m))) ;; this is the artifact spec +;; ptype: +;; 'foods)) +;; +;; Add to artifact queue: +;; +;; (define db (open-queue-db "/tmp/artifacts" "artifacts.db")) +;; (add-to-queue db artifact uuid 'foods #f 0) ;; no parent and use group_id of 0 +;; +;; Retrieve the packet from the db and extract a value: +;; +;; (alist-ref +;; 'meat +;; (dartifact->alist +;; (car (get-dartifacts db #f 0 #f)) +;; '((foods (fruit . f) +;; (meat . m))))) +;; => "beef" +;; + +(module artifacts +( +;; cards, util and misc +;; sort-cards +;; calc-sha1 +;; +;; low-level constructor procs, exposed only for development/testing, will be removed +construct-sdat +construct-artifact +card->type/value +add-z-card + +;; queue database procs +open-queue-db +add-to-queue +create-and-queue +;; lookup-by-uuid +lookup-by-id +get-dartifacts +get-not-processed-artifacts +get-related +find-artifacts +process-artifacts +get-descendents +get-ancestors +get-artifacts +;; get-last-descendent +;; with-queue-db +;; load-artifacts-to-db + +;; procs that operate directly on artifacts, sdat, aartifacts, dartifacts etc. +artifact->alist ;; artifact -> aartifact (i.e. alist) +artifact->sdat ;; artifact -> '("a aval" "b bval" ...) +sdat->alist ;; '("a aval" "b bval"...) -> ((a . "aval")(b . "bval") ...) +dblst->dartifacts ;; convert list of tuples from queue db into dartifacts +dartifact->alist ;; flatten a dartifact into an alist containing all db fields and the artifact alist +dartifacts->alists ;; apply dartifact->alist to a list of alists using a artifact-spec +alist->artifact ;; returns two values uuid, artifact +get-value ;; looks up a value given a key in a dartifact +flatten-all ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful! +check-artifact + +;; artifact alists +write-alist->artifact +read-artifact->alist + +;; archive database +;; archive-open-db +;; write-archive-artifacts +;; archive-artifacts +;; mark-processed + +;; artifactsdb +artifactdb-conn ;; useful +artifactdb-fname +artifactsdb-open +artifactsdb-close +artifactsdb-add-record +;; temporary +artifactdb-artifactspec + +;; utility procs +increment-string ;; used to get indexes for strings in ref artifacts +make-report ;; make a .dot file +calc-sha1 +uuid-first-two-letters +uuid-remaining-letters + +;; file and directory utils +multi-glob +capture-dir +file-get-sha1 +check-same +link-or-copy +same-partition? +link-if-same-partition +archive-copy +write-to-archive +artifact-rollup +read-artifacts-into-hash +hash-of-artifacts->bundle +archive-dest + +;; pathname-full-filename + +;; minimal artifact functions +minimal-artifact-read +minimal-artifact->alist +afact-get-D +afact-get-Z +afact-get-T +afact-get +afact-get-number/default + + +;; bundles +write-bundle +read-bundle + +;; new artifacts db +with-todays-adb +get-all-artifacts +refresh-artifacts-db + +) + +(import (chicken base) scheme (chicken process) (chicken time posix) + (chicken io) (chicken file) (chicken pathname) + chicken.process-context.posix (chicken string) + (chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1 + regex srfi-13 srfi-69 (chicken port) (chicken process-context) + crypt sha1 matchable message-digest sqlite3 typed-records + directory-utils + scsh-process) + +;;====================================================================== +;; DATA MANIPULATION UTILS +;;====================================================================== + +(define-inline (unescape-data data) + (string-translate* data '(("\\n" . "\n") ("\\\\" . "\\")))) + +(define-inline (escape-data data) + (string-translate* data '(("\n" . "\\n") ("\\" . "\\\\")))) + +(define-inline (make-card type data) + (conc type " " (escape-data (->string data)))) + +;; reverse an alist for doing artifactkey -> external key conversions +;; +(define-inline (reverse-aspec aspec) + (map (lambda (dat) + (cons (cdr dat)(car dat))) + aspec)) + +;; add a card to the list of cards, sdat +;; if type is #f return only sdat +;; if data is #f return only sdat +;; +(define-inline (add-card sdat type data) + (if (and type data) + (cons (make-card type data) sdat) + sdat)) + +;;====================================================================== +;; STRING AS FUNKY NUMBER +;;====================================================================== + +;; NOTE: PTDZ are removed as they are reserved. NB// the R card is not used in a +;; ref, instead the P parent card is used. +;; Question: Why does it matter to remove PTDZ? +;; To make the ref easier to use the ref strings will be the keys +;; so we cannot have overlap with any actual keys. But this is a +;; bit silly. What we need to do instead is reject keys of length +;; one where the char is in PTDZ +;; +;; This is basically base92 +;; +(define string-num-chars (string->list "!#$%&'()*+,-./0123456789:;<=>?@ABCEFGHIJKLMNOQRSUVWXY[\\]^_abcdefghijklmnopqrstuvwxyz{|}~")) +;; "0123456789abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOQSUVWXY!#$%&'()*+,-./[]:;<=>?\\^_{}|")) + +(define (char-incr inchar) + (let* ((carry #f) + (next-char (let ((rem (member inchar string-num-chars))) + (if (eq? (length rem) 1) ;; we are at the last character in our string-num-chars list + (begin + (set! carry #t) + (car string-num-chars)) + (cadr rem))))) + (values next-char carry))) + +(define (increment-string str) + (if (string-null? str) + "0" + (let ((strlst (reverse (string->list str)))) ;; need to process the string from the lsd + (list->string + (let loop ((hed (car strlst)) + (tal (cdr strlst)) + (res '())) + (let-values (((newhed carry)(char-incr hed))) + ;; (print "newhed: " newhed " carry: " carry " tal: " tal) + (let ((newres (cons newhed res))) + (if carry ;; we'll have to propagate the carry + (if (null? tal) ;; at the end, tack on "0" (which is really a "1") + (cons (car string-num-chars) newres) + (loop (car tal)(cdr tal) newres)) + (append (reverse tal) newres))))))))) + +;;====================================================================== +;; P K T S D B I N T E R F A C E +;; +;; INTEGER, REAL, TEXT +;;====================================================================== +;; +;; spec +;; ( (tablename1 . (field1name L1 TYPE) +;; (field2name L2 TYPE) ... ) +;; (tablename2 ... )) +;; +;; Example: (tests (testname n TEXT) +;; (rundir r TEXT) +;; ... ) +;; +;; artifact keys are taken from the first letter, if that is not unique +;; then look at the next letter and so on +;; + +;; simplify frequent need to get one result with default +;; +(define (get-one db default qry . params) + (apply fold-row + car + default + db + qry + params)) + +(define (get-rows db qry . params) + (apply fold-row + cons + db + qry + params)) + +;; use this struct to hold the artifactspec and the db handle +;; +(defstruct artifactdb + (fname #f) + (artifactsdb-spec #f) + (artifactspec #f) ;; cache the artifactspec + (field-keys #f) ;; cache the field->key mapping (field1 . k1) ... + (key-fields #f) ;; cache the key->field mapping + (conn #f) + ) + +;; WARNING: There is a simplification in the artifactsdb spec w.r.t. artifactspec. +;; The field specs are the cdr of the table list - not a full +;; list. The extra list level in artifactspec is gratuitous and should +;; be removed. +;; +(define (artifactsdb-spec->artifactspec tables-spec) + (map (lambda (tablespec) + (list (car tablespec) + (map (lambda (field-spec) + (cons (car field-spec)(cadr field-spec))) + (cdr tablespec)))) + tables-spec)) + +(define (artifactsdb-open dbfname artifactsdb-spec) + (let* ((pdb (make-artifactdb)) + (dbexists (file-exists? dbfname)) + (db (open-database dbfname))) + (artifactdb-artifactsdb-spec-set! pdb artifactsdb-spec) + (artifactdb-artifactspec-set! pdb (artifactsdb-spec->artifactspec artifactsdb-spec)) + (artifactdb-fname-set! pdb dbfname) + (artifactdb-conn-set! pdb db) + (if (not dbexists) + (artifactsdb-init pdb)) + pdb)) + +(define (artifactsdb-init artifactsdb) + (let* ((db (artifactdb-conn artifactsdb)) + (artifactsdb-spec (artifactdb-artifactsdb-spec artifactsdb))) + ;; create a table for the artifacts themselves + (execute db "CREATE TABLE IF NOT EXISTS artifacts (id INTEGER PRIMARY KEY, zkey TEXT, record_id INTEGER, artifact TEXT);") + (for-each + (lambda (table) + (let* ((table-name (car table)) + (fields (cdr table)) + (stmt (conc "CREATE TABLE IF NOT EXISTS " + table-name + " (id INTEGER PRIMARY KEY," + (string-intersperse + (map (lambda (fieldspec) + (conc (car fieldspec) " " + (caddr fieldspec))) + fields) + ",") + ");"))) + (execute db stmt))) + artifactsdb-spec))) + +;; create artifact from the data and insert into artifacts table +;; +;; data is assoc list of (field . value) ... +;; tablename is a symbol matching the table name +;; +(define (artifactsdb-add-record artifactsdb tablename data #!optional (parent #f)) + (let*-values (((zkey artifact) (alist->artifact data (artifactdb-artifactspec artifactsdb) ptype: tablename))) + ;; have the data as alist so insert it into appropriate table also + (let* ((db (artifactdb-conn artifactsdb))) + ;; TODO: Address collisions + (execute db "INSERT INTO artifacts (zkey,artifact,record_id) VALUES (?,?,?);" + zkey artifact -1) + (let* (;; (artifactid (artifactsdb-artifactkey->artifactid artifactsdb artifactkey)) + (record-id (artifactsdb-insert artifactsdb tablename data))) + (execute db "UPDATE artifacts SET record_id=? WHERE zkey=?;" + record-id zkey) + )))) + +;; +(define (artifactsdb-insert artifactsdb tablename data) + (let* ((db (artifactdb-conn artifactsdb)) + (stmt (conc "INSERT INTO " tablename + " (" (string-intersperse (map conc (map car data)) ",") + ") VALUES ('" + ;; TODO: Add lookup of data type and do not + ;; wrap integers with quotes + (string-intersperse (map conc (map cdr data)) "','") + "');"))) + (print "stmt: " stmt) + (execute db stmt) + ;; lookup the record-id and return it + + )) + +(define (artifactsdb-close artifactsdb) + (finalize! (artifactdb-conn artifactsdb))) + +;; (let loop ((s "0")(n 0))(print s)(if (< n 5000)(loop (increment-string s)(+ n 1)))) + +;;====================================================================== +;; CARDS, MISC and UTIL +;;====================================================================== + +;; given string (likely multi-line) "dat" return shar1 hash +;; +(define (calc-sha1 instr) + (message-digest-string + (sha1-primitive) + instr)) + +;; given a single card return its type and value +;; +(define (card->type/value card) + (let ((ctype (substring card 0 1)) + (cval (substring card 2 (string-length card)))) + (values (string->symbol ctype) cval))) + +;;====================================================================== +;; SDAT procs +;; sdat is legacy/internal usage. Intention is to remove sdat calls from +;; the exposed calls. +;;====================================================================== + +;; sort list of cards +;; +(define-inline (sort-cards sdat) + (sort sdat string<=?)) + +;; artifact rules +;; 1. one card per line +;; 2. at least one card +;; 3. no blank lines + +;; given sdat, a list of cards return uuid, packet (as sdat) +;; +(define (add-z-card sdat) + (let* ((sorted-sdat (sort-cards sdat)) + (dat (string-intersperse sorted-sdat "\n")) + (uuid (calc-sha1 dat))) + (values + uuid + (conc + dat + "\nZ " + uuid)))) + +(define (check-artifact artifact) + (handle-exceptions + exn + #f ;; anything goes wrong - call it a crappy artifact + (let* ((sdat (string-split artifact "\n")) + (rdat (reverse sdat)) ;; reversed + (zdat (car rdat)) + (Z (cadr (string-split zdat))) + (cdat (string-intersperse (reverse (cdr rdat)) "\n"))) + (equal? Z (calc-sha1 cdat))))) + +;;====================================================================== +;; AARTIFACTs +;;====================================================================== + +;; convert a sdat (list of cards) to an alist +;; +(define (sdat->alist sdat) + (let loop ((hed (car sdat)) + (tal (cdr sdat)) + (res '())) + (let-values (( (ctype cval)(card->type/value hed) )) + ;; if this card is not one of the common ones tack it on to rem + (let* ((oldval (alist-ref ctype res)) + (newres (cons (cons ctype + (if oldval ;; list or string + (if (list? oldval) + (cons cval oldval) + (cons cval (list oldval))) + cval)) + res))) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres)))))) + +;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist +;; (t . "v1.63/tip/dev") +;; (c . "QUICKPATT") +;; (T . "runstart") +;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd") +;; (D . "1488995096.0")) +;; (id . 8) +;; (group-id . 0) +;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b") +;; (parent . "") +;; (artifact-type . "runstart") +;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b")) +;; +;; artifactspec is alist of alists mapping types and nicekeys to keys +;; +;; '((posting . ((title . t) +;; (url . u) +;; (blurb . b))) +;; (comment . ((comment . c) +;; (score . s)))) + +;; DON'T USE? +;; +(define (get-value field dartifact . spec-in) + (if (null? spec-in) + (alist-ref field dartifact) + (let* ((spec (car spec-in)) + (aartifact (alist-ref 'aartifact dartifact))) ;; get the artifact alist + (if (and aartifact spec) + (let* ((ptype (alist-ref 'artifact-type dartifact)) + (pspec (alist-ref (string->symbol ptype) spec))) ;; do we have a spec for this type of artifact + (and pspec + (let* ((key (alist-ref field pspec))) + (and key (alist-ref key aartifact))))) + #f)))) + +;; convert a dartifact to a pure alist given a artifactspec +;; this flattens out the alist to include the data from +;; the queue database record +;; +(define (dartifact->alist dartifact artifactspec) + (let* ((aartifact (alist-ref 'aartifact dartifact)) + (artifact-type (or (alist-ref 'artifact-type dartifact) ;; artifact-type is from the database field artifact_type + (alist-ref 'T aartifact))) + (artifact-fields (alist-ref (string->symbol artifact-type) artifactspec)) + (rev-fields (if artifact-fields + (reverse-aspec artifact-fields) + '()))) + (append (map (lambda (entry) + (let* ((artifact-key (car entry)) + (new-key (or (alist-ref artifact-key rev-fields) artifact-key))) + `(,new-key . ,(cdr entry)))) + aartifact) + dartifact))) + +;; convert a list of dartifacts into a list of alists using artifact-spec +;; +(define (dartifacts->alists dartifacts artifact-spec) + (map (lambda (x) + (dartifact->alist x artifact-spec)) + dartifacts)) + +;; Generic flattener, make the tuple and artifact into a single flat alist +;; +;; qry-result-spec is a list of symbols corresponding to each field +;; +(define (flatten-all inlst artifactspec . qry-result-spec) + (map + (lambda (tuple) + (dartifact->alist + (apply dblst->dartifacts tuple qry-result-spec) + artifactspec)) + inlst)) + +;; call like this: +;; (construct-sdat 'a "a data" 'S "S data" ...) +;; returns list of cards +;; ( "A a value" "D 12345678900" ...) +;; +(define (construct-sdat . alldat) + (let ((have-D-card #f)) ;; flag + (if (even? (length alldat)) + (let loop ((type (car alldat)) + (data (cadr alldat)) + (tail (cddr alldat)) + (res '())) + (if (eq? type 'D)(set! have-D-card #t)) + (if (null? tail) + (if have-D-card ;; return the constructed artifact, add a D card if none found + (add-card res type data) + (add-card + (add-card res 'D (current-seconds)) + type data)) + (loop (car tail) + (cadr tail) + (cddr tail) + (add-card res type data)))) + #f))) ;; #f means it failed to create the sdat + +(define (construct-artifact . alldat) + (add-z-card + (apply construct-sdat alldat))) + +;;====================================================================== +;; CONVERTERS +;;====================================================================== + +(define (artifact->sdat artifact) + (map unescape-data (string-split artifact "\n"))) + +;; given a pure artifact return an alist +;; +(define (artifact->alist artifact #!key (artifactspec #f)) + (let ((sdat (cond + ((string? artifact) (artifact->sdat artifact)) + ((list? artifact) artifact) + (else #f)))) + (if artifact + (if artifactspec + (dartifact->alist (list (cons 'aartifact (sdat->alist sdat))) artifactspec) + (sdat->alist sdat)) + #f))) + +;; convert an alist to an sdat +;; in: '((a . "blah")(b . "foo")) +;; out: '("a blah" "b foo") +;; +(define (alist->sdat adat) + (map (lambda (dat) + (conc (car dat) " " (cdr dat))) + adat)) + +;; adat is the incoming alist, aspec is the mapping +;; from incoming key to the artifact key (usually one +;; letter to keep data tight) see the artifactspec at the +;; top of this file +;; +;; NOTE: alists can contain multiple instances of the same key (supported fine by artifacts) +;; but you (obviously I suppose) cannot use alist-ref to access those entries. +;; +(define (alist->artifact adat aspec #!key (ptype #f)(no-d #f)) + (let* ((artifact-type (or ptype + (alist-ref 'T adat) ;; can provide in the incoming alist + #f)) + (artifact-spec (if artifact-type ;; alist of external-key -> key + (or (alist-ref artifact-type aspec) '()) + (if (null? aspec) + '() + (cdar aspec)))) ;; default to first one if nothing specified + (new-alist (map (lambda (dat) + (let* ((key (car dat)) + (val (cdr dat)) + (newkey (or (alist-ref key artifact-spec) + key))) + (cons newkey (escape-data (conc val))))) ;; convert all incoming data (symbols, numbers etc.) to a string and then escape newlines. + adat)) + (new-with-type (if (alist-ref 'T new-alist) + new-alist + (cons `(T . ,artifact-type) new-alist))) + (with-d-card (if (or no-d ;; no timestamp wanted + (alist-ref 'D new-with-type)) + new-with-type + (cons `(D . ,(current-seconds)) + new-with-type)))) + (add-z-card + (alist->sdat with-d-card)))) + +;;====================================================================== +;; D B Q U E U E I N T E R F A C E +;;====================================================================== + +;; artifacts ( +;; id SERIAL PRIMARY KEY, +;; uuid TEXT NOT NULL, +;; parent_uuid TEXT default '', +;; artifact_type INTEGER DEFAULT 0, +;; group_id INTEGER NOT NULL, +;; artifact TEXT NOT NULL + +;; schema is list of SQL statements - can be used to extend db with more tables +;; +(define (open-queue-db dbpath dbfile #!key (schema '())) + (let* ((dbfname (conc dbpath "/" dbfile)) + (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f))) + (db (open-database dbfname))) + ;; (set-busy-handler! (dbi:db-conn db) (busy-timeout 10000)) + (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. + (for-each + (lambda (stmt) + (execute db stmt)) + (cons "CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER PRIMARY KEY, + group_id INTEGER NOT NULL, + uuid TEXT NOT NULL, + parent_uuid TEXT TEXT DEFAULT '', + artifact_type TEXT NOT NULL, + artifact TEXT NOT NULL, + processed INTEGER DEFAULT 0)" + schema))) ;; 0=not processed, 1=processed, 2... for expansion + db)) + +(define (add-to-queue db artifact uuid artifact-type parent-uuid group-id) + (execute db "INSERT INTO artifacts (uuid,parent_uuid,artifact_type,artifact,group_id) + VALUES(?,?,?,?,?);" ;; $1,$2,$3,$4,$5);" + uuid + (if parent-uuid parent-uuid "");; use null string as placeholder for no parent uuid. + (if artifact-type (conc artifact-type) "") + artifact + group-id)) + +;; given all needed parameters create a artifact and store it in the queue +;; procs is an alist that maps artifact-type to a function that takes a list of artifact params +;; in data and returns the uuid and artifact +;; +(define (create-and-queue conn procs artifact-type parent-uuid group-id data) + (let ((proc (alist-ref artifact-type procs))) + (if proc + (let-values (( (uuid artifact) (proc data) )) + (add-to-queue conn artifact uuid artifact-type parent-uuid group-id) + uuid) + #f))) + +;; given uuid get artifact, if group-id is specified use it (reduces probablity of +;; being messed up by a uuid collision) +;; +(define (lookup-by-uuid db artifact-uuid group-id) + (if group-id + (get-one db "SELECT artifact FROM artifacts WHERE group_id=? AND uuid=?;" group-id artifact-uuid) + (get-one db "SELECT artifact FROM artifacts WHERE uuid=?;" artifact-uuid))) + +;; find a packet by its id +;; +(define (lookup-by-id db id) + (get-one db "SELECT artifact FROM artifacts WHERE id=?;" id)) + + +;;====================================================================== +;; P R O C E S S P K T S +;;====================================================================== + +;; given a list of field values pulled from the queue db generate a list +;; of dartifact's +;; +(define (dblst->dartifacts lst . altmap) + (let* ((maplst (if (null? altmap) + '(id group-id uuid parent artifact-type artifact processed) + altmap)) + (res (map cons maplst lst))) ;; produces list of pairs, i.e an alist + (cons `(aartifact . ,(artifact->alist (alist-ref 'artifact res))) + res))) + +;; NB// ptypes is a list of symbols, '() or #f find all types +;; +(define (get-dartifacts db ptypes group-id parent-uuid #!key (uuid #f)) + (let* ((ptype-qry (if (and ptypes + (not (null? ptypes))) + (conc " IN ('" (string-intersperse (map conc ptypes) "','") "')") + (conc " LIKE '%' "))) + (rows (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE artifact_type " ptype-qry " AND group_id=? + AND processed=0 " + (if parent-uuid (conc "AND parent_uuid='" parent-uuid "' ") "") + (if uuid (conc "AND uuid='" uuid "' ") "") + "ORDER BY id DESC;") + group-id))) + (map dblst->dartifacts (map vector->list rows)))) + +;; get N artifacts not yet processed for group-id +;; +(define (get-not-processed-artifacts db group-id artifact-type limit offset) + (map dblst->dartifacts + (map vector->list + (get-rows + db + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE artifact_type = ? AND group_id = ? AND processed=0 + LIMIT ? OFFSET ?;" + (conc artifact-type) ;; convert symbols to string + group-id + limit + offset + )))) + +;; given a uuid, get not processed child artifacts +;; +(define (get-related db group-id uuid) + (map dblst->dartifacts + (get-rows + db + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE parent_uuid=? AND group_id=? AND processed=0;" + uuid group-id))) + +;; generic artifact processor +;; +;; find all packets in group-id of type in ptypes and apply proc to artifactdat +;; +(define (process-artifacts conn group-id ptypes parent-uuid proc) + (let* ((artifacts (get-dartifacts conn ptypes group-id parent-uuid))) + (map proc artifacts))) + +;; criteria is an alist ((k . valpatt) ...) +;; - valpatt is a regex +;; - ptypes is a list of types (symbols expected) +;; match-type: 'any or 'all +;; +(define (find-artifacts db ptypes criteria #!key (processed #f)(match-type 'any)(artifact-spec #f)) ;; processed=#f, don't use, else use + (let* ((artifacts (get-dartifacts db ptypes 0 #f)) + (match-rules (lambda (artifactdat) ;; returns a list of matching rules + (filter (lambda (c) + ;; (print "c: " c) + (let* ((ctype (car c)) ;; card type + (rx (cdr c)) ;; card pattern + ;; (t (alist-ref 'artifact-type artifactdat)) + (artifact (alist-ref 'artifact artifactdat)) + (aartifact (artifact->alist artifact)) + (cdat (alist-ref ctype aartifact))) + ;; (print "cdat: " cdat) ;; " aartifact: " aartifact) + (if cdat + (string-match rx cdat) + #f))) + criteria))) + (res (filter (lambda (artifactdat) + (if (null? criteria) ;; looking for all artifacts + #t + (case match-type + ((any)(not (null? (match-rules artifactdat)))) + ((all)(eq? (length (match-rules artifactdat))(length criteria))) + (else + (print "ERROR: bad match type " match-type ", expecting any or all."))))) + artifacts))) + (if artifact-spec + (dartifacts->alists res artifact-spec) + res))) + +;; get descendents of parent-uuid +;; +;; NOTE: Should be doing something like the following: +;; +;; given a uuid, get not processed child artifacts +;; processed: +;; #f => get all +;; 0 => get not processed +;; 1 => get processed +;; +(define (get-ancestors db group-id uuid #!key (processed #f)) + (map dblst->dartifacts + (map vector->list + (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed + FROM artifacts + WHERE uuid IN + (WITH RECURSIVE + tree(uuid,parent_uuid) + AS + ( + SELECT uuid, parent_uuid + FROM artifacts + WHERE uuid = ? + UNION ALL + SELECT t.uuid, t.parent_uuid + FROM artifacts t + JOIN tree ON t.uuid = tree.parent_uuid + ) + SELECT uuid FROM tree) + AND group_id=?" (if processed (conc " AND processed=" processed) "") ";") + uuid group-id)))) + +;; Untested +;; +(define (get-descendents db group-id uuid #!key (processed #f)) + (map dblst->dartifacts + (map vector->list + (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed + FROM artifacts + WHERE uuid IN + (WITH RECURSIVE + tree(uuid,parent_uuid) + AS + ( + SELECT uuid, parent_uuid + FROM artifacts + WHERE uuid = ? + UNION ALL + SELECT t.uuid, t.parent_uuid + FROM artifacts t + JOIN tree ON t.parent_uuid = tree.uuid + ) + SELECT uuid FROM tree) + AND group_id=?" (if processed (conc " AND processed=" processed) "") ";") + uuid group-id)))) + +;; look up descendents based on given info unless passed in a list via inlst +;; +;; (define (get-last-descendent db group-id uuid #!key (processed #f)(inlst #f)) +;; (let ((descendents (or inlst (get-descendents db group-id uuid processed: processed)))) +;; (if (null? descendents) +;; #f +;; (last descendents)))) + +;;====================================================================== +;; A R C H I V E S - always to a sqlite3 db +;;====================================================================== + +;; open an archive db +;; path: archive-dir//month.db +;; +#;(define (archive-open-db archive-dir) + (let* ((curr-time (seconds->local-time (current-seconds))) + (dbpath (conc archive-dir "/" (time->string curr-time "%Y"))) + (dbfile (conc dbpath "/" (time->string curr-time "%m") ".db")) + (dbexists (if (file-exists? dbfile) #t (begin (create-directory dbpath #t) #f)))) + (let ((db (open-database dbfile))) + ;; (set-busy-handler! db (busy-timeout 10000)) + (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. + (execute db "CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER, + group_id INTEGER, + uuid TEXT, + parent_uuid TEXT, + artifact_type TEXT, + artifact TEXT, + processed INTEGER DEFAULT 0)")) + db))) + +;; turn on transactions! otherwise this will be painfully slow +;; +#;(define (write-archive-artifacts src-db db artifact-ids) + (let ((artifacts (get-rows + src-db + (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact FROM artifacts WHERE id IN (" + (string-intersperse (map conc artifact-ids) ",") ")")))) + ;; (dbi:with-transaction + ;; db + (lambda () + (for-each + (lambda (artifact) + (apply execute db "INSERT INTO artifacts (id,group_id,uuid,parent_uuid,artifact_type,artifact) + VALUES (?,?,?,?,?,?)" + artifact)) + artifacts)))) ;; ) + +;; given a list of uuids and lists of uuids move all to +;; the sqlite3 db for the current archive period +;; +#;(define (archive-artifacts conn artifact-ids archive-dir) + (let ((db (archive-open-db archive-dir))) + (write-archive-artifacts conn db artifact-ids) + (finalize! db)) + ;; (pg:with-transaction + ;; conn + ;; (lambda () + (for-each + (lambda (id) + (get-one + conn + "DELETE FROM artifacts WHERE id=?" id)) + artifact-ids)) ;; )) + +;; given a list of ids mark all as processed +;; +(define (mark-processed conn artifact-ids) + ;; (pg:with-transaction + ;; conn + ;; (lambda () + (for-each + (lambda (id) + (get-one + conn + "UPDATE artifacts SET processed=1 WHERE id=?;" id)) + artifact-ids)) ;; x)) + +;; a generic artifact getter, gets from the artifacts db +;; +(define (get-artifacts conn ptypes) + (let* ((ptypes-str (if (null? ptypes) + "" + (conc " WHERE artifact_type IN ('" (string-intersperse ptypes ",") "') "))) + (qry-str (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts" ptypes-str))) + (map vector->list (get-rows conn qry-str)))) + +;; make a report of the artifacts in the db +;; ptypes of '() gets all artifacts +;; display-fields +;; +(define (make-report dest conn artifactspec display-fields . ptypes) + (let* (;; (conn (dbi:db-conn (s:db))) + (all-rows (get-artifacts conn ptypes)) + (all-artifacts (flatten-all + all-rows + artifactspec + 'id 'group-id 'uuid 'parent 'artifact-type 'artifact 'processed)) + (by-uuid (let ((ht (make-hash-table))) + (for-each + (lambda (artifact) + (let ((uuid (alist-ref 'uuid artifact))) + (hash-table-set! ht uuid artifact))) + all-artifacts) + ht)) + (by-parent (let ((ht (make-hash-table))) + (for-each + (lambda (artifact) + (let ((parent (alist-ref 'parent artifact))) + (hash-table-set! ht parent (cons artifact (hash-table-ref/default ht parent '()))))) + all-artifacts) + ht)) + (oup (if dest (open-output-file dest) (current-output-port)))) + + (with-output-to-port + oup + (lambda () + (print "digraph megatest_state_status { + // ranksep=0.05 + rankdir=LR; + node [shape=\"box\"]; +") + ;; first all the names + (for-each + (lambda (artifact) + (let* ((uuid (alist-ref 'uuid artifact)) + (shortuuid (substring uuid 0 4)) + (type (alist-ref 'artifact-type artifact)) + (processed (alist-ref 'processed artifact))) + + (print "\"" uuid "\" [label=\"" shortuuid ", (" + type ", " + (if processed "processed" "not processed") ")") + (for-each + (lambda (key-field) + (let ((val (alist-ref key-field artifact))) + (if val + (print key-field "=" val)))) + display-fields) + (print "\" ];"))) + all-artifacts) + ;; now for parent-child relationships + (for-each + (lambda (artifact) + (let ((uuid (alist-ref 'uuid artifact)) + (parent (alist-ref 'parent artifact))) + (if (not (equal? parent "")) + (print "\"" parent "\" -> \"" uuid"\";")))) + all-artifacts) + + (print "}") + )) + (if dest + (begin + (close-output-port oup) + (system "dot -Tpdf out.dot -o out.pdf"))) + + )) + +;;====================================================================== +;; Read ref artifacts into a vector < laststr hash table > +;;====================================================================== + + + +;;====================================================================== +;; Read/write packets to files (convience functions) +;;====================================================================== + +;; write alist to a artifact file +;; +(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f)) + (let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype))) + (with-output-to-file (conc targdir "/" uuid ".artifact") + (lambda () + (print artifact))) + uuid)) ;; return the uuid + +;; read artifact into alist +;; +(define (read-artifact->alist artifact-file #!key (artifactspec #f)) + (artifact->alist (with-input-from-file + artifact-file + read-string) + artifactspec: artifactspec)) + +;;====================================================================== +;; File utils, stuff useful for file management +;;====================================================================== + +(define (file-get-sha1 fname) + (let* ((sha1-res (run/strings (sha1sum ,fname)))) + (car (string-split (car sha1-res))))) + +(define (link-or-copy srcf destf) + (or (handle-exceptions + exn + #f + (file-link srcf destf)) + (if (file-exists? destf) + (print "NOTE: destination already exists, skipping copy.") + (copy-file srcf destf)))) + +;; (define (files-diff file1 file2) +;; (let* ((diff-res (with-input-from-port +;; (run/port (diff "-q" ,file1 ,file2)) +;; (lambda () +;; (let* ((res (read-line))) +;; (read-lines) +;; res))))) +;; (car (string-split sha1-res)))) +;; + + +(define (check-same file1 file2) + (cond + ((not (and (file-exists? file1)(file-exists? file2))) #f) + ((not (equal? (file-size file1)(file-size file2))) #f) + (else + (let-values (((status run-ok process-id) + (run (diff "-q" ,file1 ,file2)))) + status)))) + +(define *pcache* (make-hash-table)) +(define (get-device dir) + (let ((indat (or (hash-table-ref/default *pcache* dir #f) + (let* ((inp (open-input-pipe (conc "df --output=source \""dir"\""))) + (res (read-lines inp))) + (close-input-port inp) + (hash-table-set! *pcache* dir res) + res)))) + (cadr indat))) + +(define (same-partition? dir1 dir2) + (equal? (get-device dir1)(get-device dir2))) + +(define (link-if-same-partition file1 file2) + (let* ((dir1 (pathname-directory file1)) + (dir2 (pathname-directory file2)) + (f1 (pathname-file file1)) + (f2 (pathname-file file2))) + (if (same-partition? dir1 dir2) + (let* ((tmpname (conc "."f2"-"(current-seconds)))) + ;; this steps needs to be executed as actual user + (move-file file2 (conc dir1 "/" tmpname)) + (file-link file1 file2) + (delete-file (conc dir1 "/" tmpname)))))) + +(define (uuid-first-two-letters sha1sum) + (substring sha1sum 0 2)) + +(define (uuid-remaining-letters sha1sum) + (let ((slen (string-length sha1sum))) + (substring sha1sum 2 slen))) + +(define (archive-dest destd sha1sum) + (let* ((subdir (uuid-first-two-letters sha1sum)) ;; (substring sha1sum 0 2)) + ;; (slen (string-length sha1sum)) + (rem sha1sum #;(uuid-remaining-letters sha1sum)) ;; (substring sha1sum 3 slen)) + (full-dest-dir (conc destd"/"subdir)) + (full-dest-file (conc full-dest-dir"/"rem))) + (if (not (directory-exists? full-dest-dir)) + (create-directory full-dest-dir #t)) + full-dest-file)) + +(define (write-to-archive data destd #!optional (nextnum #f)) + (let* ((sha1sum (calc-sha1 data)) + (full-dest (conc (archive-dest destd sha1sum) + (if nextnum (conc "."nextnum) "")))) + (if (file-exists? full-dest) + (if (equal? (string-intersperse (with-input-from-file full-dest read-lines) "\n") + data) + (begin + ;; (print "INFO: data already exists in "full-dest" and is identical") + sha1sum) + (let ((nextnum (if nextnum (+ nextnum 1) 0))) + (print "WARN: data already exists in "full-dest" but is different! Trying again...") + (write-to-archive data destd nextnum))) + (begin + (with-output-to-file + full-dest + (lambda () + (print data))) + sha1sum)))) ;; BUG? Does print munge data? + +;; copy srcf with sha1sum aabc... to aa/bc... +;; +(define (archive-copy srcf destd sha1sum) + (let* ((full-dest-file (archive-dest destd sha1sum))) + (let loop ((trynum 0)) + (let ((dest-name (if (> trynum 0) + (conc full-dest-file"-"trynum) + full-dest-file))) + (cond + ((not (file-exists? srcf)) #f) ;; this should be an error? + ((and (file-exists? srcf) + (file-exists? dest-name)) + (if (check-same srcf dest-name) + (link-if-same-partition dest-name srcf) + (loop (+ trynum 1)))) ;; collisions are rare, this protects against them + ((not (file-exists? dest-name)) + (link-or-copy srcf dest-name)) + (else #f)))))) + +;; multi-glob +(define (multi-glob globstrs inpath) + ;; (print "multi-glob: "globstrs", "inpath) + (if (equal? inpath "") + globstrs + (let* ((parts (string-split inpath "/" #t)) + (nextpart (car parts)) + (remaining (string-intersperse (cdr parts) "/"))) + (if (and (equal? nextpart "") ;; this must be a leading / meaning root directory + (null? globstrs)) + (multi-glob '("/") remaining) + (begin + ;; (print "nextpart="nextpart", remaining="remaining) + (apply append + (map (lambda (gstr) + (let* ((pathstr (conc gstr"/"nextpart)) + (pathstrs (glob pathstr))) + ;; (print "pathstr="pathstr) + (multi-glob pathstrs remaining))) + globstrs))))))) + + +;; perm[/user:group]: +;; DDD - octal perm (future expansion) +;; - - use umask/defacto perms (i.e. don't actively do anything) +;; x - mark as executable +;; +;; Cards: +;; file: f perm fname +;; directory: d perm fname artifactid +;; link: l perm lname destpath +;; +;; NOTE: cards are kept as (C . "value") +;; +;; given a directory path, ignore list and artifact store (hash-table): +;; 1. create sha1 tree at dest (e.g. aa/b3a7 ...) +;; 2. create artifact for each dir +;; - cards for all files +;; - cards for files that are symlinks or executables +;; 3. return (artifactid . artifact) +;; +;; NOTES: +;; Use destdir of #f to not create sha1 tree +;; Hard links will be used if srcdir and destdir appear to be same partion +;; +;; (alist->artifact adat aspec #!key (ptype #f)) +;; +;; +;; (load "../../artifacts/artifacts.scm")(import big-chicken srfi-69 artifacts)(define dirdat (make-hash-table)) +;; (capture-dir ".." ".." "/tmp/junk" '() dirdat) +;; +;; [procedure] (file-type FILE [LINK [ERROR]]) +;; Returns the file-type for FILE, which should be a filename, a file-descriptor or a port object. If LINK is given and true, symbolic-links are not followed: +;; +;; regular-file +;; directory +;; fifo +;; socket +;; symbolic-link +;; character-device +;; block-device +;; Note that not all types are supported on every platform. If ERROR is given and false, then file-type returns #f if the file does not exist; otherwise, it signals an error. +;; +;; +(define (capture-dir curr-dir src-dir dest-dir ignore-list artifacts all-seen) + (let* ((dir-dat (directory-fold + (lambda (fname res) ;; res is a list of artifact cards + (let* ((fullname (conc curr-dir"/"fname))) + ;; (print "INFO: processing "fullname) + (if (hash-table-ref/default all-seen fullname #f) ;; something circular going on + (begin + (print "WARNING: possible circular link(s) "fullname) + res) + (let* ((ftype (file-type fullname #t #f))) + (hash-table-set! all-seen fullname ftype) + (cons + (case ftype ;; get the card + ((directory) ;; (directory? fullname) + (let* ((new-curr-dir (conc curr-dir"/"fname)) + (new-src-dir (conc src-dir"/"fname))) + (let* ((dir-dat (capture-dir new-curr-dir new-src-dir + dest-dir ignore-list artifacts all-seen)) + (a-id (car dir-dat)) + (artf (cdr dir-dat))) + (hash-table-set! artifacts a-id artf) + (cons 'd (conc "- "a-id" "fname))))) ;; the card + ((symbolic-link) ;; (symbolic-link? fullname) + (let ((ldest (read-symbolic-link fullname))) + (cons 'l (conc "- "fname"/"ldest)))) ;; delimit link name from dest with / + ((regular-file) ;; must be a file + (let* ((start (current-seconds)) + (sha1sum (file-get-sha1 fullname)) + (perms (if (file-executable? fullname) "x" "-"))) + (let ((runtime (- (current-seconds) start))) + (if (> runtime 1) + (print "INFO: file "fullname" took "runtime" seconds to calculate sha1."))) + (if dest-dir + (archive-copy fullname dest-dir sha1sum)) + (cons 'f (conc perms " "sha1sum" "fname)))) + (else + (print "WARNING: file "fullname" of type "ftype" is NOT supported and will converted to empty file.") + (let* ((sha1sum (write-to-archive "" dest-dir))) + (cons 'f (conc "- "sha1sum" "fname))))) + res))))) + '() src-dir #:dotfiles? #t))) ;; => (values srcdir_artifact sub_artifacts_list) + ;; (print "dir-dat: " dir-dat) + (let-values (((a-id artf) + (alist->artifact dir-dat '() ptype: 'd no-d: #t))) + (hash-table-set! artifacts a-id artf) + (cons a-id artf)))) + +;; maybe move this into artifacts? +;; +;; currently moves *.artifact into a bundle and moves the artifacts into attic +;; future: move artifacts under 1 meg in size into bundle up to 10 meg in size +;; +(define (artifact-rollup bundle-dir) ;; cfg storepath) + ;; (let* ((bundle-dir (calc-bundle-dir cfg storepath))) + (let* ((bundles (glob (conc bundle-dir"/*.bundle"))) + (artifacts (glob (conc bundle-dir"/*.artifact")))) + (if (> (length artifacts) 30) ;; rollup only if > 30 artifacts + ;; if we have unbundled artifacts, bundle them + (let* ((ht (read-artifacts-into-hash #f artifacts: artifacts)) + (bundle (hash-of-artifacts->bundle ht))) + (write-bundle bundle bundle-dir) + (create-directory (conc bundle-dir"/attic") #t) + (for-each + (lambda (full-fname) + (let* ((fname (pathname-strip-directory full-fname)) + (newname (conc bundle-dir"/attic/"fname))) + (move-file full-fname newname #t))) + artifacts) + (conc "bundled "(length artifacts))) + "not enough artifacts to bundle"))) + +;; if destfile is a directory then calculate the sha1sum of the bundle and store it +;; by .bundle +;; +;; incoming dat is pure text (bundle already sorted and appended: +;; +(define (write-bundle bdl-data destdir) + (let* ((bdl-uuid (calc-sha1 bdl-data))) + (with-output-to-file + (conc destdir"/"bdl-uuid".bundle") + (lambda () + (print bdl-data))))) + +;; minimal (and hopefully fast) artifact reader +;; TODO: Add check of shar sum. +;; +(define (minimal-artifact-read fname) + (let* ((indat (with-input-from-file fname read-lines))) + (if (null? indat) + (values #f (conc "did not find an artifact in "fname)) + (let* ((zcard (last indat)) + (cardk (substring zcard 0 1)) + (cardv (substring zcard 2 (string-length zcard)))) + (if (equal? cardk "Z") + (values cardv (string-intersperse indat "\n")) + (values #f (conc fname" is not a valid artifact"))))))) + +;; read artifacts from directory into hash +;; NOTE: support for max-count not implemented yet +;; +(define (read-artifacts-into-hash dir #!key (artifacts #f) (max-count #f)(ht #f)) + (let* ((artifacts (or artifacts + (glob (conc dir"/*.artifact")))) + (ht (or ht (make-hash-table)))) + (for-each + (lambda (fname) + (let-values (((uuid afct) + (minimal-artifact-read fname))) + (hash-table-set! ht uuid afct))) + artifacts) + ht)) + +;; ht is: +;; uuid => artifact text +;; use write-bundle to put result into a bundle file +;; +(define (hash-of-artifacts->bundle ht) + (fold (lambda (k res) + (let* ((v (hash-table-ref ht k))) + (if res + (conc res"\n"v) + v))) + #f + (sort (hash-table-keys ht) string<=?))) + +;; minimal artifact to alist +;; +(define (minimal-artifact->alist afact) + (let* ((lines (string-split afact "\n"))) + (map (lambda (a) + (let* ((key (string->symbol (substring a 0 1))) + (sl (string-length a)) + (val (if (> sl 2) + (substring a 2 sl) + ""))) + (cons key val))) + lines))) + +;; some accessors for common cards +(define (afact-get-D afact) + (let ((dval (alist-ref 'D afact))) + (if dval + (string->number dval) + #f))) + +(define (afact-get-T afact) ;; get the artifact type as a symbol + (let ((val (alist-ref 'T afact))) + (if val + (string->symbol val) + val))) + +(define (afact-get-Z afact) + (alist-ref 'Z afact)) + +(define (afact-get afact key default) + (or (alist-ref key afact) + default)) + +(define (afact-get-number/default afact key default) + (let ((val (alist-ref key afact))) + (if val + (or (string->number val) default) ;; seems wrong + default))) + +;; bundles are never big and reading into memory for processing is fine +;; +(define (read-bundle srcfile #!optional (mode 'uuid-raw)) + (let* ((indat (with-input-from-file srcfile read-lines))) + (let loop ((tail indat) + (dat '()) ;; artifact being extracted + (res '())) ;; list of artifacts + (if (null? tail) + (reverse res) ;; last dat should be empty list + (let* ((curr-line (car tail))) + (let-values (((ctype cdata) + (card->type/value curr-line))) + (let* ((is-z-card (eq? 'Z ctype)) + (new-dat (cons (case mode + ((uuid-raw) curr-line) + (else (cons ctype cdata))) + dat))) + (if is-z-card + (loop (cdr tail) ;; done with this artifact + '() + (cons (case mode + ((uuid-raw) (cons cdata (string-intersperse (reverse new-dat) "\n"))) + (else (reverse new-dat))) + res)) + (loop (cdr tail) + new-dat + res))))))))) + + +;; find all .bundle and .artifacts files in bundle-dir +;; and inport them into sqlite handle adb +;; +(define (refresh-artifacts-db adb bundle-dir) + (let* ((bundles (glob (conc bundle-dir"/*.bundle"))) + (artifacts (glob (conc bundle-dir"/*.artifact"))) + (uuids (get-all-uuids adb 'hash))) + (with-transaction + adb + (lambda () + (for-each + (lambda (bundle-file) + ;; (print "Importing artifacts from "bundle-file) + (let* ((bdat (read-bundle bundle-file 'uuid-raw)) + (count 0) + (inc (lambda ()(set! count (+ count 1))))) + (for-each + (lambda (adat) + (match + adat + ((zval . artifact) + (if (not (hash-table-exists? uuids zval)) + (begin + ;; (print "INFO: importing new artifact "zval" from bundle "bundle-file) + (inc) + (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);" + zval artifact) + (hash-table-set! uuids zval #t)))) + (else + (print "ERROR: Bad artifact data "adat)))) + bdat) + (print "INFO: imported "count" artifacts from "bundle-file))) + bundles) + (for-each + (lambda (artifact-file) + ;; (print "Importing artifact from "artifact-file) + (let-values (((uuid artifact) (minimal-artifact-read artifact-file))) + (if uuid + (if (not (hash-table-exists? uuids uuid)) + (begin + ;; (print "INFO: importing new artifact "uuid" from "artifact-file) + (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);" + uuid artifact) + (hash-table-set! uuids uuid #t))) + (print "Bad artifact in "artifact-file)))) + artifacts))))) + +;;====================================================================== +;; Artifacts db cache +;;====================================================================== + +;; artifacts +;; id SERIAL PRIMARY KEY, +;; uuid TEXT NOT NULL, +;; artifact TEXT NOT NULL +;; +;; parents +;; id INTEGER REFERENCES artids.id, -- +;; parent_id REFERENCES artids.id +;; +;; schema is list of SQL statements - can be used to extend db with more tables +;; +(define (open-artifacts-db dbpath dbfile #!key (schema '())) + (let* ((dbfname (conc dbpath "/" dbfile)) + (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f))) + (adb (open-database dbfname))) + (set-busy-handler! adb (make-busy-timeout 10000)) + (execute adb "PRAGMA synchronous = 0;") + (if (not dbexists) + (with-transaction + adb + (lambda () + (for-each + (lambda (stmt) + (execute adb stmt)) + (append `("CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL, + artifact TEXT NOT NULL)" + + "CREATE TABLE IF NOT EXISTS parents + (id INTEGER REFERENCES artifacts(id) NOT NULL, + parent_id INTEGER REFERENCES artifacts(id) NOT NULL)") + schema))))) + adb)) + +(define (generate-year-month-name #!optional (seconds #f)) + (let* ((curr-time (seconds->local-time (or seconds (current-seconds))))) + (time->string curr-time "%Y%m"))) + +;; I don't like this function. TODO: remove the +;; mode and option to return ht. Use instead the +;; get-all-artifacts below +;; +(define (get-all-uuids adb #!optional (mode #f)) + (let* ((res (fold-row + (lambda (res uuid) + (cons uuid res)) + '() + adb + "SELECT uuid FROM artifacts;"))) + (case mode + ((hash) + (let* ((ht (make-hash-table))) + (for-each + (lambda (uuid) + (hash-table-set! ht uuid #t)) + res) + ht)) + (else res)))) + +;; returns raw artifacts (i.e. NOT alists but instead plain text) +(define (get-all-artifacts adb) + (let* ((ht (make-hash-table))) + (for-each-row + (lambda (id uuid artifact) + (hash-table-set! ht uuid `(,id ,uuid ,artifact))) + adb + "SELECT id,uuid,artifact FROM artifacts;") + ht)) + +;; given a bundle-dir copy or create to /tmp and open +;; the YYMM.db file and hand the handle to the given proc +;; NOTE: we operate in /tmp/ to accomodate users on NFS +;; where slamming Unix locks at an NFS filer can cause +;; locking fails. Eventually this /tmp behavior will be +;; configurable. +;; +(define (with-todays-adb bundle-dir proc) + (let* ((dbname (conc (generate-year-month-name) ".db")) + (destname (conc bundle-dir"/"dbname)) + (tmparea (conc "/tmp/"(current-user-name)"-"(calc-sha1 bundle-dir))) + (tmpname (conc tmparea"/"dbname)) + (lockfile (conc destname".update-in-progress"))) + ;; (print "with-todays-adb, bundle-dir: "bundle-dir", dbname: "dbname", destname: "destname",\n tmparea: " tmparea", lockfile: "lockfile) + (if (not (file-exists? tmparea))(create-directory tmparea #t)) + (let loop ((count 0)) + (if (file-exists? lockfile) + (if (< count 30) ;; aproximately 30 seconds + (begin + (sleep 1) + (loop (+ 1 count))) + (print "ERROR: "lockfile" exists, proceeding anyway")) + (if (file-exists? destname) + (begin + (copy-file destname tmpname #t) + (copy-file destname lockfile #t))))) + (let* ((adb (open-artifacts-db tmparea dbname)) + (res (proc adb))) + (finalize! adb) + (copy-file tmpname destname #t) + (delete-file* lockfile) + res))) + +) ;; module artifacts + +;; ATTIC + ADDED artifacts/artifacts.setup Index: artifacts/artifacts.setup ================================================================== --- /dev/null +++ artifacts/artifacts.setup @@ -0,0 +1,11 @@ +;; Copyright 2007-2017, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;;; pkts.setup +(standard-extension 'pkts "1.0") ADDED artifacts/artifactsrec.scm Index: artifacts/artifactsrec.scm ================================================================== --- /dev/null +++ artifacts/artifactsrec.scm @@ -0,0 +1,196 @@ +(define-syntax define-record-type + (syntax-rules () + ((define-record-type type + (constructor constructor-tag ...) + predicate + (field-tag accessor . more) ...) + (begin + (define type + (make-record-type 'type '(field-tag ...))) + (define constructor + (record-constructor type '(constructor-tag ...))) + (define predicate + (record-predicate type)) + (define-record-field type field-tag accessor . more) + ...)))) + +; An auxilliary macro for define field accessors and modifiers. +; This is needed only because modifiers are optional. + +(define-syntax define-record-field + (syntax-rules () + ((define-record-field type field-tag accessor) + (define accessor (record-accessor type 'field-tag))) + ((define-record-field type field-tag accessor modifier) + (begin + (define accessor (record-accessor type 'field-tag)) + (define modifier (record-modifier type 'field-tag)))))) + +; Record types + +; We define the following procedures: +; +; (make-record-type ) -> +; (record-constructor ) -> +; (record-predicate ) -> +; (record-accessor ) -> +; (record-modifier ) -> +; where +; ( ...) -> +; ( ) -> +; ( ) -> +; ( ) -> + +; Record types are implemented using vector-like records. The first +; slot of each record contains the record's type, which is itself a +; record. + +(define (record-type record) + (record-ref record 0)) + +;---------------- +; Record types are themselves records, so we first define the type for +; them. Except for problems with circularities, this could be defined as: +; (define-record-type :record-type +; (make-record-type name field-tags) +; record-type? +; (name record-type-name) +; (field-tags record-type-field-tags)) +; As it is, we need to define everything by hand. + +(define :record-type (make-record 3)) +(record-set! :record-type 0 :record-type) ; Its type is itself. +(record-set! :record-type 1 ':record-type) +(record-set! :record-type 2 '(name field-tags)) + +; Now that :record-type exists we can define a procedure for making more +; record types. + +(define (make-record-type name field-tags) + (let ((new (make-record 3))) + (record-set! new 0 :record-type) + (record-set! new 1 name) + (record-set! new 2 field-tags) + new)) + +; Accessors for record types. + +(define (record-type-name record-type) + (record-ref record-type 1)) + +(define (record-type-field-tags record-type) + (record-ref record-type 2)) + +;---------------- +; A utility for getting the offset of a field within a record. + +(define (field-index type tag) + (let loop ((i 1) (tags (record-type-field-tags type))) + (cond ((null? tags) + (error "record type has no such field" type tag)) + ((eq? tag (car tags)) + i) + (else + (loop (+ i 1) (cdr tags)))))) + +;---------------- +; Now we are ready to define RECORD-CONSTRUCTOR and the rest of the +; procedures used by the macro expansion of DEFINE-RECORD-TYPE. + +(define (record-constructor type tags) + (let ((size (length (record-type-field-tags type))) + (arg-count (length tags)) + (indexes (map (lambda (tag) + (field-index type tag)) + tags))) + (lambda args + (if (= (length args) + arg-count) + (let ((new (make-record (+ size 1)))) + (record-set! new 0 type) + (for-each (lambda (arg i) + (record-set! new i arg)) + args + indexes) + new) + (error "wrong number of arguments to constructor" type args))))) + +(define (record-predicate type) + (lambda (thing) + (and (record? thing) + (eq? (record-type thing) + type)))) + +(define (record-accessor type tag) + (let ((index (field-index type tag))) + (lambda (thing) + (if (and (record? thing) + (eq? (record-type thing) + type)) + (record-ref thing index) + (error "accessor applied to bad value" type tag thing))))) + +(define (record-modifier type tag) + (let ((index (field-index type tag))) + (lambda (thing value) + (if (and (record? thing) + (eq? (record-type thing) + type)) + (record-set! thing index value) + (error "modifier applied to bad value" type tag thing))))) + +Records + +; This implements a record abstraction that is identical to vectors, +; except that they are not vectors (VECTOR? returns false when given a +; record and RECORD? returns false when given a vector). The following +; procedures are provided: +; (record? ) -> +; (make-record ) -> +; (record-ref ) -> +; (record-set! ) -> +; +; These can implemented in R5RS Scheme as vectors with a distinguishing +; value at index zero, providing VECTOR? is redefined to be a procedure +; that returns false if its argument contains the distinguishing record +; value. EVAL is also redefined to use the new value of VECTOR?. + +; Define the marker and redefine VECTOR? and EVAL. + +(define record-marker (list 'record-marker)) + +(define real-vector? vector?) + +(define (vector? x) + (and (real-vector? x) + (or (= 0 (vector-length x)) + (not (eq? (vector-ref x 0) + record-marker))))) + +; This won't work if ENV is the interaction environment and someone has +; redefined LAMBDA there. + +(define eval + (let ((real-eval eval)) + (lambda (exp env) + ((real-eval `(lambda (vector?) ,exp)) + vector?)))) + +; Definitions of the record procedures. + +(define (record? x) + (and (real-vector? x) + (< 0 (vector-length x)) + (eq? (vector-ref x 0) + record-marker))) + +(define (make-record size) + (let ((new (make-vector (+ size 1)))) + (vector-set! new 0 record-marker) + new)) + +(define (record-ref record index) + (vector-ref record (+ index 1))) + +(define (record-set! record index value) + (vector-set! record (+ index 1) value)) ADDED artifacts/tests/run.scm Index: artifacts/tests/run.scm ================================================================== --- /dev/null +++ artifacts/tests/run.scm @@ -0,0 +1,139 @@ +(use test) + +;; (use (prefix pkts pkts:)) +(use pkts (prefix dbi dbi:)) +;; (use trace)(trace sdat->alist pkt->alist) + +(if (file-exists? "queue.db")(delete-file "queue.db")) + +(test-begin "pkts and pkt archives") + +;;====================================================================== +;; Basic pkt creation, parsing and conversion routines +;;====================================================================== + +(test-begin "basic packets") +(test #f '(A "This is a packet") (let-values (((t v) + (card->type/value "A This is a packet"))) + (list t v))) +(test #f "A A\nZ 664449e7299e0065a3e25c138ccef2df13ba291e" + (let-values (((uuid res) + (add-z-card '("A A")))) + res)) +(test #f '("CC C++" "D 1486332719.0" "a A" "b C")(sort (construct-sdat 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0) + string<=?)) +(define pkt-example #f) +(test #f "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" + (let-values (((uuid res) + (construct-pkt 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0))) + (set! pkt-example (cons uuid res)) + res)) +(test-end "basic packets") + +;;====================================================================== +;; Sqlite and postgresql based queue of pkts +;;====================================================================== + +(test-begin "pkt queue") +(define db #f) +(test #f 'sqlite3 (let ((dbh (open-queue-db "." "queue.db"))) + (set! db dbh) + (dbi:db-dbtype dbh))) +(test #f (cdr pkt-example) + (begin + (add-to-queue db (cdr pkt-example)(car pkt-example) 'basic #f 0) + (lookup-by-uuid db (car pkt-example) 0))) +(test #f (cdr pkt-example) + (lookup-by-id db 1)) +(test #f 1 (length (find-pkts db '(basic) '()))) + +(test-end "pkt queue") + + +;;====================================================================== +;; Process groups of pkts +;;====================================================================== + +(test-begin "lists of packets") +(test #f '((apkt . #f) (id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) ;; ((id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) + (dblst->dpkts '(1 2 3 4 5))) +(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0))) + ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + (get-dpkts db '(basic) 0 #f)) +(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0))) + ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + (get-not-processed-pkts db 0 'basic 1000 0)) +(test-end "lists of packets") + +(test-begin "pkts as alists") +(define pktspec '((posting . ((title . t) ;; NOTE: same as ((posting (title . t)(url . u)(blub . b)) ... + (url . u) + (blurb . b))) + (comment . ((comment . c) + (score . s))) + (basic . ((b-field . b) + (a-field . a))))) +(define pktlst (find-pkts db '(basic) '())) +(define dpkt (car pktlst)) +(test #f "A" (get-value 'a-field dpkt pktspec)) + +(test #f "C" (alist-ref 'b-field (dpkt->alist dpkt pktspec))) + +(define basic-spec '((nada (foo . b)(bar . f))(basic (foo . f)(bar . b)))) +(define test-pkt '((foo . "fooval")(bar . "barval"))) +(let*-values (((u p) (alist->pkt test-pkt basic-spec ptype: 'basic)) + ((apkt) (pkt->alist p)) + ((bpkt) (pkt->alist p pktspec: basic-spec))) + (test #f "fooval" (alist-ref 'f apkt)) + (test #f "fooval" (alist-ref 'foo bpkt)) + (test #f #f (alist-ref 'f bpkt))) + +(test-end "pkts as alists") + +(test-begin "descendents and ancestors") + +(define (get-uuid pkt)(alist-ref 'uuid pkt)) + +;; add a child to 263e +(let-values (((uuid pkt) + (construct-pkt 'x "X" 'y "Y" 'P "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" + 'D "1486332719.0"))) + (add-to-queue db pkt uuid 'basic "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" 0)) + +(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8") + (map (lambda (x)(alist-ref 'uuid x)) + (get-descendents + db 0 + "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + +(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8") + (map (lambda (x)(alist-ref 'uuid x)) + (get-ancestors + db 0 + "818fe30988c9673441b8f203972a8bda6af682f8"))) + +(test-end "descendents and ancestors") + +(test-end "pkts and pkt archives") + +(test-begin "pktsdb") + +(define spec '((tests (testname n TEXT) + (testpath p TEXT) + (duration d INTEGER)))) +;; (define pktsdb (make-pktdb)) +;; (pktdb-pktsdb-spec-set! pktsdb spec) + +(define pktsdb #f) + +(test #f #t (dbi:database? (let ((pdb (pktsdb-open "test.db" spec))) + (set! pktsdb pdb) + (pktdb-conn pdb)))) +;; (pp (pktdb-pktspec pktsdb)) +(test #f #t (pktsdb-add-record pktsdb 'tests '((testname . "test1")))) + +(pktsdb-close pktsdb) + +(test-end "pktsdb")