;;; org-roam-db.el --- Org-roam database API -*- coding: utf-8; lexical-binding: t; -*- ;; Copyright © 2020 Jethro Kuan ;; Author: Jethro Kuan ;; URL: https://github.com/org-roam/org-roam ;; Keywords: org-mode, roam, convenience ;; Version: 1.2.0 ;; Package-Requires: ((emacs "26.1") (dash "2.13") (f "0.17.2") (s "1.12.0") (org "9.3") (emacsql "3.0.0") (emacsql-sqlite3 "1.0.0")) ;; This file is NOT part of GNU Emacs. ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ;; Boston, MA 02110-1301, USA. ;;; Commentary: ;; ;; This library is provides the underlying database api to org-roam ;; ;;; Code: ;;;; Library Requires (eval-when-compile (require 'subr-x)) (require 'emacsql) (require 'emacsql-sqlite3) (require 'seq) (require 'org-roam-macs) (defvar org-roam-directory) (defvar org-roam-verbose) (defvar org-roam-file-name) (declare-function org-roam--org-roam-file-p "org-roam") (declare-function org-roam--extract-titles "org-roam") (declare-function org-roam--extract-ref "org-roam") (declare-function org-roam--extract-tags "org-roam") (declare-function org-roam--extract-headlines "org-roam") (declare-function org-roam--extract-links "org-roam") (declare-function org-roam--list-all-files "org-roam") (declare-function org-roam--path-to-slug "org-roam") (declare-function org-roam-buffer--update-maybe "org-roam-buffer") ;;;; Options (defcustom org-roam-db-location nil "The full path to file where the Org-roam database is stored. If this is non-nil, the Org-roam sqlite database is saved here. It is the user's responsibility to set this correctly, especially when used with multiple Org-roam instances." :type 'string :group 'org-roam) (defcustom org-roam-db-gc-threshold gc-cons-threshold "The value to temporarily set the `gc-cons-threshold' threshold to. During large, heavy operations like `org-roam-db-build-cache', many GC operations happen because of the large number of temporary structures generated (e.g. parsed ASTs). Temporarily increasing `gc-cons-threshold' will help reduce the number of GC operations, at the cost of temporary memory usage. This defaults to the original value of `gc-cons-threshold', but tweaking this number may lead to better overall performance. For example, to reduce the number of GCs, one may set it to a large value like `most-positive-fixnum'." :type 'int :group 'org-roam) (defconst org-roam-db--version 7) (defvar org-roam-db--connection (make-hash-table :test #'equal) "Database connection to Org-roam database.") ;;;; Core Functions (defun org-roam-db--get () "Return the sqlite db file." (or org-roam-db-location (expand-file-name "org-roam.db" org-roam-directory))) (defun org-roam-db--get-connection () "Return the database connection, if any." (gethash (file-truename org-roam-directory) org-roam-db--connection)) (defun org-roam-db () "Entrypoint to the Org-roam sqlite database. Initializes and stores the database, and the database connection. Performs a database upgrade when required." (unless (and (org-roam-db--get-connection) (emacsql-live-p (org-roam-db--get-connection))) (let* ((db-file (org-roam-db--get)) (init-db (not (file-exists-p db-file)))) (make-directory (file-name-directory db-file) t) (let ((conn (emacsql-sqlite3 db-file))) (set-process-query-on-exit-flag (emacsql-process conn) nil) (puthash (file-truename org-roam-directory) conn org-roam-db--connection) (when init-db (org-roam-db--init conn)) (let* ((version (caar (emacsql conn "PRAGMA user_version"))) (version (org-roam-db--update-maybe conn version))) (cond ((> version org-roam-db--version) (emacsql-close conn) (user-error "The Org-roam database was created with a newer Org-roam version. " "You need to update the Org-roam package")) ((< version org-roam-db--version) (emacsql-close conn) (error "BUG: The Org-roam database scheme changed %s" "and there is no upgrade path"))))))) (org-roam-db--get-connection)) ;;;; Entrypoint: (org-roam-db-query) (defun org-roam-db-query (sql &rest args) "Run SQL query on Org-roam database with ARGS. SQL can be either the emacsql vector representation, or a string." (if (stringp sql) (emacsql (org-roam-db) (apply #'format sql args)) (apply #'emacsql (org-roam-db) sql args))) ;;;; Schemata (defconst org-roam-db--table-schemata '((files [(file :unique :primary-key) (hash :not-null) (meta :not-null)]) (headlines [(id :unique :primary-key) (file :not-null)]) (links [(from :not-null) (to :not-null) (type :not-null) (properties :not-null)]) (tags [(file :unique :primary-key) (tags)]) (titles [(file :not-null) title]) (refs [(ref :unique :not-null) (file :not-null) (type :not-null)]))) (defun org-roam-db--init (db) "Initialize database DB with the correct schema and user version." (emacsql-with-transaction db (pcase-dolist (`(,table . ,schema) org-roam-db--table-schemata) (emacsql db [:create-table $i1 $S2] table schema)) (emacsql db (format "PRAGMA user_version = %s" org-roam-db--version)))) (defun org-roam-db--update-maybe (db version) "Upgrades the database schema for DB, if VERSION is old." (emacsql-with-transaction db 'ignore (if (< version org-roam-db--version) (progn (org-roam-message (format "Upgrading the Org-roam database from version %d to version %d" version org-roam-db--version)) (org-roam-db-build-cache t)))) version) (defun org-roam-db--close (&optional db) "Closes the database connection for database DB. If DB is nil, closes the database connection for the database in the current `org-roam-directory'." (unless db (setq db (org-roam-db--get-connection))) (when (and db (emacsql-live-p db)) (emacsql-close db))) (defun org-roam-db--close-all () "Closes all database connections made by Org-roam." (dolist (conn (hash-table-values org-roam-db--connection)) (org-roam-db--close conn))) ;;;; Database API ;;;;; Initialization (defun org-roam-db--initialized-p () "Whether the Org-roam cache has been initialized." (and (file-exists-p (org-roam-db--get)) (> (caar (org-roam-db-query [:select (funcall count) :from titles])) 0))) (defun org-roam-db--ensure-built () "Ensures that Org-roam cache is built." (unless (org-roam-db--initialized-p) (error "[Org-roam] your cache isn't built yet! Please run org-roam-db-build-cache"))) ;;;;; Clearing (defun org-roam-db-clear () "Clears all entries in the Org-roam cache." (interactive) (when (file-exists-p (org-roam-db--get)) (dolist (table (mapcar #'car org-roam-db--table-schemata)) (org-roam-db-query `[:delete :from ,table])))) (defun org-roam-db--clear-file (&optional filepath) "Remove any related links to the file at FILEPATH. This is equivalent to removing the node from the graph." (let ((file (file-truename (or filepath (buffer-file-name (buffer-base-buffer)))))) (dolist (table (mapcar #'car org-roam-db--table-schemata)) (org-roam-db-query `[:delete :from ,table :where (= ,(if (eq table 'links) 'from 'file) $s1)] file)))) ;;;;; Insertion (defun org-roam-db--insert-meta (file hash meta) "Insert HASH and META for a FILE into the Org-roam cache." (org-roam-db-query [:insert :into files :values $v1] (list (vector file hash meta)))) (defun org-roam-db--insert-links (links) "Insert LINKS into the Org-roam cache." (org-roam-db-query [:insert :into links :values $v1] links)) (defun org-roam-db--insert-titles (file titles) "Insert TITLES for a FILE into the Org-roam cache." (org-roam-db-query [:insert :into titles :values $v1] (mapcar (lambda (title) (vector file title)) titles))) (defun org-roam-db--insert-headlines (headlines) "Insert HEADLINES into the Org-roam cache. Returns t if the insertion was successful, nil otherwise. Insertions can fail when there is an ID conflict." (condition-case nil (progn (org-roam-db-query [:insert :into headlines :values $v1] headlines) t) (error (unless (listp headlines) (setq headlines (list headlines))) (lwarn '(org-roam) :error (format "Duplicate IDs in %s, one of:\n\n%s\n\nskipping..." (aref (car headlines) 1) (string-join (mapcar (lambda (hl) (aref hl 0)) headlines) "\n"))) nil))) (defun org-roam-db--insert-tags (file tags) "Insert TAGS for a FILE into the Org-roam cache." (org-roam-db-query [:insert :into tags :values $v1] (list (vector file tags)))) (defun org-roam-db--insert-ref (file ref) "Insert REF for FILE into the Org-roam cache. Returns t if successful, and nil otherwise. Insertions can fail if the key is already in the database." (let ((key (cdr ref)) (type (car ref))) (condition-case nil (progn (org-roam-db-query [:insert :into refs :values $v1] (list (vector key file type))) t) (error (lwarn '(org-roam) :error (format "Duplicate ref %s in:\n\nA: %s\nB: %s\n\nskipping..." key file (caar (org-roam-db-query [:select file :from refs :where (= ref $v1)] (vector key))))) nil)))) ;;;;; Fetching (defun org-roam-db--get-current-files () "Return a hash-table of file to the hash of its file contents." (let* ((current-files (org-roam-db-query [:select * :from files])) (ht (make-hash-table :test #'equal))) (dolist (row current-files) (puthash (car row) (cadr row) ht)) ht)) (defun org-roam-db--get-titles (file) "Return the titles of FILE from the cache." (caar (org-roam-db-query [:select [title] :from titles :where (= file $s1) :limit 1] file))) (defun org-roam-db--connected-component (file) "Return all files reachable from/connected to FILE, including the file itself. If the file does not have any connections, nil is returned." (let* ((query "WITH RECURSIVE links_of(file, link) AS (WITH filelinks AS (SELECT * FROM links WHERE \"type\" = '\"file\"'), citelinks AS (SELECT * FROM links JOIN refs ON links.\"to\" = refs.\"ref\" AND links.\"type\" = '\"cite\"') SELECT \"from\", \"to\" FROM filelinks UNION SELECT \"to\", \"from\" FROM filelinks UNION SELECT \"file\", \"from\" FROM citelinks UNION SELECT \"from\", \"file\" FROM citelinks), connected_component(file) AS (SELECT link FROM links_of WHERE file = $s1 UNION SELECT link FROM links_of JOIN connected_component USING(file)) SELECT * FROM connected_component;") (files (mapcar 'car-safe (emacsql (org-roam-db) query file)))) files)) (defun org-roam-db--links-with-max-distance (file max-distance) "Return all files connected to FILE in at most MAX-DISTANCE steps. This includes the file itself. If the file does not have any connections, nil is returned." (let* ((query "WITH RECURSIVE links_of(file, link) AS (WITH filelinks AS (SELECT * FROM links WHERE \"type\" = '\"file\"'), citelinks AS (SELECT * FROM links JOIN refs ON links.\"to\" = refs.\"ref\" AND links.\"type\" = '\"cite\"') SELECT \"from\", \"to\" FROM filelinks UNION SELECT \"to\", \"from\" FROM filelinks UNION SELECT \"file\", \"from\" FROM citelinks UNION SELECT \"from\", \"file\" FROM citelinks), -- Links are traversed in a breadth-first search. In order to calculate the -- distance of nodes and to avoid following cyclic links, the visited nodes -- are tracked in 'trace'. connected_component(file, trace) AS (VALUES($s1, json_array($s1)) UNION SELECT lo.link, json_insert(cc.trace, '$[' || json_array_length(cc.trace) || ']', lo.link) FROM connected_component AS cc JOIN links_of AS lo USING(file) WHERE ( -- Avoid cycles by only visiting each file once. (SELECT count(*) FROM json_each(cc.trace) WHERE json_each.value == lo.link) == 0 -- Note: BFS is cut off early here. AND json_array_length(cc.trace) < ($s2 + 1))) SELECT DISTINCT file, min(json_array_length(trace)) AS distance FROM connected_component GROUP BY file ORDER BY distance;") ;; In principle the distance would be available in the second column. (files (mapcar 'car-safe (emacsql (org-roam-db) query file max-distance)))) files)) ;;;;; Updating (defun org-roam-db--update-meta () "Update the metadata of the current buffer into the cache." (let* ((file (file-truename (buffer-file-name))) (attr (file-attributes file)) (atime (file-attribute-access-time attr)) (mtime (file-attribute-modification-time attr)) (hash (secure-hash 'sha1 (current-buffer)))) (org-roam-db-query [:delete :from files :where (= file $s1)] file) (org-roam-db--insert-meta file hash (list :atime atime :mtime mtime)))) (defun org-roam-db--update-titles () "Update the title of the current buffer into the cache." (let* ((file (file-truename (buffer-file-name))) (titles (or (org-roam--extract-titles) (list (org-roam--path-to-slug file))))) (org-roam-db-query [:delete :from titles :where (= file $s1)] file) (org-roam-db--insert-titles file titles))) (defun org-roam-db--update-tags () "Update the tags of the current buffer into the cache." (let ((file (file-truename (buffer-file-name))) (tags (org-roam--extract-tags))) (org-roam-db-query [:delete :from tags :where (= file $s1)] file) (when tags (org-roam-db--insert-tags file tags)))) (defun org-roam-db--update-refs () "Update the ref of the current buffer into the cache." (let ((file (file-truename (buffer-file-name)))) (org-roam-db-query [:delete :from refs :where (= file $s1)] file) (when-let ((ref (org-roam--extract-ref))) (org-roam-db--insert-ref file ref)))) (defun org-roam-db--update-links () "Update the file links of the current buffer in the cache." (let ((file (file-truename (buffer-file-name)))) (org-roam-db-query [:delete :from links :where (= from $s1)] file) (when-let ((links (org-roam--extract-links))) (org-roam-db--insert-links links)))) (defun org-roam-db--update-headlines () "Update the file headlines of the current buffer into the cache." (let* ((file (file-truename (buffer-file-name)))) (org-roam-db-query [:delete :from headlines :where (= file $s1)] file) (when-let ((headlines (org-roam--extract-headlines))) (org-roam-db--insert-headlines headlines)))) (defun org-roam-db--update-file (&optional file-path) "Update Org-roam cache for FILE-PATH." (when (org-roam--org-roam-file-p file-path) (let ((buf (or (and file-path (find-file-noselect file-path t)) (current-buffer)))) (with-current-buffer buf (save-excursion (emacsql-with-transaction (org-roam-db--get-connection) (org-roam-db--update-meta) (org-roam-db--update-tags) (org-roam-db--update-titles) (org-roam-db--update-refs) (org-roam-db--update-headlines) (org-roam-db--update-links)) (org-roam-buffer--update-maybe :redisplay t)))))) (defun org-roam-db-build-cache (&optional force) "Build the cache for `org-roam-directory'. If FORCE, force a rebuild of the cache from scratch." (interactive "P") (when force (delete-file (org-roam-db--get))) (org-roam-db--close) ;; Force a reconnect (org-roam-db) ;; To initialize the database, no-op if already initialized (let* ((gc-cons-threshold org-roam-db-gc-threshold) (org-roam-files (org-roam--list-all-files)) (current-files (org-roam-db--get-current-files)) (file-count 0) (headline-count 0) (link-count 0) (tag-count 0) (title-count 0) (ref-count 0) (deleted-count 0)) (emacsql-with-transaction (org-roam-db--get-connection) ;; Two-step building ;; First step: Rebuild files and headlines (dolist (file org-roam-files) (let* ((attr (file-attributes file)) (atime (file-attribute-access-time attr)) (mtime (file-attribute-modification-time attr))) (org-roam--with-temp-buffer file (let ((contents-hash (secure-hash 'sha1 (current-buffer)))) (unless (string= (gethash file current-files) contents-hash) (org-roam-db--clear-file file) (org-roam-db-query [:insert :into files :values $v1] (vector file contents-hash (list :atime atime :mtime mtime))) (setq file-count (1+ file-count)) (when-let ((headlines (org-roam--extract-headlines file))) (when (org-roam-db--insert-headlines headlines) (setq headline-count (1+ headline-count))))))))) ;; Second step: Rebuild the rest (dolist (file org-roam-files) (org-roam--with-temp-buffer file (let ((contents-hash (secure-hash 'sha1 (current-buffer)))) (unless (string= (gethash file current-files) contents-hash) (when-let (links (org-roam--extract-links file)) (org-roam-db-query [:insert :into links :values $v1] links) (setq link-count (1+ link-count))) (when-let (tags (org-roam--extract-tags file)) (org-roam-db-query [:insert :into tags :values $v1] (vector file tags)) (setq tag-count (1+ tag-count))) (let ((titles (or (org-roam--extract-titles) (list (org-roam--path-to-slug file))))) (org-roam-db--insert-titles file titles) (setq title-count (+ title-count (length titles)))) (when-let* ((ref (org-roam--extract-ref))) (when (org-roam-db--insert-ref file ref) (setq ref-count (1+ ref-count))))) (remhash file current-files)))) (dolist (file (hash-table-keys current-files)) ;; These files are no longer around, remove from cache... (org-roam-db--clear-file file) (setq deleted-count (1+ deleted-count)))) (org-roam-message "files: 𝚫%s, headlines: 𝚫%s, links: 𝚫%s, tags: 𝚫%s, titles: 𝚫%s, refs: 𝚫%s, deleted: 𝚫%s" file-count headline-count link-count tag-count title-count ref-count deleted-count))) (provide 'org-roam-db) ;;; org-roam-db.el ends here