;;; Copyright © 2022 Ludovic Courtès ;;; ;;; Distributed under the GNU Lesser General Public License, version 3 or (at ;;; your option) any later version. (define-module (guix-data-service heap-profiler) #:use-module (system foreign) #:use-module (system base types internal) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9 gnu) #:use-module (ice-9 match) #:use-module (ice-9 control) #:use-module (ice-9 format) #:use-module (ice-9 rdelim) #:use-module (ice-9 regex) #:export (profile-heap)) (define-immutable-record-type (memory-mapping start end permissions name) memory-mapping? (start memory-mapping-start) (end memory-mapping-end) (permissions memory-mapping-permissions) (name memory-mapping-name)) (define (memory-mappings pid) ;based on Guile's 'gc-profile.scm' "Return an list of alists, each of which contains information about a memory mapping of process @var{pid}. This information is obtained by reading @file{/proc/PID/maps} on Linux. See `procs(5)' for details." (define mapping-line-rx ;; As of Linux 2.6.32.28, an `maps' line looks like this: ;; "00400000-0041d000 r--p 00000000 fd:00 7926441 /bin/cat". (make-regexp "^([[:xdigit:]]+)-([[:xdigit:]]+) ([rwx-]{3}[ps]) ([[:xdigit:]]+) (fd|[[:xdigit:]]{2}):[[:xdigit:]]{2} [0-9]+[[:blank:]]+(.*)$")) (call-with-input-file (format #f "/proc/~a/maps" pid) (lambda (port) (let loop ((result '())) (match (read-line port) ((? eof-object?) (reverse result)) (line (cond ((regexp-exec mapping-line-rx line) => (lambda (match) (let ((start (string->number (match:substring match 1) 16)) (end (string->number (match:substring match 2) 16)) (perms (match:substring match 3)) (name (match:substring match 6))) (loop (cons (memory-mapping start end perms (if (string=? name "") #f name)) result))))) (else (loop result))))))))) ;; (define random-valid-address ;; ;; XXX: This is only in libgc with back pointers. ;; (let ((ptr (false-if-exception ;; (dynamic-func "GC_generate_random_valid_address" (dynamic-link))))) ;; (if ptr ;; (pointer->procedure '* ptr '()) ;; (const #f)))) (define (heap-sections) (filter (lambda (mapping) (and (not (memory-mapping-name mapping)) (string=? "rw-p" (memory-mapping-permissions mapping)))) (memory-mappings (getpid)))) (define (random-valid-address heap-sections) ;; Mimic 'GC_generate_random_valid_address', which is only available with ;; '-DBACK_PTRS' builds of libgc. (define heap-size (fold (lambda (mapping size) (+ size (- (memory-mapping-end mapping) (memory-mapping-start mapping)))) 0 heap-sections)) (let loop ((sections heap-sections) (size 0) (offset (random heap-size))) (match sections (() #f) ((section . rest) (let* ((start (memory-mapping-start section)) (end (memory-mapping-end section)) (section-size (- end start))) (if (< offset section-size) (let ((result (base-pointer (+ start offset)))) ;; (pk 'p (number->string (+ start offset) 16) result) (if (null-pointer? result) (loop heap-sections 0 (random heap-size)) ;retry result)) (loop rest (+ size section-size) (- offset section-size)))))))) (define object-size (pointer->procedure size_t (dynamic-func "GC_size" (dynamic-link)) '(*))) (define base-pointer (pointer->procedure '* (dynamic-func "GC_base" (dynamic-link)) (list uintptr_t))) (define (heap-tag->type-name word) "Return the type name as a symbol corresponding to the tag WORD." (match (let/ec return (let-syntax ((tag-name (syntax-rules () ((_ name pred mask tag) (when (= (logand word mask) tag) (return 'name)))))) (visit-heap-tags tag-name) 'unknown)) ('program (cond ((= (logand word #x1000) #x1000) 'partial-continuation) ((= (logand word #x2000) #x2000) 'foreign-program) ((= (logand word #x800) #x800) 'continuation) ((= (logand word #x400) #x400) 'primitive-generic) ((= (logand word #x200) #x200) 'primitive) ((= (logand word #x100) #x100) 'boot-program) (else 'program))) (type type))) (define* (profile-heap #:key (sample-count 100000)) "Pick SAMPLE-COUNT addresses in the GC-managed heap and display a profile of this sample per data type." (define heap-size (assoc-ref (gc-stats) 'heap-size)) (define heap (heap-sections)) (let ((objects (make-hash-table 57)) (visited (make-hash-table))) (let loop ((i sample-count)) (unless (zero? i) (let ((address (random-valid-address heap))) (if (hashv-ref visited (pointer-address address)) (loop i) (begin (hashv-set! visited (pointer-address address) #t) (let* ((tag (pointer-address (dereference-pointer address))) (type (heap-tag->type-name tag)) (size (match type ('pair (* 2 (sizeof '*))) ('vector (min (ash tag -8) (object-size address))) (_ (object-size address))))) ;; (when (eq? 'unknown type) ;; (pk (object-size address))) ;; (when (eq? 'vector type) ;; (pk 'vector size 'tag tag 'addr address 'vs (object-size address))) (hashq-set! objects type (match (hashq-ref objects type '(0 . 0)) ((count . total) (cons (+ count 1) (+ total size)))))) (loop (- i 1))))))) (let ((grand-total (hash-fold (lambda (type stats result) (match stats ((_ . total) (+ total result)))) 0 objects))) (format (current-error-port) " % type self avg obj size~%") (for-each (match-lambda ((type . (count . total)) (format (current-error-port) "~5,1f ~30a ~14h ~7,1f~%" (* 100. (/ total grand-total)) type total (/ total count 1.)))) (sort (hash-map->list cons objects) (match-lambda* (((_ . (count1 . total1)) (_ . (count2 . total2))) (or (> total1 total2) (and (= total1 total2) (> count1 count2))))))) (format (current-error-port) "sampled heap: ~h MiB (heap size: ~h MiB)~%" (/ grand-total (expt 2. 20)) (/ heap-size (expt 2. 20)))))) (define (heap-samples type count) "Sample COUNT objects of the given TYPE, a symbol such as 'vector, and return them. WARNING: This can crash your application as this could pick bogus or finalized objects." (define heap (heap-sections)) (let ((visited (make-hash-table))) (let loop ((i count) (objects '())) (if (zero? i) objects (let ((address (random-valid-address heap))) (if (hashv-ref visited (pointer-address address)) (loop i objects) (begin (hashv-set! visited (pointer-address address) #t) (let ((tag (pointer-address (dereference-pointer address)))) (if (eq? type (heap-tag->type-name tag)) (loop (- i 1) (cons (pointer->scm address) objects)) (loop i objects))))))))))