diff options
author | Christopher Baines <mail@cbaines.net> | 2024-11-08 13:33:11 +0000 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2024-11-08 22:34:28 +0000 |
commit | ca5f0036f30fc833c70613b1a6f550bdf8d2d5a5 (patch) | |
tree | d6a69e751daf708d78f1a582230059c3733d06b6 /guix-data-service/heap-profiler.scm | |
parent | 0c1e9ad4e466be19f992571ac4fecbbbe2237edf (diff) | |
download | data-service-ca5f0036f30fc833c70613b1a6f550bdf8d2d5a5.tar data-service-ca5f0036f30fc833c70613b1a6f550bdf8d2d5a5.tar.gz |
Add a heap-profiler module
Taken from a Guile mailing list post.
Diffstat (limited to 'guix-data-service/heap-profiler.scm')
-rw-r--r-- | guix-data-service/heap-profiler.scm | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/guix-data-service/heap-profiler.scm b/guix-data-service/heap-profiler.scm new file mode 100644 index 0000000..fa838f5 --- /dev/null +++ b/guix-data-service/heap-profiler.scm @@ -0,0 +1,225 @@ +;;; Copyright © 2022 Ludovic Courtès <ludo@gnu.org> +;;; +;;; Distributed under the GNU Lesser General Public License, version 3 or (at +;;; your option) any later version. + +(define-module (guix-data-service heap-profiler) + #:use-module (system foreign) + #:use-module (system base types internal) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9 gnu) + #:use-module (ice-9 match) + #:use-module (ice-9 control) + #:use-module (ice-9 format) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 regex) + #:export (profile-heap)) + +(define-immutable-record-type <memory-mapping> + (memory-mapping start end permissions name) + memory-mapping? + (start memory-mapping-start) + (end memory-mapping-end) + (permissions memory-mapping-permissions) + (name memory-mapping-name)) + +(define (memory-mappings pid) ;based on Guile's 'gc-profile.scm' + "Return an list of alists, each of which contains information about a memory +mapping of process @var{pid}. This information is obtained by reading +@file{/proc/PID/maps} on Linux. See `procs(5)' for details." + + (define mapping-line-rx + ;; As of Linux 2.6.32.28, an `maps' line looks like this: + ;; "00400000-0041d000 r--p 00000000 fd:00 7926441 /bin/cat". + (make-regexp + "^([[:xdigit:]]+)-([[:xdigit:]]+) ([rwx-]{3}[ps]) ([[:xdigit:]]+) (fd|[[:xdigit:]]{2}):[[:xdigit:]]{2} [0-9]+[[:blank:]]+(.*)$")) + + (call-with-input-file (format #f "/proc/~a/maps" pid) + (lambda (port) + (let loop ((result '())) + (match (read-line port) + ((? eof-object?) + (reverse result)) + (line + (cond ((regexp-exec mapping-line-rx line) + => + (lambda (match) + (let ((start (string->number (match:substring match 1) + 16)) + (end (string->number (match:substring match 2) + 16)) + (perms (match:substring match 3)) + (name (match:substring match 6))) + (loop (cons (memory-mapping + start end perms + (if (string=? name "") + #f + name)) + result))))) + (else + (loop result))))))))) + +;; (define random-valid-address +;; ;; XXX: This is only in libgc with back pointers. +;; (let ((ptr (false-if-exception +;; (dynamic-func "GC_generate_random_valid_address" (dynamic-link))))) +;; (if ptr +;; (pointer->procedure '* ptr '()) +;; (const #f)))) + +(define (heap-sections) + (filter (lambda (mapping) + (and (not (memory-mapping-name mapping)) + (string=? "rw-p" (memory-mapping-permissions mapping)))) + (memory-mappings (getpid)))) + +(define (random-valid-address heap-sections) + ;; Mimic 'GC_generate_random_valid_address', which is only available with + ;; '-DBACK_PTRS' builds of libgc. + (define heap-size + (fold (lambda (mapping size) + (+ size (- (memory-mapping-end mapping) + (memory-mapping-start mapping)))) + 0 + heap-sections)) + + (let loop ((sections heap-sections) + (size 0) + (offset (random heap-size))) + (match sections + (() #f) + ((section . rest) + (let* ((start (memory-mapping-start section)) + (end (memory-mapping-end section)) + (section-size (- end start))) + (if (< offset section-size) + (let ((result (base-pointer (+ start offset)))) + ;; (pk 'p (number->string (+ start offset) 16) result) + (if (null-pointer? result) + (loop heap-sections 0 (random heap-size)) ;retry + result)) + (loop rest + (+ size section-size) + (- offset section-size)))))))) + +(define object-size + (pointer->procedure size_t + (dynamic-func "GC_size" (dynamic-link)) + '(*))) + +(define base-pointer + (pointer->procedure '* + (dynamic-func "GC_base" (dynamic-link)) + (list uintptr_t))) + +(define (heap-tag->type-name word) + "Return the type name as a symbol corresponding to the tag WORD." + (match (let/ec return + (let-syntax ((tag-name (syntax-rules () + ((_ name pred mask tag) + (when (= (logand word mask) tag) + (return 'name)))))) + (visit-heap-tags tag-name) + 'unknown)) + ('program + (cond ((= (logand word #x1000) #x1000) + 'partial-continuation) + ((= (logand word #x2000) #x2000) + 'foreign-program) + ((= (logand word #x800) #x800) + 'continuation) + ((= (logand word #x400) #x400) + 'primitive-generic) + ((= (logand word #x200) #x200) + 'primitive) + ((= (logand word #x100) #x100) + 'boot-program) + (else + 'program))) + (type + type))) + +(define* (profile-heap #:key (sample-count 100000)) + "Pick SAMPLE-COUNT addresses in the GC-managed heap and display a profile +of this sample per data type." + (define heap-size + (assoc-ref (gc-stats) 'heap-size)) + + (define heap + (heap-sections)) + + (let ((objects (make-hash-table 57)) + (visited (make-hash-table))) + (let loop ((i sample-count)) + (unless (zero? i) + (let ((address (random-valid-address heap))) + (if (hashv-ref visited (pointer-address address)) + (loop i) + (begin + (hashv-set! visited (pointer-address address) #t) + (let* ((tag (pointer-address (dereference-pointer address))) + (type (heap-tag->type-name tag)) + (size (match type + ('pair (* 2 (sizeof '*))) + ('vector + (min (ash tag -8) + (object-size address))) + (_ (object-size address))))) + ;; (when (eq? 'unknown type) + ;; (pk (object-size address))) + ;; (when (eq? 'vector type) + ;; (pk 'vector size 'tag tag 'addr address 'vs (object-size address))) + (hashq-set! objects type + (match (hashq-ref objects type '(0 . 0)) + ((count . total) + (cons (+ count 1) (+ total size)))))) + (loop (- i 1))))))) + (let ((grand-total (hash-fold (lambda (type stats result) + (match stats + ((_ . total) + (+ total result)))) + 0 + objects))) + (format (current-error-port) + " % type self avg obj size~%") + (for-each (match-lambda + ((type . (count . total)) + (format (current-error-port) "~5,1f ~30a ~14h ~7,1f~%" + (* 100. (/ total grand-total)) + type total + (/ total count 1.)))) + (sort (hash-map->list cons objects) + (match-lambda* + (((_ . (count1 . total1)) (_ . (count2 . total2))) + (or (> total1 total2) + (and (= total1 total2) + (> count1 count2))))))) + (format (current-error-port) "sampled heap: ~h MiB (heap size: ~h MiB)~%" + (/ grand-total (expt 2. 20)) + (/ heap-size (expt 2. 20)))))) + +(define (heap-samples type count) + "Sample COUNT objects of the given TYPE, a symbol such as 'vector, and +return them. + +WARNING: This can crash your application as this could pick bogus or +finalized objects." + (define heap + (heap-sections)) + + (let ((visited (make-hash-table))) + (let loop ((i count) + (objects '())) + (if (zero? i) + objects + (let ((address (random-valid-address heap))) + (if (hashv-ref visited (pointer-address address)) + (loop i objects) + (begin + (hashv-set! visited (pointer-address address) #t) + (let ((tag (pointer-address (dereference-pointer address)))) + (if (eq? type (heap-tag->type-name tag)) + (loop (- i 1) + (cons (pointer->scm address) objects)) + (loop i objects)))))))))) + |