aboutsummaryrefslogtreecommitdiff
path: root/guix-data-service/heap-profiler.scm
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2024-11-08 13:33:11 +0000
committerChristopher Baines <mail@cbaines.net>2024-11-08 22:34:28 +0000
commitca5f0036f30fc833c70613b1a6f550bdf8d2d5a5 (patch)
treed6a69e751daf708d78f1a582230059c3733d06b6 /guix-data-service/heap-profiler.scm
parent0c1e9ad4e466be19f992571ac4fecbbbe2237edf (diff)
downloaddata-service-ca5f0036f30fc833c70613b1a6f550bdf8d2d5a5.tar
data-service-ca5f0036f30fc833c70613b1a6f550bdf8d2d5a5.tar.gz
Add a heap-profiler module
Taken from a Guile mailing list post.
Diffstat (limited to 'guix-data-service/heap-profiler.scm')
-rw-r--r--guix-data-service/heap-profiler.scm225
1 files changed, 225 insertions, 0 deletions
diff --git a/guix-data-service/heap-profiler.scm b/guix-data-service/heap-profiler.scm
new file mode 100644
index 0000000..fa838f5
--- /dev/null
+++ b/guix-data-service/heap-profiler.scm
@@ -0,0 +1,225 @@
+;;; Copyright © 2022 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; Distributed under the GNU Lesser General Public License, version 3 or (at
+;;; your option) any later version.
+
+(define-module (guix-data-service heap-profiler)
+ #:use-module (system foreign)
+ #:use-module (system base types internal)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-9 gnu)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 control)
+ #:use-module (ice-9 format)
+ #:use-module (ice-9 rdelim)
+ #:use-module (ice-9 regex)
+ #:export (profile-heap))
+
+(define-immutable-record-type <memory-mapping>
+ (memory-mapping start end permissions name)
+ memory-mapping?
+ (start memory-mapping-start)
+ (end memory-mapping-end)
+ (permissions memory-mapping-permissions)
+ (name memory-mapping-name))
+
+(define (memory-mappings pid) ;based on Guile's 'gc-profile.scm'
+ "Return an list of alists, each of which contains information about a memory
+mapping of process @var{pid}. This information is obtained by reading
+@file{/proc/PID/maps} on Linux. See `procs(5)' for details."
+
+ (define mapping-line-rx
+ ;; As of Linux 2.6.32.28, an `maps' line looks like this:
+ ;; "00400000-0041d000 r--p 00000000 fd:00 7926441 /bin/cat".
+ (make-regexp
+ "^([[:xdigit:]]+)-([[:xdigit:]]+) ([rwx-]{3}[ps]) ([[:xdigit:]]+) (fd|[[:xdigit:]]{2}):[[:xdigit:]]{2} [0-9]+[[:blank:]]+(.*)$"))
+
+ (call-with-input-file (format #f "/proc/~a/maps" pid)
+ (lambda (port)
+ (let loop ((result '()))
+ (match (read-line port)
+ ((? eof-object?)
+ (reverse result))
+ (line
+ (cond ((regexp-exec mapping-line-rx line)
+ =>
+ (lambda (match)
+ (let ((start (string->number (match:substring match 1)
+ 16))
+ (end (string->number (match:substring match 2)
+ 16))
+ (perms (match:substring match 3))
+ (name (match:substring match 6)))
+ (loop (cons (memory-mapping
+ start end perms
+ (if (string=? name "")
+ #f
+ name))
+ result)))))
+ (else
+ (loop result)))))))))
+
+;; (define random-valid-address
+;; ;; XXX: This is only in libgc with back pointers.
+;; (let ((ptr (false-if-exception
+;; (dynamic-func "GC_generate_random_valid_address" (dynamic-link)))))
+;; (if ptr
+;; (pointer->procedure '* ptr '())
+;; (const #f))))
+
+(define (heap-sections)
+ (filter (lambda (mapping)
+ (and (not (memory-mapping-name mapping))
+ (string=? "rw-p" (memory-mapping-permissions mapping))))
+ (memory-mappings (getpid))))
+
+(define (random-valid-address heap-sections)
+ ;; Mimic 'GC_generate_random_valid_address', which is only available with
+ ;; '-DBACK_PTRS' builds of libgc.
+ (define heap-size
+ (fold (lambda (mapping size)
+ (+ size (- (memory-mapping-end mapping)
+ (memory-mapping-start mapping))))
+ 0
+ heap-sections))
+
+ (let loop ((sections heap-sections)
+ (size 0)
+ (offset (random heap-size)))
+ (match sections
+ (() #f)
+ ((section . rest)
+ (let* ((start (memory-mapping-start section))
+ (end (memory-mapping-end section))
+ (section-size (- end start)))
+ (if (< offset section-size)
+ (let ((result (base-pointer (+ start offset))))
+ ;; (pk 'p (number->string (+ start offset) 16) result)
+ (if (null-pointer? result)
+ (loop heap-sections 0 (random heap-size)) ;retry
+ result))
+ (loop rest
+ (+ size section-size)
+ (- offset section-size))))))))
+
+(define object-size
+ (pointer->procedure size_t
+ (dynamic-func "GC_size" (dynamic-link))
+ '(*)))
+
+(define base-pointer
+ (pointer->procedure '*
+ (dynamic-func "GC_base" (dynamic-link))
+ (list uintptr_t)))
+
+(define (heap-tag->type-name word)
+ "Return the type name as a symbol corresponding to the tag WORD."
+ (match (let/ec return
+ (let-syntax ((tag-name (syntax-rules ()
+ ((_ name pred mask tag)
+ (when (= (logand word mask) tag)
+ (return 'name))))))
+ (visit-heap-tags tag-name)
+ 'unknown))
+ ('program
+ (cond ((= (logand word #x1000) #x1000)
+ 'partial-continuation)
+ ((= (logand word #x2000) #x2000)
+ 'foreign-program)
+ ((= (logand word #x800) #x800)
+ 'continuation)
+ ((= (logand word #x400) #x400)
+ 'primitive-generic)
+ ((= (logand word #x200) #x200)
+ 'primitive)
+ ((= (logand word #x100) #x100)
+ 'boot-program)
+ (else
+ 'program)))
+ (type
+ type)))
+
+(define* (profile-heap #:key (sample-count 100000))
+ "Pick SAMPLE-COUNT addresses in the GC-managed heap and display a profile
+of this sample per data type."
+ (define heap-size
+ (assoc-ref (gc-stats) 'heap-size))
+
+ (define heap
+ (heap-sections))
+
+ (let ((objects (make-hash-table 57))
+ (visited (make-hash-table)))
+ (let loop ((i sample-count))
+ (unless (zero? i)
+ (let ((address (random-valid-address heap)))
+ (if (hashv-ref visited (pointer-address address))
+ (loop i)
+ (begin
+ (hashv-set! visited (pointer-address address) #t)
+ (let* ((tag (pointer-address (dereference-pointer address)))
+ (type (heap-tag->type-name tag))
+ (size (match type
+ ('pair (* 2 (sizeof '*)))
+ ('vector
+ (min (ash tag -8)
+ (object-size address)))
+ (_ (object-size address)))))
+ ;; (when (eq? 'unknown type)
+ ;; (pk (object-size address)))
+ ;; (when (eq? 'vector type)
+ ;; (pk 'vector size 'tag tag 'addr address 'vs (object-size address)))
+ (hashq-set! objects type
+ (match (hashq-ref objects type '(0 . 0))
+ ((count . total)
+ (cons (+ count 1) (+ total size))))))
+ (loop (- i 1)))))))
+ (let ((grand-total (hash-fold (lambda (type stats result)
+ (match stats
+ ((_ . total)
+ (+ total result))))
+ 0
+ objects)))
+ (format (current-error-port)
+ " % type self avg obj size~%")
+ (for-each (match-lambda
+ ((type . (count . total))
+ (format (current-error-port) "~5,1f ~30a ~14h ~7,1f~%"
+ (* 100. (/ total grand-total))
+ type total
+ (/ total count 1.))))
+ (sort (hash-map->list cons objects)
+ (match-lambda*
+ (((_ . (count1 . total1)) (_ . (count2 . total2)))
+ (or (> total1 total2)
+ (and (= total1 total2)
+ (> count1 count2)))))))
+ (format (current-error-port) "sampled heap: ~h MiB (heap size: ~h MiB)~%"
+ (/ grand-total (expt 2. 20))
+ (/ heap-size (expt 2. 20))))))
+
+(define (heap-samples type count)
+ "Sample COUNT objects of the given TYPE, a symbol such as 'vector, and
+return them.
+
+WARNING: This can crash your application as this could pick bogus or
+finalized objects."
+ (define heap
+ (heap-sections))
+
+ (let ((visited (make-hash-table)))
+ (let loop ((i count)
+ (objects '()))
+ (if (zero? i)
+ objects
+ (let ((address (random-valid-address heap)))
+ (if (hashv-ref visited (pointer-address address))
+ (loop i objects)
+ (begin
+ (hashv-set! visited (pointer-address address) #t)
+ (let ((tag (pointer-address (dereference-pointer address))))
+ (if (eq? type (heap-tag->type-name tag))
+ (loop (- i 1)
+ (cons (pointer->scm address) objects))
+ (loop i objects))))))))))
+