From ba1c1853a79a5930ca7db7a6b368859f805df98d Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Thu, 29 Aug 2019 15:59:16 +0200 Subject: swh: Add hooks for rate limiting handling. * guix/swh.scm (%allow-request?, %save-rate-limit-reset-time) (%general-rate-limit-reset-time): New variables. (request-rate-limit-reached?, update-rate-limit-reset-time!): New procedures. (call): Call '%allow-request?'. Change 'swh-error' protocol to pass METHOD in addition to URL. * tests/swh.scm ("rate limit reached") ("%allow-request? and request-rate-limit-reached?"): New tests. --- guix/swh.scm | 84 +++++++++++++++++++++++++++++++++++++++++++++-------------- tests/swh.scm | 36 +++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 20 deletions(-) diff --git a/guix/swh.scm b/guix/swh.scm index c253e217da..42f38ee048 100644 --- a/guix/swh.scm +++ b/guix/swh.scm @@ -20,6 +20,7 @@ #:use-module (guix base16) #:use-module (guix build utils) #:use-module ((guix build syscalls) #:select (mkdtemp!)) + #:use-module (web uri) #:use-module (web client) #:use-module (web response) #:use-module (json) @@ -32,6 +33,9 @@ #:use-module (ice-9 popen) #:use-module ((ice-9 ftw) #:select (scandir)) #:export (%swh-base-url + %allow-request? + + request-rate-limit-reached? origin? origin-id @@ -196,31 +200,71 @@ Software Heritage." ((? string? str) str) ((? null?) #f))) +(define %allow-request? + ;; Takes a URL and method (e.g., the 'http-get' procedure) and returns true + ;; to keep going. This can be used to disallow a requests when + ;; 'request-rate-limit-reached?' returns true, for instance. + (make-parameter (const #t))) + +;; The time when the rate limit for "/origin/save" POST requests and that of +;; other requests will be reset. +;; See . +(define %save-rate-limit-reset-time 0) +(define %general-rate-limit-reset-time 0) + +(define (request-rate-limit-reached? url method) + "Return true if the rate limit has been reached for URI." + (define uri + (string->uri url)) + + (define reset-time + (if (and (eq? method http-post) + (string-prefix? "/api/1/origin/save/" (uri-path uri))) + %save-rate-limit-reset-time + %general-rate-limit-reset-time)) + + (< (car (gettimeofday)) reset-time)) + +(define (update-rate-limit-reset-time! url method response) + "Update the rate limit reset time for URL and METHOD based on the headers in +RESPONSE." + (let ((uri (string->uri url))) + (match (assq-ref (response-headers response) 'x-ratelimit-reset) + ((= string->number (? number? reset)) + (if (and (eq? method http-post) + (string-prefix? "/api/1/origin/save/" (uri-path uri))) + (set! %save-rate-limit-reset-time reset) + (set! %general-rate-limit-reset-time reset))) + (_ + #f)))) + (define* (call url decode #:optional (method http-get) #:key (false-if-404? #t)) "Invoke the endpoint at URL using METHOD. Decode the resulting JSON body using DECODE, a one-argument procedure that takes an input port. When FALSE-IF-404? is true, return #f upon 404 responses." - (let*-values (((response port) - (method url #:streaming? #t))) - ;; See . - (match (assq-ref (response-headers response) 'x-ratelimit-remaining) - (#f #t) - ((? (compose zero? string->number)) - (throw 'swh-error url response)) - (_ #t)) - - (cond ((= 200 (response-code response)) - (let ((result (decode port))) - (close-port port) - result)) - ((and false-if-404? - (= 404 (response-code response))) - (close-port port) - #f) - (else - (close-port port) - (throw 'swh-error url response))))) + (and ((%allow-request?) url method) + (let*-values (((response port) + (method url #:streaming? #t))) + ;; See . + (match (assq-ref (response-headers response) 'x-ratelimit-remaining) + (#f #t) + ((? (compose zero? string->number)) + (update-rate-limit-reset-time! url method response) + (throw 'swh-error url method response)) + (_ #t)) + + (cond ((= 200 (response-code response)) + (let ((result (decode port))) + (close-port port) + result)) + ((and false-if-404? + (= 404 (response-code response))) + (close-port port) + #f) + (else + (close-port port) + (throw 'swh-error url method response)))))) (define-syntax define-query (syntax-rules (path) diff --git a/tests/swh.scm b/tests/swh.scm index 9a0da07ae1..e36c54e5fb 100644 --- a/tests/swh.scm +++ b/tests/swh.scm @@ -19,6 +19,7 @@ (define-module (test-swh) #:use-module (guix swh) #:use-module (guix tests http) + #:use-module (web response) #:use-module (srfi srfi-64)) ;; Test the JSON mapping machinery used in (guix swh). @@ -68,6 +69,41 @@ (directory-entry-length entry))) (lookup-directory "123")))) +(test-equal "rate limit reached" + 3000000000 + (let ((too-many (build-response + #:code 429 + #:reason-phrase "Too many requests" + + ;; Pretend we've reached the limit and it'll be reset in + ;; June 2065. + #:headers '((x-ratelimit-remaining . "0") + (x-ratelimit-reset . "3000000000"))))) + (with-http-server `((,too-many "Too bad.")) + (parameterize ((%swh-base-url (%local-url))) + (catch 'swh-error + (lambda () + (lookup-origin "http://example.org/guix.git")) + (lambda (key url method response) + ;; Ensure the reset time was recorded. + (@@ (guix swh) %general-rate-limit-reset-time))))))) + +(test-assert "%allow-request? and request-rate-limit-reached?" + ;; Here we test two things: that the rate limit set above is in effect and + ;; that %ALLOW-REQUEST? is called, and that 'request-rate-limit-reached?' + ;; returns true. + (let* ((key (gensym "skip-request")) + (skip-if-limit-reached + (lambda (url method) + (or (not (request-rate-limit-reached? url method)) + (throw key #t))))) + (parameterize ((%allow-request? skip-if-limit-reached)) + (catch key + (lambda () + (lookup-origin "http://example.org/guix.git") + #f) + (const #t))))) + (test-end "swh") ;; Local Variables: -- cgit v1.2.3