相対URIを絶対URIにする

Gaucherfc.uriには"Relative -> Absolute"というコメントが入っていますが、まだ実装されていないので書いてみました。

参考: RFC3986, RFC3986 日本語訳

(use rfc.uri)
(use srfi-11)
(use srfi-13)

(define (uri-relative->absolute rel-uri base-uri)
  (define (split uri)
    (receive (scheme specific)
      (uri-scheme&specific uri)
      (receive (authority path query fragment)
        (uri-decompose-hierarchical specific)
        (values scheme authority path query fragment))))

  ; RFC3986, Section 5.2.3
  (define (merge base-path rel-path base-authority)
    (if (and base-authority (not base-path))
      (string-append "/" rel-path)
      (if-let1 m (#/^(.*)\/[^\/]*$/ base-path)
        (string-append (m 1) "/" rel-path)
        rel-path)))

  ; RFC3986, Section 5.2.4
  (define (remove-dot-segments path)
    (let loop ((input path) (output '()))
      (cond ((not input)
             (string-join (reverse! output) ""))
            ; RFC3986, Section 5.2.4, 2A
            ((#/^\.{1,2}\/(.*)$/ input)
             => (^m (loop (m 1) output)))
            ; RFC3986, Section 5.2.4, 2B
            ((#/^(?:\/\.\/(.*)|\/\.)$/ input)
             => (^m (loop (string-append "/" (or (m 1) "")) output)))
            ; RFC3986, Section 5.2.4, 2C
            ((#/^(?:\/\.\.\/(.*)|\/\.\.)$/ input)
             => (^m (loop (string-append "/" (or (m 1) ""))
                          (if (not (null? output)) (cdr output) output))))
            ; RFC3986, Section 5.2.4, 2D
            ((#/^\.{1,2}$/ input)
             (loop #f output))
            ; RFC3986, Section 5.2.4, 2E
            ((#/^(\/?[^\/]+)(\/.*)?$/ input)
             => (^m (loop (m 2) (cons (m 1) output))))
            (else
              (loop #f (cons input output))))))

  ; RFC3986, Section 5.3
  (define (recompose scheme authority path query fragment)
    (with-output-to-string
      (lambda ()
        (when scheme    (display scheme) (display ":"))
        (when authority (display "//") (display authority))
        (display path)
        (when query     (display "?") (display query))
        (when fragment  (display "#") (display fragment)))))

  (let-values ([(r.scheme r.authority r.path r.query r.fragment) (split rel-uri)]
               [(b.scheme b.authority b.path b.query b.fragment) (split base-uri)])
    (let ([t.scheme #f] [t.authority #f] [t.path #f] [t.query #f] [t.fragment #f])
      ; RFC3986, Section 5.2.2
      (cond
        (r.scheme
         (set! t.scheme r.scheme)
         (set! t.authority r.authority)
         (set! t.path (remove-dot-segments r.path))
         (set! t.query r.query))
        (else
          (cond
            (r.authority
             (set! t.authority r.authority)
             (set! t.path (remove-dot-segments r.path))
             (set! t.query r.query))
            (else
              (cond
                ((not r.path)
                 ; lemniscusさんのコメントでの指摘を受けて、空文字列を返すように修正
                 (set! t.path (or b.path ""))
                 (if r.query
                   (set! t.query r.query)
                   (set! t.query b.query)))
                (else
                  (cond
                    ((string-prefix? "/" r.path)
                     (set! t.path (remove-dot-segments r.path)))
                    (else
                      (set! t.path (merge b.path r.path b.authority))
                      (set! t.path (remove-dot-segments t.path))))
                  (set! t.query r.query)))
              (set! t.authority b.authority)))
          (set! t.scheme b.scheme)))
      (set! t.fragment r.fragment)
      (recompose t.scheme t.authority t.path t.query t.fragment))))


;; ---------------------------------------------------------
;; Test cases from RFC3986 Section 5.4
;;
(use gauche.test)
(define (test-relative-uri-resolution resolver)
  (define base "http://a/b/c/d;p?q")
  (test-start "Relative URI resolution")
  (test-section "Normal")
  (test* "\"g:h\" -> \"g:h\"" "g:h" (resolver "g:h" base))
  (test* "\"g\" -> \"http://a/b/c/g\"" "http://a/b/c/g" (resolver "g" base))
  (test* "\"./g\" -> \"http://a/b/c/g\"" "http://a/b/c/g" (resolver "./g" base))
  (test* "\"g/\" -> \"http://a/b/c/g/\"" "http://a/b/c/g/" (resolver "g/" base))
  (test* "\"/g\" -> \"http://a/g\"" "http://a/g" (resolver "/g" base))
  (test* "\"//g\" -> \"http://g\"" "http://g" (resolver "//g" base))
  (test* "\"?y\" -> \"http://a/b/c/d;p?y\"" "http://a/b/c/d;p?y" (resolver "?y" base))
  (test* "\"g?y\" -> \"http://a/b/c/g?y\"" "http://a/b/c/g?y" (resolver "g?y" base))
  (test* "\"#s\" -> \"http://a/b/c/d;p?q#s\"" "http://a/b/c/d;p?q#s" (resolver "#s" base))
  (test* "\"g#s\" -> \"http://a/b/c/g#s\"" "http://a/b/c/g#s" (resolver "g#s" base))
  (test* "\"g?y#s\" -> \"http://a/b/c/g?y#s\"" "http://a/b/c/g?y#s" (resolver "g?y#s" base))
  (test* "\";x\" -> \"http://a/b/c/;x\"" "http://a/b/c/;x" (resolver ";x" base))
  (test* "\"g;x\" -> \"http://a/b/c/g;x\"" "http://a/b/c/g;x" (resolver "g;x" base))
  (test* "\"g;x?y#s\" -> \"http://a/b/c/g;x?y#s\"" "http://a/b/c/g;x?y#s" (resolver "g;x?y#s" base))
  (test* "\"\" -> \"http://a/b/c/d;p?q\"" "http://a/b/c/d;p?q" (resolver "" base))
  (test* "\".\" -> \"http://a/b/c/\"" "http://a/b/c/" (resolver "." base))
  (test* "\"./\" -> \"http://a/b/c/\"" "http://a/b/c/" (resolver "./" base))
  (test* "\"..\" -> \"http://a/b/\"" "http://a/b/" (resolver ".." base))
  (test* "\"../\" -> \"http://a/b/\"" "http://a/b/" (resolver "../" base))
  (test* "\"../g\" -> \"http://a/b/g\"" "http://a/b/g" (resolver "../g" base))
  (test* "\"../..\" -> \"http://a/\"" "http://a/" (resolver "../.." base))
  (test* "\"../../\" -> \"http://a/\"" "http://a/" (resolver "../../" base))
  (test* "\"../../g\" -> \"http://a/g\"" "http://a/g" (resolver "../../g" base))
  (test-section "Abnormal")
  (test* "\"../../../g\" -> \"http://a/g\"" "http://a/g" (resolver "../../../g" base))
  (test* "\"../../../../g\" -> \"http://a/g\"" "http://a/g" (resolver "../../../../g" base))
  (test* "\"/./g\" -> \"http://a/g\"" "http://a/g" (resolver "/./g" base))
  (test* "\"/../g\" -> \"http://a/g\"" "http://a/g" (resolver "/../g" base))
  (test* "\"g.\" -> \"http://a/b/c/g.\"" "http://a/b/c/g." (resolver "g." base))
  (test* "\".g\" -> \"http://a/b/c/.g\"" "http://a/b/c/.g" (resolver ".g" base))
  (test* "\"g..\" -> \"http://a/b/c/g..\"" "http://a/b/c/g.." (resolver "g.." base))
  (test* "\"..g\" -> \"http://a/b/c/..g\"" "http://a/b/c/..g" (resolver "..g" base))
  (test* "\"./../g\" -> \"http://a/b/g\"" "http://a/b/g" (resolver "./../g" base))
  (test* "\"./g/.\" -> \"http://a/b/c/g/\"" "http://a/b/c/g/" (resolver "./g/." base))
  (test* "\"g/./h\" -> \"http://a/b/c/g/h\"" "http://a/b/c/g/h" (resolver "g/./h" base))
  (test* "\"g/../h\" -> \"http://a/b/c/h\"" "http://a/b/c/h" (resolver "g/../h" base))
  (test* "\"g;x=1/./y\" -> \"http://a/b/c/g;x=1/y\"" "http://a/b/c/g;x=1/y" (resolver "g;x=1/./y" base))
  (test* "\"g;x=1/../y\" -> \"http://a/b/c/y\"" "http://a/b/c/y" (resolver "g;x=1/../y" base))
  (test* "\"g?y/./x\" -> \"http://a/b/c/g?y/./x\"" "http://a/b/c/g?y/./x" (resolver "g?y/./x" base))
  (test* "\"g?y/../x\" -> \"http://a/b/c/g?y/../x\"" "http://a/b/c/g?y/../x" (resolver "g?y/../x" base))
  (test* "\"g#s/./x\" -> \"http://a/b/c/g#s/./x\"" "http://a/b/c/g#s/./x" (resolver "g#s/./x" base))
  (test* "\"g#s/../x\" -> \"http://a/b/c/g#s/../x\"" "http://a/b/c/g#s/../x" (resolver "g#s/../x" base))
  (test* "\"http:g\" -> \"http:g (for strict persers)\"" "http:g" (resolver "http:g" base))
  ;(test* "\"http:g\" -> \"http:g (for backward compatibility)\"" "http://a/b/c/g" (resolver "http:g" base))
  (test-section "Other")
  (test* "empty path" "http://aaa.bbb.ccc?p=10&q=20" (resolver "?p=10&q=20" "http://aaa.bbb.ccc"))
  (test-end))

(test-relative-uri-resolution uri-relative->absolute)

uri-relative->absolute という名前はちょっと長いかも。
RubyURI.joinPythonurljoinのように、引数の順序を入れ替えてuri-joinとかでもいいかもしれません。