Script Library: 1238 scripts
  • Home
  • Script library
  • AltME Archive
  • Mailing list
  • Articles Index
  • Site search
 

Archive version of: url-handler.r ... version: 3 ... hallyhaa 16-Oct-2004

Amendment note: Bug fix. Some absolute URLs were taken for relative ones in previous version. || Publicly available? Yes

#!/usr/local/bin/rebol -cs

REBOL [
  Title: "URL Handler"
  File: %url-handler.r
  Author: [ "HY" ]
  Purpose: { A script to handle URLs as objects. Rebol's built-in 'decode-url function
             returns an object, but no methods to manipulate it. This script includes
             functions that make it possible to manipulate the URL as you please.
           }
  Date: 16-Aug-2003
  History: [
             16-Oct-2004 {Removed a bug that caused absolute URLs starting with other protocols than
                          http and ftp to be parsed as relative URLs. The URL-handler now accepts
                          other protocols again. It is reasonable to accept all protocols, but as I
                          use only free rebol versions, I have no https protocol, and so I have to
                          check this elsewhere.}
             25-Sep-2004 "Added the library header"
             04-May-2004 {Removed a bug that caused CGI parameters to be lost from the string used
                          to construct the object. This bug found its way in on one of the two
                          previous updates.}
             22-Mar-2004 {Removed a bug that caused a crash when a URL ended with
                          no trailing slash after the host name, but a line break instead.}
             26-Mar-2004 {Now accepts only ftp:// and http:// protocols. Also fixed a bug that would cause
                          "http://some/redirect?http://www.url.com/" to be parsed erroneously.}
             22-Mar-2004 "Added protocols that we don't wish to support"
             19-Mar-2004 "Added a few examples and forbid relative URLs with no protcol and/or host"
             13-Jan-2004 "Fixed a bug in handling of relative (section) links"
             16-Aug-2003 "Seems this is the date I first made the script."
           ]
  Examples: {
    site: url-handler "http://www.rebol.com"
    print site/url ; == http://www.rebol.com/
    site/move-to "docs.html"
    site/move-to "http://www.rebol.com/docs/core23/rebolcore-1.html"
    site/move-to "#sect1"
    print site/protocol ; == "http://"
    print site/host ; == "www.rebol.com"
    print site/path ; == "/docs/core23/rebolcore-1.html"
    print site/file ; == "rebolcore-1.html"
    print site/query-part ; == ""
    print site/section ; == "#sect1"
  }
  TODO: "Handle CGI parameters better?"
  Library: [
    level: 'intermediate
    domain: [http file-handling other-net]
    license: none
    Platform: 'all
    Tested-under: none
    Type: [module]
    Support: none
  ]
]

url-handler-object: context [

  ; 'context should automatically pick up set-words
  ; and bind them to the new local context, so:
  protocol: ""
  host: ""
  user-name: ""
  password: ""
  port: 80 ; default
  path: ""
  file: ""
  query-part: ""
  section: ""
  rest: ""
  url: ""


  init: func [ /local relative? ] [


    rest: trim/lines url
    if 0 = length? rest [
      throw "No URL to parse!"
    ]
    relative?: all [#"#" = first url "" <> protocol "" <> host]

    either section: find rest "#"    [ section: copy section
                                       rest: head remove/part find rest "#" length? section ]
                                     [ section: "" ]

    if 0 = length? rest [
      either relative? [ rejoin-it return ] [ throw "No URL to parse!" ]
    ]
    either query-part: find rest "?" [ query-part: copy query-part
                                       rest: head remove/part find rest "?" length? query-part ]
                                     [ query-part: "" ]

    relative?: not find/any url "*://"

    if all [relative? any ["" = protocol "" = host]] [
      throw "Unsupported or lacking protocol and/or host information!"
    ]

    ;if any [ find/match url "mailto:"
    ;         find/match url "javascript:"
    ;         find/match url "file://"
    ;         find/match url "https://" ; irritating this is
    ;         find/match url "whois://"
    ;         find/match url "news://"
    ;         find/match url "irc://"
    ;         find/match url "pop://"
    ;         find/match url "tcp://"
    ;         find/match url "mysql://"
    ;         find/match url "web://"
    ;         find/match url "dns://"
    ;         find/match url "rtsp://"
    ;         find/match url "serial://"
    ;         find/match url "simple://"
    ;         find/match url "pnm://"
    ;       ] [ throw "Unsupported protocol!" ]

    either relative? [

      ; protocol and host supposedly known.
      if any ["" = protocol "" = host] [throw "Lacking protocol and/or host information!" "(How could this happen?)"]

      if "" = rest [
        rejoin-it
        return
      ]
      path: either #"/" = first rest [ copy rest ] [ join path rest ]

      if not any [none? path "" = path] [
        file: either #"/" = last path [ "" ] [ last parse path "/" ]
      ]

      if not any [none? file "" = file] [
        path: copy/part path (index? find path file) - 1
      ]

    ] [
      ; not relative. Must have protocol and host information
      if any [none? rest "" = rest] [ throw "No protocol specified!" ]

      ; protocol must be the first thing in the url string.

      ;rest: any [find/match url "ftp://" find/match url "http://"]
      rest: find/match/any url "*://"

      protocol: copy/part head rest (index? rest) - 1

      if any [none? rest "" = rest] [ throw "No host specified!" ]

      phost: first parse rest "/" ; don't need parse/all, because no spaces are allowed in URLs
                                 ; (and if there were spaces in a url, it most certainly is _not_
                                 ; in the host part anyway)

      host: parse phost "@"
      if 2 = length? host [
        phost: second host
        host: parse first host ":"
        user-name: first host
        if 2 = length? host [
          password: second host
        ]
      ]

      host: parse phost ":"
      if 2 = length? host [
        if error? try [ port: to-integer second host  ] [ throw "Port number is not an integer!" ]
      ]
      if 0 = length? host [ throw "Imparsible (?) host name" ]
      host: first host

      rest: find/tail rest phost
      path: either any [none? rest 0 = length? rest] [ "/" ] [ copy rest ]

      if not any [none? path "" = path] [
        file: either #"/" = last path [ "" ] [ last parse path "/" ]
      ]

      if not any [none? file "" = file] [
        path: copy/part path (index? find path file) - 1
      ]

    ] ; end either relative?


    rejoin-it

  ] ; end init


  rejoin-it: func [] [
    ; first remove ../ sequences:
    if find path "../" [
      parts: parse path "/"
      while [i: find parts ".."] [remove remove back i]
      while [i: find parts ""] [remove i]
      forall parts [
        insert parts "/"
        parts: next parts
      ]
      path: rejoin head parts
      if 0 = length? path [path: "/"]
    ]

    ; then remove ./ sequences:
    if find path "./" [
      parts: parse path "/"
      while [i: find parts "."] [remove i]
      while [i: find parts ""] [remove i]
      forall parts [
        insert parts "/"
        parts: next parts
      ]
      path: rejoin head parts
      if 0 = length? path [path: "/"]
    ]

    ; then rejoin:
    url: either port = 80 [
      either user-name = "" [
        rejoin [protocol host path file query-part section]
      ] [
        either password = "" [
          rejoin [protocol user-name "@" host path file query-part section]
        ] [
          rejoin [protocol user-name ":" password "@" host path file query-part section]
        ]
      ]
    ] [
      either user-name = "" [
        rejoin [protocol host ":" port path file query-part section]
      ] [
        either password = "" [
          rejoin [protocol user-name "@" host ":" port path file query-part section]
        ] [
          rejoin [protocol user-name ":" password "@" host ":" port path file query-part section]
        ]
      ]
    ]
  ]

  canonical: func [] [
    either user-name = "" [
      rejoin [protocol host ":" port path file query-part section]
    ] [
      either password = "" [
        rejoin [protocol user-name "@" host ":" port path file query-part section]
      ] [
        rejoin [protocol user-name ":" password "@" host ":" port path file query-part section]
      ]
    ]
  ]

  plain: func [] [
    rejoin [protocol host path file query-part section]
  ]


  move-to: func [ target ] [
    url: copy target
    init
  ] ; end move-to


]

; shortcut:
url-handler: func [ st [string!] /local s] [
  s: make url-handler-object [ url: copy st ]
  s/init
  return s
]
Notes