[REBOL] XML Parsing...
From: eventi:nyic at: 26-Oct-2000 13:18
I'm by no means an expert, but here's something I've been playing with:
REBOL[]
;; utility stuff
;; Nesting depth used by indent; starts at the outermost level.
tablevel: 0
;; inc and dec receive a variable's name unevaluated (lit-word argument)
;; and store back that variable's value plus or minus one.
inc: func [ 'var ] [ set var 1 + get var ]
dec: func [ 'var ] [ set var (get var) - 1 ]
;; Print one tab per nesting level, with no trailing newline.
indent: does [ loop tablevel [ prin "^-" ] ]
;; Prototype tree walker for the block structure produced by parse-xml.
;; Derived parsers (make xml-parser [...]) override handled, start and stop.
xml-parser: make object! [
    ;; Flat list of pairs: a tag name (string) followed by the lit-word
    ;; naming the function that handles elements with that name.
    handled: make block! 10

    ;; Call the handler registered for tagname, passing it the element's
    ;; attribute list and contents.
    dispatch: func [ tagname attribute-list contents ] [
        ; print rejoin [ "Dispatching " tagname ]
        do get select handled tagname attribute-list contents
    ]

    ;; Hooks evaluated before and after the walk; none means "do nothing".
    start: none
    stop: none

    ;; Entry point: run the start hook, walk the tree, run the stop hook.
    ;; Note that inside this object the word parse refers to this function,
    ;; not to the native parse.
    parse: func [ xml ] [
        start
        do-block xml
        stop
    ]

    ;; Walk a block of [tagname attribute-list contents] triples. Elements
    ;; whose tag is listed in handled go to dispatch; all others are
    ;; descended into so that handled elements nested inside them are found.
    do-block: func [
        xml [block!]
        /local tagname attribute-list contents name value child
    ][
        foreach [tagname attribute-list contents] xml [
            either find handled tagname [
                dispatch tagname attribute-list contents
            ][
                ;; Unhandled element. Uncomment the single-semicolon lines
                ;; below to print the XML back out while walking.
                ; indent prin rejoin ["<" tagname]
                ; inc tablevel
                if attribute-list [
                    foreach [name value] attribute-list [
                        ; prin rejoin [" " name {="} value {"}]
                    ]
                ]
                either contents [
                    ; print ">"
                    foreach child contents [
                        either block? child [
                            ;; A nested element: recurse into it.
                            do-block child
                        ][
                            ;; Anything else (character data) is skipped.
                            ; indent print child
                        ]
                    ]
                    ; dec tablevel
                    ; indent print rejoin ["</" tagname ">"]
                ][
                    ;; Element without contents.
                    ; dec tablevel
                    ; indent print " />"
                ]
            ]
        ]
    ]
]
;; Here's an example: parses a page from moreover.com, and makes it into
;; link soup
;; Buffer that accumulates the generated HTML.
html: copy ""
;; Add what to the end of the html buffer and return the buffer.
emit: func [ what ] [ head insert tail html what ]
;; Record holding the pieces of the article currently being assembled.
;; All three fields start out as none.
article: context [
    headline: none
    time: none
    url: none
]
;; Handler for headline elements: store a copy of the element's contents
;; as the current article's headline. attribute-list is accepted because
;; every handler is called with it, but it is not used here.
;; An element without contents arrives with contents set to none, which
;; copy rejects, so that case is stored as an empty block instead.
do-headline: func [attribute-list contents] [
    article/headline: copy any [contents []]
]
;; Handler for url elements: store a copy of the element's contents as the
;; current article's url. attribute-list is accepted but not used.
;; An element without contents arrives with contents set to none, which
;; copy rejects, so that case is stored as an empty block instead.
do-url: func [attribute-list contents] [
    article/url: copy any [contents []]
]
;; Handler for harvest-time elements: store a copy of the element's
;; contents as the current article's time. attribute-list is accepted but
;; not used.
;; An element without contents arrives with contents set to none, which
;; copy rejects, so that case is stored as an empty block instead.
do-time: func [attribute-list contents] [
    article/time: copy any [contents []]
]
;; Parser applied to the children of one article element. Each pair maps
;; a child tag name to the handler that records it in the article object.
article-parser: make xml-parser [
    handled: [
        "headline_text" 'do-headline
        "url"           'do-url
        "harvest_time"  'do-time
    ]
]
;; Handler for article elements: feed every child element to
;; article-parser, which fills in the article object, then emit one HTML
;; link line built from the collected url, headline and time.
;; attribute-list is accepted because every handler is called with it,
;; but it is not used here.
do-article: func [attribute-list contents] [
    ;; An element without contents arrives as none; there is nothing to
    ;; walk and nothing to link to, so emit nothing.
    if none? contents [exit]
    ;; Start from blank fields so that a child missing from this article
    ;; cannot inherit the value left behind by the previous article.
    article/headline: copy ""
    article/url: copy ""
    article/time: copy ""
    foreach element contents [
        ;; Only nested elements (blocks) are parsed; character data
        ;; between them is ignored.
        if block? element [
            article-parser/parse element
        ]
    ]
    emit rejoin [
        {<a href="} article/url {">} article/headline </a>
        article/time <br>
    ]
]
;; Top-level parser for the feed: wraps the output in html/body tags and
;; hands every article element to do-article.
moreover-parser: make xml-parser [
    handled: [ "article" 'do-article ]
    start: func [] [ emit [ <html> <body> ] ]
    stop: func [] [ emit [ </body> </html> ] ]
]
;; You have to be a big fan of f---edcompany.com's webboards to appreciate
;; this link.
;; "This is not a toy to be trifled with by children like you!"
;; Fetch the feed, turn it into a parse-xml tree, run the tree through
;; moreover-parser (which fills the html buffer), then print the result.
use [feed-tree] [
    feed-tree: parse-xml read http://p.moreover.com/cgi-local/page?index_crm+xml
    moreover-parser/parse feed-tree
]
print html