Script Library: 1222 scripts
 

xml2rebxml.r

REBOL [ Title: "XML to RebXML Converter" Date: 08-Nov-2005 Version: 1.2.0 File: %xml2rebxml.r Author: "John Niclasen" Rights: {Copyright © John Niclasen, NicomSoft 2005} Purpose: {Convert XML to RebXML block structure.} Comment: { Build from xml2txt.r and XML 1.0 DTD from www.w3.org. } History: [ 1.2.0 [08-11-2005 JN {Added suppport for " and ' in Attribute. Added preservation of comments.}] 1.1.2 [06-11-2005 JN {Fixed bug with multi comments.}] 1.1.1 [24-02-2005 JN {Changed EmptyElemTag from # to a slash: /}] 1.1.0 [23-02-2005 JN {Added support for namespace-tags. Changed EmptyElemTag from "" to a number sign: #}] 1.0.1 [31-01-2005 JN {Added "load mold" before returning output. Added replacements for ">", "<" and "&".}] 1.0.0 [25-01-2005 JN {Created.}] ] library: [ level: 'intermediate platform: 'all type: 'tool domain: [markup parse xml] tested-under: none support: none license: 'BSD see-also: "rebxml2xml.r" ] ] output: make block! 1000 context [ preserve-comments: false lit-slash: to-lit-word "/" block-begin: to word! "[" block-end: to word! "]" attrs: make block! 20 ;output: make block! 1000 input-str: none att-data: data: temp: tag-name: att-name: enc-name: c: none ;-- Character Sets joinset: func [cset chars] [insert copy cset chars] diffset: func [cset chars] [remove/part copy cset chars] space: charset [#"^(09)" #"^(0A)" #"^(0D)" #" "] char: charset [#"^(09)" #"^(0A)" #"^(0D)" #" " - #"^(FF)"] Letter: charset [ #"A" - #"Z" #"a" - #"z" #"^(C0)" - #"^(D6)" #"^(D8)" - #"^(F6)" #"^(F8)" - #"^(FF)" ] ;Digit: charset [#"0" - #"9"] alpha-num: joinset Letter "0123456789" ; need to allow: Digit name-first: joinset Letter "_:" NameChar: joinset alpha-num ".-_:" data-chars: diffset char "<" ; "&<" Name: [name-first any NameChar] S: [some space] ;-- XML Rules document: [prolog element to end] AttValue: ["'" copy att-data to "'" skip | {"} copy att-data to {"} skip] Comment: ["<!--" copy data thru "-->" ( if preserve-comments [ insert tail output join "<!--" data ;either empty? output [ ;insert output join "<!--" data ;][ ;either string? last output [ ;append first back tail output join "<!--" data ;][ ;insert tail output join "<!--" data ;] ;] ]) ] PI: ["<?" thru "?>"] CDSect: ["<![CDATA[" copy data to "]]>" "]]>" ( trim data if not empty? data [ either string? data [ insert tail output first parse/all data "^/" ][ insert tail output data ] ] ) ] prolog: [opt S opt XMLDecl any [Misc | "<!" thru ">"]] XMLDecl: ["<?xml" VersionInfo opt EncodingDecl thru "?>"] VersionInfo: [S "version" Eq ["'" VersionNum "'" | {"} VersionNum {"}]] Eq: [opt S #"=" opt S] ;VersionNum: [copy temp some NameChar (print ["XML Version:" temp])] VersionNum: [copy temp some NameChar] Misc: [Comment | PI | S] element: [ Comment | s-tag [ "/>" ( insert tail output to-url tag-name insert tail output attrs clear attrs insert tail output lit-slash ) | #">" ( insert tail output to-url tag-name insert tail output attrs clear attrs insert tail output block-begin ) any content ETag ( ;if empty? data [ insert tail output block-end ;] ;clear data ) ] ] s-tag: ["<" copy tag-name Name any [S Attribute] opt S] Attribute: [copy att-name Name Eq AttValue ( replace/all att-data """ #"^"" replace/all att-data "'" #"'" replace/all att-data ">" #">" replace/all att-data "<" #"<" replace/all att-data "&" #"&" append attrs reduce [to-url att-name att-data] ) ] ETag: ["</" copy tag-name Name opt S ">"] content: [CDSect | element | copy data some data-chars ( if not empty? data [ replace/all data ">" #">" replace/all data "<" #"<" replace/all data "&" #"&" either string? last output [ append first back tail output data ][ insert tail output data ] ] ) ] Latin-first: charset [#"A" - #"Z" #"a" - #"z"] Latin: joinset Latin-first "0123456789._-" EncodingDecl: [S "encoding" Eq [{"} Encname {"} | "'" Encname "'"]] Encname: [copy enc-name [Latin-first any Latin]] hichar: charset [#"^(80)" - #"^(FF)"] unicode: [any [ #"^(00)" copy c char (append input-str c) | #"^(C2)" copy c hichar (append input-str c) | #"^(C3)" copy c hichar (append input-str (to-char c) + #"^(40)") | copy c char (append input-str c) ]] set 'xml2rebxml func [ "Parses XML code and returns as block structure" code [string!] "XML code to parse" /preserve "Preserve comments" ][ preserve-comments: either preserve [true] [false] clear output enc-name: none parse/all/case code [prolog to end] either enc-name = "ISO-8859-1" [ input-str: code ][ input-str: make string! 16384 clear input-str parse/all/case code unicode ] clear output parse/all/case input-str document load mold output ] ] ; context
halt ;; to terminate script if DO'ne from webpage