Script Library: xpath.r

Script Library: 1247 scripts
Archive version of: xpath.r ... version: 1 ... neoreb 4-Mar-2007

Amendment note: new script || Publicly available? Yes
REBOL [
    Library: [
        level: 'intermediate
        platform: 'all
        type: tool
        domain: [ai xml]
        tested-under: none
        support: none
        license: none
        see-also: none
        ]

    Title: "xpath.r"
    Date: 03-march-2007
    File: %xpath.r
    Author: "Alban Gabillon"
    Version: 0.1
    Purpose: {
    This script shows how to implement an XPath interpreter in Rebol/Prolog.
    I went to the easiest. Therefore, this interpreter is not complete. 
    It is only a kind of "Proof of Concept". It lacks many features.
    Currently it can parse a document containing elements and pcdata only. In particular it does not handle ATTRIBUTE.
    In this script I see an xml document as a tree of untyped nodes. 
    The consequence is that the syntax DOES NOT FOLLOW exactly the XPath syntax.
    (See the EXAMPLES below to understand how it works).
    Note: It could be perfectly possible to directly parse XML data instead of rebxml data 
    but it would be a bit more difficult to write the parse_doc function}]


samplexml: {
<movie>
    <title>Star Trek: Insurrection</title>
    <star>Patrick Stewart</star>
    <star>Brent Spiner</star>
    <theater>
        <theater-name>MonoPlex 2000</theater-name>
        <showtime>14:15</showtime>
        <showtime>16:30</showtime>
        <showtime>18:45</showtime>
        <showtime>21:00</showtime>
        <price>
            <adult>$8.50</adult>
            <child>$5.00</child>
        </price>
    </theater>
    <theater>
        <theater-name>Bigscreen 1</theater-name>
        <showtime>19:30</showtime>
        <price>$6.00</price>
    </theater>
</movie>
}



examples: {EXAMPLES:

For selecting the theaters
---------------------------------------------
XPath==> /movie/theater

For selecting all the stars
----------------------------------------
XPath==> //star

For selecting the first showtime of all theaters
------------------------------------------------------------------------
XPath==>//theater/showtime(1)

For selecting all the showtimes of the second theater
------------------------------------------------------------------------------------
XPath==>//theater(2)/showtime

For selecting all the showtimes of the Bigscreen1 theater
-----------------------------------------------------------------------------------------
XPath==>//theater[./theater-name/Bigscreen 1]/showtime
OR
XPath==>//theater[.//Bigscreen 1]/showtime

For selecting the theaters with a showtime at 21:00
--------------------------------------------------------------------------------
XPath==>//theater[./showtime/21:00]

}


do http://perso.orange.fr/alban.gabillon/rebsite/xpath/xml2rebxml.r
do http://perso.orange.fr/alban.gabillon/rebsite/xpath/prolog.r

parsepath: func [
"parse an xpath expression - output is a block [pathup axis nodetest predicate position]"
string [string!]
/local workstring pathup test predicate position axis result][
either string = "root" [result: copy ["root" "" "" "" ""]][
predicate: position: ""
workstring: reverse copy string
switch first workstring [
	#"]" [; there is a predicate tied to the nodetest
		predicate: copy ""
		rec: 1
		while [rec > 0][
			workstring: next workstring 
			if workstring/1 = #"]" [rec: rec + 1]
			if workstring/1 = #"[" [rec: rec - 1]
			either rec > 0 [append predicate workstring/1][workstring: next workstring]]
		reverse predicate]
	#")" [; there is a position tied to the nodetest
		position: copy ""
		workstring: next workstring 
		parse workstring [copy position to  "(" thru "(" mark:]
		position: to-integer reverse position
		workstring: mark]]
; nodetest
parse workstring [copy test to  "/" mark:]
reverse test
; pathup and axis
workstring: mark
workstring: next  workstring
either workstring/1 = #"/" [axis: copy "//" pathup: copy next workstring][axis: copy "/" pathup: copy workstring]
reverse pathup
result: copy reduce [pathup axis test predicate position]]]

parse_doc: func [
"parse the rebxml block and create db atomic facts"
parent [block!]
"parent block"
data [block!]
"child block"
/local element elementlist search pos][
element: none
elementlist: copy []
While [not tail? data] [
	; "switch" instead of "either". Indeed one day somebody might need to extend this function (e.g. for considering attributes)
	switch/default type?/word data/1 [
		block! [parse_doc element data/1]]
		[either search: find elementlist data/1 [
			pos: search/2: search/2 + 1][
			pos: 1
			append elementlist reduce [data/1 1]]
		append db_facts compose/deep [index [[(data)] (pos)]]
		element: data
		append db_facts compose/deep [child [[(element)] [(parent)]]]]
	data: next data]]
	
prompt: has [expression r][
	expression: copy ""
	expression: ask "XPath==> "
	either not empty? expression [
		r: goal db probe [
			xpath [expression X]
			(probe X)]
		print [r "solution(s)"]
		false][true]]


db_facts: copy []
xmldata: copy samplexml
trim/lines xmldata
parse xmldata [any [to "> <" mark: (mark: next mark remove mark mark: next mark) :mark]]

; create the document root (not to confuse with the element root)
doc: copy [/]
doc: append/only doc xml2rebxml xmldata

;create database atomic facts
parse_doc doc doc/2
db: assert none [xml [doc]]
assert db db_facts

tree_geometry_rules: [
	descendant [X Y][
		child [X Y]]
	descendant [X Y][
		child [X Z]
		descendant[Z Y]]]


comment {COMMENT lines between ============ and UNCOMMENT the 
line between *************** if you want as much deduction as possible (but low performances)}
;=======================================
db2: copy db
assert  db2 tree_geometry_rules
extended_db_facts: copy []
for-which db2 [X Y] [
	descendant [X Y]
][
	append extended_db_facts compose/deep [descendant [[(X)] [(Y)]]]
]
assert db extended_db_facts
;=======================================
;****************************************************************
; assert db tree_geometry_rules
;****************************************************************

;create xpath rules
xpath_rules: [
	; output is a leaf
	xpath [P X][
		xpa [P X]
		equal? [true (none? X/2)]] 
	; output is a tree
	xpath [P Y][
		xpa [P X]
		equal? [false (none? X/2)]
		equal? [Y (reduce [X/1 X/2])]]
	; for addressing the whole document
	xpa ["/" X][
		xp [["root" "" "" "" ""] X]]	
	xp [["root" "" "" "" ""] doc][
		xml [X]]
	; for not having path starting with / (would be more difficult to handle in the parsepath function)
	xpa [Pl X][
		equal? [Pi (join "root" Pl)]
		equal? [Pp (parsepath Pi)]
		xp [Pp X]]
	; child axis		
	xp [P X][
		equal? ["/" (P/2)]		
		equal? ["" (P/4)]					
		equal? ["" (P/5)]
		equal? [Test (P/3)]
		equal? [Pathup (P/1)]
		equal? [Pp (parsepath Pathup)]
		xp [Pp Y]	
		child [X Y]
		equal? [(to-string X/1) Test]]
	; descendant axis
	xp [P X][
		equal? ["//" (P/2)]
		equal? ["" (P/4)]					
		equal? ["" (P/5)]
		equal? [Test (P/3)]
		equal? [Pathup (P/1)]
		equal? [Pp (parsepath Pathup)]
		xp [Pp Y]		
		descendant [X Y]
		equal? [(to-string X/1) Test]]
	; nodetest tied with a position
	xp [P X][
		not-equal? ["" (P/5)]	
		equal? [Pos (P/5)]
		equal? [Pp (parsepath join P/1 [P/2 P/3])]
		xp [Pp X]		
		index [X Pos]]
	; nodetest tied with a predicate (i.e. a path relative to the context node . (dot))	
	xp [P X][
		not-equal? ["" (P/4)]		
		equal? [Pp (parsepath join P/1 [P/2 P/3])]
		equal? [Pc (parsepath join P/1 [P/2 P/3 next P/4])]
		xp [Pp X]
		xp [Pc Y]
		descendant[Y X]]
]
assert db xpath_rules

print samplexml
print examples
print "press ENTER to leave the interpreter"
until [prompt]