;; ================================================= ;; Script: parse-string.r ;; downloaded from: www.REBOL.org ;; on: 28-Mar-2024 ;; at: 21:17:14.234267 UTC ;; owner: stan [script library member who can update ;; this script] ;; ================================================= ;; ================================================== ;; email address(es) have been munged to protect them ;; from spam harvesters. ;; If you were logged on the email addresses would ;; not be munged ;; ================================================== REBOL [ Title: "Func Parse String" Date: 20-Jul-2004 Version: 0.8.1 File: %parse-string.r Author: "Stan Silver" Email: %stasil213--yahoo--com Purpose: {Creates and returns a function that performs custom string substitution} Notes: { The file contains one function, called 'func-parse-string, and tests. This function is called with one argument - a rule block. The rule block is similar in spirit to a rebol parse rule block, but with different syntax. A new function is returned that should be stored in a variable. In other words, func-parse-string acts like a function definition (like func and function). The new function can be used repeatedly to perform string parsing and substitution. The parse string rule block may contain any number of any of the following lines [set-word-for-user-defined-rule]: [any code - same as a rebol parse rule] keep [rule] remove [rule] replace [rule] [expression] before [rule] [expression] after [rule] [expression] before-after [rule] [before expression] [after expression] where [rule] is a char! or string! or block! (what to match in the input string) [expression] is a char! or string! or block! (what to put in the output string) The rules are checked top to bottom. Put the rules to check first on top. For example, put a keep #"x" above a remove a-to-z. You may "do" the file as it is, and the tests will not be run. To run the tests, "do" the file with the argument 'test Example: do/args %parse-string.r 'test To see the parse rule that is generated, use the /rule and /print options (CURRENTLY NOT WORKING CORRECTLY) Example: func-parse-string/rule/print [replace "x" "X"] To see the object that is generated, use the /object and /print options Example: func-parse-string/object/print [replace "x" "X"] The returned function is wrapped in an object (unseen by the user) to provide a local context for the variables used. Loading this file and running the code should only add one word to the namespace: 'func-parse-string. This code has not been optimized for speed. } Example: { (look at the tests for other examples) my-parse: func-parse-string [ start: charset [#"a" - #"c"] efg-pair: ["ef" | "fg" | "eg"] keep #"a" remove [start] remove #"d" replace [efg-pair] "_x_" replace "h" "HH" before #"k" "[" after "l" #"]" before-after "m" "<<" ">>" ] my-parse "abcdefghijklm" ==> "a_x_gHHij[kl]<>" func-parse-string/object/print [...RULE BLOCK AS ABOVE...] ==> make object! [ start: charset [#"a" - #"c"] efg-pair: ["ef" | "fg" | "eg"] parse-rule: [ copy _temp #"a" (append _output _temp) | [start] | #"d" | [efg-pair] (append _output reduce "_x_") | "h" (append _output reduce "HH") | copy _temp #"k" (append _output reduce "[" append _output _temp) | copy _temp "l" (append _output _temp append _output reduce #"]") | copy _temp "m" (append _output reduce "<<" append _output _temp append _output reduce ">>") | copy _temp skip (append _output _temp) ] _temp: none _output: none parse-string-function: func [input [string!]][ _output: copy "" parse/all input [some parse-rule] _output ] ] } library: [ level: 'advanced platform: 'all type: 'dialect domain: 'text-processing tested-under: none support: none license: none see-also: none ] ] ;======; ; CODE ; ;======; func-parse-string: function [string-rule [block!] /rule /object /print] [ ;=================; ; local variables ; ;=================; _rule _rul _expression _exp _exp1 _exp2 _keep _remove _replace _before _after _before-after _set-word _wor _any-after-set-word _any parse-definitions parse-rule parse-object format-rule ] [ ;====================================================================; ; internal rules, used to change the string-rule into the parse-rule ; ;====================================================================; _rule: [string! | char! | block!] _expression: [string! | char! | block!] _keep: [ 'keep copy _rul _rule (append parse-rule reduce [ 'copy '_temp first _rul to paren! reduce [ 'append '_output '_temp ] '| ]) ] _remove: [ 'remove copy _rul _rule (append parse-rule reduce [ first _rul '| ]) ] _replace: [ 'replace copy _rul _rule copy _exp _expression (append parse-rule reduce [ first _rul to paren! reduce [ 'append '_output 'reduce first _exp ] '| ]) ] _before: [ 'before copy _rul _rule copy _exp _expression (append parse-rule reduce [ 'copy '_temp first _rul to paren! reduce [ 'append '_output 'reduce first _exp 'append '_output '_temp ] '| ]) ] _after: [ 'after copy _rul _rule copy _exp _expression (append parse-rule reduce [ 'copy '_temp first _rul to paren! reduce [ 'append '_output '_temp 'append '_output 'reduce first _exp ] '| ]) ] _before-after: [ 'before-after copy rul _rule copy exp1 _expression copy exp2 _expression (append parse-rule reduce [ 'copy '_temp first rul to paren! reduce [ 'append '_output 'reduce first exp1 'append '_output '_temp 'append '_output 'reduce first exp2 ] '| ]) ] ;==========================================================================; ; internal rules, used to put definitions into the parse definitions block ; ;==========================================================================; _set-word: [ copy _wor set-word! (append parse-definitions to set-word! first _wor) ] _any-after-set-word: [ copy _any any-type! (append/only parse-definitions first _any) ] ;======================================================================; ; change the string-rule into the parse definitions and the parse rule ; ;======================================================================; parse-definitions: copy [] parse-rule: copy [] parse string-rule [ some [ _keep | _remove | _replace | _before | _after | _before-after | _set-word | _any-after-set-word ] ] ;==========================================; ; add the default clause to the parse rule ; ;==========================================; append parse-rule reduce [ 'copy '_temp 'skip to paren! [append _output _temp] ] ;===============================; ; build the parse string object ; ;===============================; append parse-definitions [parse-rule:] append/only parse-definitions parse-rule append parse-definitions [_temp:] append parse-definitions none append parse-definitions [_output:] append parse-definitions none append parse-definitions [parse-string-function: func [input [string!]] [ _output: copy "" parse/all input [some parse-rule] _output ] ] parse-object: context parse-definitions ;=================================================; ; user options, mainly for learning and debugging ; ;=================================================; if rule [ either print [return prin [format-rule parse-rule CRLF]] [return parse-rule] ] if object [ either print [return prin [mold parse-object CRLF]] [return parse-object] ] format-rule: function [rule [block!]] [ rule-string output temp ] [ rule-string: mold rule output: copy "" parse/all rule-string [some [ " | " (append output " |^M^/") | copy temp skip (append output temp) ]] output ] ;========================================================================; ; (for normal use) return the parse string function of the object ; ; we assume that because the function points to the object, ; ; the object sticks around in memory to provide context for the function ; ;========================================================================; get in parse-object 'parse-string-function ] ;=======; ; TESTS ; ;=======; ;=====================================================; ; to run tests, do the file with 'test as its argument ; ; (e.g.) do/args %parse-string.r 'test ; ;=====================================================; if 'test = system/script/args [ ;=====================================================================; ; a short-cut function to repeatedly run the tests (under MS Windows) ; ;=====================================================================; p: func [] [ do/args %/c/Program Files/rebol/view/local/parse-string.r 'test ] ;===============; ; test function ; ;===============; ???: func [result 'ignore desired] [ if not result = desired [ print ["TEST FAILED" mold result ignore mold desired] ] ] ;========================; ; pre test set variables ; ;========================; alpha: 15 parse-string-function: 16 _temp: 17 _output: 18 x: 19 ;=======; ; tests ; ;=======; ps1: func-parse-string [ alpha: charset [#"a" - #"z"] remove "h" remove #"e" replace "ll" "hey" after "o" "-boy" before "t" #"_" before-after "r" ">>" "<<" ] ps2: func-parse-string [ alpha: charset [#"a" - #"z"] replace [some alpha] "hi" ] ps3: func-parse-string [ delimiter: charset reduce [#" " tab cr newline] alpha: charset [#"a" - #"z" #"A" - #"Z"] digit: charset "0123456789" alphanum: union alpha digit replace [delimiter] #"_" keep #"b" keep #"2" before-after [alphanum] #"/" #"\" ] ps4: func-parse-string [ dots: [some #"."] alpha: charset [#"a" - #"z" #"A" - #"Z"] digit: charset "0123456789" alphanum: union alpha digit alphanums: [some alphanum] x: none ; so x is not global replace [copy x some "a"] [length? x "A"] before [copy x some "b"] [length? x] after [copy x some "c"] [length? x] before-after [copy x dots alphanums] [length? x "["] ["]" length? x] ] ps5: func-parse-string [ replace #" " #"_" replace [#"/" | #"\"] "slash" replace #"." #"o" ] ??? ps1 "hello there" >> "heyo-boy _t>>r<<" ??? ps1 "hello world" >> "heyo-boy wo-boy>>r<> "hi hi" ??? ps2 "hello world" >> "hi hi" ??? ps3 "!@# $%) &*( abc 123" >> "!@#_$%)_&*(_/a\b/c\_/1\2/3\" ??? ps4 "a bb ccc .b ..c d" >> "1A 2bb ccc3 1[.b]1 2[..c]2 d" ??? ps5 "2.3 3/4 4\5 6.7" >> "2o3_3slash4_4slash5_6o7" ;=========================================; ; post test that variables are not global ; ;=========================================; ??? alpha >> 15 ??? parse-string-function >> 16 ??? _temp >> 17 ??? _output >> 18 ??? x >> 19 ;===========================================; ; fail a test to show the tests are running ; ;===========================================; ??? 'should >> 'fail ;=======================; ; show the file is done ; ;=======================; 'EOF ]