Script Library: code-colorizer.r

Script Library: 1247 scripts
Archive version of: code-colorizer.r ... version: 1 ... oldes 14-Mar-2008

Amendment note: new script || Publicly available? Yes
REBOL [
    Title:   "Code-colorizer"
    Date:    12-Mar-2008/15:43:13+1:00
    Name:    "Rebol Code colorizer"
    Version: 0.9.2
    File:    %code-colorizer.r
    Author:  "David 'Oldes' Oliva"
    Email:   oliva.david@seznam.cz
    Home:    http://box.lebeda.ws/~hmm/
    Owner:   none
    Rights:  none
    Needs:   none
    Tabs:    none
    Usage:   [
		code-colorizer/remove-parens?: off
		code-colorizer/footer-final: {
		<script src="http://www.google-analytics.com/urchin.js" type="text/javascript"></script>
		<script type="text/javascript">_uacct = "UA-1886645-1";urchinTracker();</script>}
		
		colorize/save/title
		    %code-colorizer.r
		    %code-colorizer.html
		    "Rebol Code Colorizer"
	]
    Purpose: {To convert Rebol script into html with colorized code. Using string based parsing.}
    Comment: {
    	To change colors, download this CSS style: http://box.lebeda.ws/~hmm/css/rebolcc.css
    	modify it and use it (change code-colorizer/css-file to your new version)
    }
    History: [
    	0.9.2 12-Mar-2008 {
    		- Fixed bug with single escape character in string ( "^^" )
    	}
    	0.9.1 8-Mar-2008 {
    		- Fixed bug with conversion of REBOL word to url
    		- Added new color class .iss for issue! datatype.
    	}
    	0.9.0 28-Sep-2007 {
    		Complete remake of the script using string based parsing (besause of recursions limits in the old code)}
    	0.0.1 29-Oct-2003 {
    		%colorize-rswf.r script inspired by Carl's %color-code.r file but was designed
    		to colorize using CSS classes instead of font tags
    	}
	]
    Language: none
    Type:     none
    Content:  none
	library: [
		level:        'intermediate
		platform:     'all
		type:         [tool]
		domain:       [html parse text-processing visualization web]
		tested-under: none
		support:      none
		license:      'bsd
		see-also:     none
	]
;    preprocess: true
;    require: [
;    	rs-project 'seo-name
;	]
]

;### Seo-name
comment {
#### RS include: %seo-name.r
#### Title:   "seo-name"
----} 
unless value? 'seo-name [
	seo-name: func [
		"Creates SEO friendly version of string with diacritics"
		str
		/local new normal-chars trans-chars other-char pos pos2
	][
		was-type?: type? str
	    new: lowercase copy as-string str 
	    normal-chars: charset [#"A" - #"Z" #"a" - #"z" #"0" - #"9" #"_" #"."] 
	    trans-chars:  charset "ìéëšœèæøàžŸýáäíéóöïòúùü"
	    other-char: complement (union normal-chars trans-chars)
	    parse/all new [
	        some [
	            some normal-chars 
	            | some [
	                pos: [
	                      ["ì" | "é" | "ë"] (change pos "e") 
	                    | ["š" | "œ"]       (change pos "s") 
	                    | ["è" | "æ"]       (change pos "c") 
	                    | ["ø" | "à"]       (change pos "r") 
	                    | ["ž" | "Ÿ"]       (change pos "z") 
	                    | "ý"               (change pos "y") 
	                    | "í"               (change pos "i") 
	                    | ["ó" | "ö"]       (change pos "o") 
	                    | ["á" | "ä"]       (change pos "a") 
	                    | ["ù" | "ú" | "ü"] (change pos "u") 
	                    | "ï"               (change pos "d") 
	                    | ""               (change pos "t") 
	                    | "ò"               (change pos "n")
	                ]
	            ] 
	            | some other-char pos2: (pos2: change/part pos "-" pos2) :pos2
	        ]
	    ] 
	    to was-type? head new
	] 
]
comment "---- end of RS include %seo-name.r ----" 

;### Code-colorizer

code-colorizer: context [

;## Default settings
	remove-parens?: off ;removes parens from code (used to document big parsing rules)
	remove-newline-comments?: on ;removes all comments which start at newline
	index-comments?: on ;creates index from special comments
	break-on-error?: on ;stops parsing if founds invalid code

    out: str: x:   none
	output?:       true
	level-block:  
	level-paren:  
	level-string:  0
	string-type:   none
	string-buffer: make string! 10000
	index-html:    make string! 1000
	index-type:    none
	css-file:      http://box.lebeda.ws/~hmm/css/rebolcc.css
	footer-final:  none ;using this to add final note (for example counter) on HTML page
    
;## Basic charsets
;** These charsets are used in string based parse in Colorize function

	ch_word-dividers: charset " ^-^/^M{}[]()^"^^;"
	ch_newlines:      charset "^/^M"
	ch_space:         charset " ^-"
	ch_spaces:        charset " ^-^/^M"
	ch_numbers:       charset "0123456789"
	ch_binary2:       charset "01"
	ch_alpha:         charset [#"a" - #"z" #"A" - #"Z"]
	ch_hexadecimal:   charset [#"a" - #"f" #"A" - #"F" "0123456789"]
	ch_tonewline:     complement ch_newlines
	ch_word:          complement ch_word-dividers
	ch_anychar:       complement charset ""
	ch_alphanum: union ch_alpha ch_numbers
	ch_base64:   union ch_alphanum union charset "+/=" ch_spaces

;## Rules used for parsing
	rl_integer:   [some ch_numbers]
	rl_word:      [some ch_word]
	rl_binary2:   [ "2#{" any [8 [ch_binary2  any ch_spaces]] "}"]
	rl_binary32:  [  "#{" any [2 [ch_alphanum any ch_spaces]] "}"]
	rl_binary64:  ["64#{" any ch_base64 "}"]
	rl_binary: [
		  rl_binary2
		| rl_binary32
		| rl_binary64
		| ["#{" | "2#{" | "64#{"] (
			if level-string = 0 [
				print ["!!! Invalid binary --" copy/part str 20]
				print [level-string level-block level-paren]
				if break-on-error? [break]
			]
		)
	]
	rl_pair: [some ch_numbers #"x" some ch_numbers]
	rl_char: [
		{#"} opt #"^^" 1 ch_anychar {"}
	]
;** These are groups with words used in Rebol

	rl_comparison: [
		"<" | "<=" | "<>" | "=" | "==" | "=?" | ">" | ">=" | "equal?" | "greater-or-equal?" |
		"greater?" | "lesser-or-equal?" | "lesser?" | "maximum-of" | "minimum-of" |
		"not-equal?" | "same?" | "sign?" | "strict-equal?" | "strict-not-equal?"
	]
	rl_context: ["alias" | "bind" | "context" | "get" | "in" | "set" | "unset" | "use" | "value?"]
	rl_control: [
		"all" | "any" | "opt" | "attempt" | "break" | "catch" | "compose" | "disarm" | "dispatch" |
		"do" | "do-events" | "does" | "either" | "else" | "exit" | "for" | "forall" | "foreach" |
		"forever" | "forskip" | "func" | "function" | "halt" | "has" | "if" | "launch" | "loop" |
		"next" | "quit" | "reduce" | "remove-each" | "repeat" | "return" | "secure" | "switch" |
		"throw" | "try" | "until" | "wait" | "while"
	]
	rl_help: [
		"?" | "??" | "about" | "comment" | "dump-face" | "dump-obj" | "help" |
		"license" | "probe" | "source" | "trace" | "usage" | "what"
	]
	rl_logic: [
		"all" | "and" | "any" | "complement" | "found?" | "not" | "or" | "random" | "xor" |
		"on" | "off" | "true" | "false" | "none"
	]
	rl_math: [
		"*" | "**" | "+" | "-" | "/" | "//" | "abs" | "absolute" | "add" | "and" | "arccosine" |
		"arcsine" | "arctangent" | "complement" | "cosine" | "divide" | "even?" | "exp" |
		"log-10" | "log-2" | "log-e" | "max" | "maximum" | "maximum-of" | "min" | "minimum" |
		"minimum-of" | "multiply" | "negate" | "negative?" | "not" | "odd?" | "or" |
		"positive?" | "power" | "random" | "remainder" | "sign?" | "sine" | "square-root" |
		"subtract" | "tangent" | "xor" | "zero?"
	]
	rl_io: [
		"ask" | "change-dir" | "clean-path" | "close" | "confirm" | "connected?" |
		"delete" | "dir?" | "dirize" | "dispatch" | "do" | "echo" | "exists?" | "get-modes" |
		"info?" | "input" | "input?" | "list-dir" | "load" | "make-dir" | "modified?" |
		"open" | "pick" | "poke" | "prin" | "print" | "query" | "read" | "read-io" | "rename" |
		"resend" | "save" | "script?" | "secure" | "send" | "set-modes" | "set-net" | "size?" |
		"split-path" | "suffix?" | "to-local-file" | "to-rebol-file" | "update" | "wait" |
		"what-dir" | "write" | "write-io"
	]
	rl_series: [
		"alter" | "append" | "array" | "at" | "back" | "change" | "clear" | "copy" | "difference" |
		"empty?" | "exclude" | "extract" | "fifth" | "find" | "first" | "found?" | "fourth" |
		"free" | "head" | "head?" | "index?" | "insert" | "intersect" | "join" | "last" | "length?" |
		"load" | "maximum-of" | "minimum-of" | "offset?" | "parse" | "pick" | "poke" | "random" |
		"rejoin" | "remove" | "remove-each" | "repend" | "replace" | "reverse" | "second" |
		"select" | "skip" | "sort" | "switch" | "tail" | "tail?" | "third" | "union" | "unique"
	]
	rl_dataset: [
		"alter" | "charset" | "difference" | "exclude" | "extract" | "intersect" | "union" | "unique"
	]
	rl_specialstring: [
		"build-tag" | "checksum" | "clean-path" | "compress" | "debase" | "decode-cgi" | "decompress" |
		"dehex" | "detab" | "dirize" | "enbase" | "entab" | "find" | "form" | "import-email" | "lowercase" |
		"mold" | "parse-xml" | "reform" | "rejoin" | "remold" | "split-path" | "suffix?" | "trim" | "uppercase"
	]
	rl_system: [
		"browse" | "component?" | "link?" | "now" | "protect" | "protect-system" | "recycle" |
		"unprotect" | "upgrade"
	]
	rl_datatype: [
		"any-block?" | "any-function?" | "any-string?" | "any-type?" | "any-word?" | "as-pair" |
		"binary?" | "bitset?" | "block?" | "char?" | "construct" | "datatype?" | "date?" | "decimal?" |
		"dump-obj" | "email?" | "error?" | "event?" | "file?" | "function?" | "get-word?" | "hash?" |
		"image?" | "integer?" | "issue?" | "library?" | "list?" | "lit-path?" | "lit-word?" | "logic?" |
		"make" | "money?" | "native?" | "none?" | "number?" | "object?" | "op?" | "pair?" | "paren?" |
		"path?" | "port?" | "refinement?" | "routine?" | "series?" | "set-path?" | "set-word?" |
		"string?" | "struct?" | "tag?" | "time?" | "to" | "to-binary" | "to-bitset" | "to-block" |
		"to-char" | "to-date" | "to-decimal" | "to-email" | "to-file" | "to-get-word" | "to-hash" |
		"to-hex" | "to-idate" | "to-image" | "to-integer" | "to-issue" | "to-list" | "to-lit-path" |
		"to-lit-word" | "to-logic" | "to-money" | "to-pair" | "to-paren" | "to-path" | "to-refinement" |
		"to-set-path" | "to-set-word" | "to-string" | "to-tag" | "to-time" | "to-tuple" | "to-url" |
		"to-word" | "tuple?" | "type?" | "unset?" | "url?" | "word?"
	]
	rl_view: [
		"alert" | "as-pair" | "brightness?" | "caret-to-offset" | "center-face" | "choose" | "clear-fields" |
		"do-events" | "dump-face" | "flash" | "focus" | "hide" | "hide-popup" | "in-window?" | "inform" |
		"layout" | "link?" | "load-image" | "make-face" | "offset-to-caret" | "request" | "request-color" |
		"request-date" | "request-download" | "request-file" | "request-list" | "request-pass" | "request-text" |
		"show" | "show-popup" | "size-text" | "span?" | "stylize" | "unfocus" | "unview" | "view" | "viewed?" | "within?"
	]

;## escape-html
	escape-html: func[data][
		data: to string! reduce data
		foreach [from to] [ "&" "&amp;" "<" "&lt;" ">" "&gt;"][
			replace/all data from to
		]
		data
	]
;## emit
	emit: func [data /class cl /html] [
;print ["EMIT:" mold data cl (mold copy/part str 5)]
		case [
			level-string > 0 [
				append string-buffer data
			]
			output? [
	       		repend out either class [
	       			[
	       				{<span class=} cl {>}
	       				escape-html data
	       				"</span>"
	   				]
				][
					either html [data][	escape-html data ]
				]
	       	]
		]
	]

;## add-index-comment
	add-index-comment: func[x /local st n][
		parse/all x [
			[
				  "###" (st: 'co2)
				| "##"  (st: 'co3)
				| "**"  (st: 'co4)
				| "*"   (st: 'co5)
				| "-"   (st: 'co6)
			] copy x some ch_tonewline (
				if st = 'co3 [
					;use only content to paren
					parse/all x [copy x to "(" to end]
				]
				
				case [
					st = 'co2 [
						
						append index-html rejoin [
							case [
								none? index-type ["<ol class=index>"]
								;index-type <> 'co2 ["</ol>^/"]
								all [
									not empty? index-html
									#"," = last index-html
								][ remove back tail index-html]
								true [""]
							]
							{<li class=co2><a href="#m_} n: seo-name x: trim/head/tail x {" class=a1>} x {</a>}
						]
						append out rejoin [{<a name="m_} n {"></a>}]
						index-type: 'co2
					]
					st = 'co3 [
						append index-html rejoin [
							;either index-type = 'co2 ["^/<ol class=co3>^/"][""]
							;{^-<li class=co3><a href="#s_} n: seo-name x {">} x {</a>}
							{^/<a href="#s_} n: seo-name x: trim/head/tail x {" class=a2>} x {</a>,}
						]
						append out rejoin [{<a name="s_} n {"></a>}]
						index-type: 'co3
					]
					
				]
				emit/class join ";" x st
			)
		]
	]

;## colorize
	set 'colorize func[source /save outfile /title ttl /local source-file text x][
		text: either any [file? source url? source][
			source-file: source
			read/binary  source
		][	source ]
		out: make string! 3 * length? text

		level-block:  
		level-paren:  
		level-string: 0
		string-type: index-type: none
		clear string-buffer
		clear index-html
		
		loop 1 [ ;<-- to be able break parsing
			parse/all detab text [
				any [
					str: ;(print [">>>" mold copy/part str 10])
					  #"^^" [
					  	#"^^" (emit "^^^^")
					  	|
					  	#"(" some ch_hexadecimal #")" x: (
					  		emit/class copy/part str x 'ch
					  	)
					  	|
					  	#"{" (
					  		case [
					  			level-string = 0 [
					  				emit #"^^"
					  				string-type: #"{"
									level-string: 1
									emit #"{"
								]
								true [
									emit "^^{"
								]
							]
						)
						| #"^"" (
							case [
								level-string = 0 [
									emit #"^^"
									string-type: #"^""
									level-string: 1
									emit {"}
								]
								true [
									emit {^^"}
								]
							]
						)
						| (emit #"^^")
					]
					| copy x rl_char     (emit/class x 'ch )
					| {"}  (
						either level-string = 0 [
							string-type: #"^""
							level-string: level-string + 1 emit {"}
						][
							emit {"}
							if string-type = #"^"" [
								level-string: 0
								string-type: none
								emit/class string-buffer 'st0
								clear string-buffer
							]
						] 
					)
					| copy x rl_binary   (emit/class x 'bi0)
					
					| copy x rl_word     (
						case [
							#":" = last x [emit/class x 'sw]
							#"!" = last x [emit/class x 'dt]
							parse/case x ["REBOL"][ emit/html {<a href="http://www.rebol.com">REBOL</a>} ]
							true [
								parse x [
									  rl_pair (emit/class x 't1)
									| some ch_numbers (emit/class x 'nu0)
									| #"#" to end  (emit/class x 'iss)
									| [#"%" | "http://" | "ftp://" | "https://"] to end (emit/class x 'fl)
									| #"'" to end (emit/class x 'lw)
									| rl_comparison end (emit/class x 'kw2)
									| rl_context    end (emit/class x 'kw3)
									| rl_control    end (emit/class x 'kw4)
									| rl_help       end (emit/class x 'kw5)
									| rl_logic      end (emit/class x 'kw6)
									| rl_math       end (emit/class x 'kw7)
									| rl_io         end (emit/class x 'kw8)
									| rl_series     end (emit/class x 'kw9)
									| rl_dataset    end (emit/class x 'kw10)
									| rl_specialstring end (emit/class x 'kw11)
									| rl_system     end (emit/class x 'kw12)
									| rl_datatype   end (emit/class x 'kw13)
									| rl_view       end (emit/class x 'kw14)
									| (emit x)
								]
								
							]
						]
					)
					| #"[" (
						level-block:  level-block  + 1
						either level-string > 0 [
							emit #"["
						][	emit/class #"[" 'br0 ]
					)
					| #"]" (
						level-block:  level-block  - 1
						either level-string > 0 [
							emit #"]"
						][	emit/class #"]" 'br0 ]
					)
					| #"(" (
						if remove-parens? [ output?: off ]
						level-paren:  level-paren  + 1
						either level-string > 0 [
							emit #"("
						][	emit/class #"(" 'br0 ]
					)
					| #")" (
						either level-string > 0 [
							emit #")"
						][	emit/class #")" 'br0 ]
						if 0 = (level-paren:  level-paren  - 1) [output?: on]
						
					)

					| #"{" (
						either level-string = 0 [
							emit/class #"{" 'br1
							string-type: #"{"
						][
							emit #"{"
						]
						
						if string-type = #"{" [
							level-string: level-string + 1
						]
						
					)
					| #"}" (
						either string-type = #"{" [
							level-string: level-string - 1
							either level-string = 0 [
								emit/class string-buffer 'st0
								emit/class #"}" 'br1
								string-type: none
								clear string-buffer
							][
								emit #"}"
							]
						][
							emit #"}"
						]
					)
					| #";" copy x [any ch_space any ch_tonewline] new: (
						if none? x [x: ""]
						either level-string = 0 [
							case [
								all [
									index-comments?
									add-index-comment x
								] none
								
								all [remove-newline-comments? (find ch_newlines first back str)][
									;remove this comment from output with the newline as well
									parse/all new [some ch_newlines new: to end]
								]
								
								true [
									emit/class join ";" x 'co1
								]
							]
						][
							emit #";"
							new: next str
						]
					) :new
					| copy x some ch_newlines (
						either string-type = #"^"" [
							print ["!!! Invalid string --" mold copy/part string-buffer 20]
							if break-on-error? [break]
						][	emit x ]
					)
					| copy x some ch_spaces (emit x)
					
					()
				]
				(
					if level-string > 0 [print ["!!! Invalid string!" level-string mold string-type] ]
					if break-on-error? [break]
				)
			]
		]
		
		if not empty? index-html [append index-html "</ol>"]
	    if save [

	    	write/binary outfile rejoin [
	    		{<html><head>}
	    		{<LINK rel="stylesheet" href="} css-file {"/>}
	    		{<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=windows-1250">}
				{<title>} any [ttl "a Rebol code"] {</title>}
	    		{</head>^/}
	    		{<body bgcolor="#ffffff">}
	    		{<div class=header>}
	    		either title [rejoin ["<h1>" ttl "</h1>"]][""]
	    		either none? source-file [""][
	    			rejoin [
	    				{Source: <b>} last split-path source-file {</b> modified: <b>} modified? source-file {</b>}
	    			]
	    		]
	    		either remove-parens? ["<div style='color:#F00;'>Parens were removed from the source!</div>"][""]
	    		{</div>^/}
	    		index-html
	    		{<pre class=rebol>}
	    		out
	    		{</pre>^/}
	    		{<div class=footer>Generated } now
	    		{ by <a href="http://box.lebeda.ws/~hmm/rebol/code-colorizer.r">%code-colorizer.r</a> Rebol script</div>}
	    		any [footer-final ""]
	    		{</body></html>}
    		]
		]
		out
	]
	
	
]

;print colorize/save %test-code.txt %test.html
;colorize/save %code-colorizer.r %test.html
;code-colorizer/remove-parens?: off
;code-colorizer/footer-final: {
;<script src="http://www.google-analytics.com/urchin.js" type="text/javascript"></script>
;<script type="text/javascript">_uacct = "UA-1886645-1";urchinTracker();</script>}

;colorize/save/title
;	%code-colorizer.r
;	%code-colorizer.html
;	"Rebol Code Colorizer"

;colorize/save/title
;	to-rebol-file "I:\rebol\rs\projects-rswf\rswf\new\swf-tag-rules_enczes.rb"
;	%rswf-main-rules-full-code.html
;	"Rebol/Flash Dialect (RSWF) main rules"

;### Test code
comment [
	;some code to test if it works
	{str{nasted} and escaped ^{}
	"^^" "^(1f)" 
	multilined-string: {
		some text
		on more
		lines
		with code inside:
		x: sine 1 + 2
	}
	
	x: sine 1 + 2
	
	;pair datatype:
	320x240
	
	;char! datatype:
	#"A"
	;with escape:
	#"^-" = tab
	
	;tuple! datatype:
	red: 255.0.0
	
	;tag!:
	[<tag> 'hello </tag>]
	
	;valid word with escape	char
	word^s
	"string escaped^" char "
	;test
	to image!
	#{}
	#{1
	2}
	64#{Eg==}
	2#{00000000}
	
	;issue
	#FF0000 ;red
	
	table: [
	    q0: "# # L" q0
	        "1 1 L" q0
	        "+ 1 R" q1
	    q1: "1 1 R" q1
	        "# # L" q2
	    q2: "1 # L" q3
	    q3: "1 1 L" q3
	        "+ 1 R" q1
	        "# # R" q4
	    q4: "1 1 R" q4
	        "# # R" q5
	    q5: "# # L" stop
	]
	
	
]


		code-colorizer/footer-final: {
		<script src="http://www.google-analytics.com/urchin.js" type="text/javascript"></script>
		<script type="text/javascript">_uacct = "UA-1886645-1";urchinTracker();</script>}