// ==UserScript==
// @name          Highlight Punc
// @description    Replaces punctuation with larger, easy-to-read comma, semicolon and bigger period/fullstop. For visible sentence and paragraph structure; subclause outline, or, for the visually impaired; punctuation often being hard to see on LCD screens. Sizes are configurable.
// @include       *
// @include       file://*
// ==/UserScript==
//
// Note, you also must have ".FremyCompany Script" installed in iescript in order for this one to run
// Path: C:\Program Files\IEPro\userscripts\CompatibilityScriptForIE7Pro.ieuser.js
//
// First few are just wrappers for GM_functions that don't seem to be available from the FremyCompany script
// All the others are, eg, GM_getValue, Xpath, etc.
//

(function(){
// Compatibility shims: map the Greasemonkey-style names used further down
// onto the PRO_* functions this host provides.  Any failure while setting
// them up is logged through PRO_log and then re-thrown.
try {

 // Attach an (initially empty) object to GM_getValue as its tempData property.
 GM_getValue.tempData=new Object();

// Forwards a menu label and its callback to PRO_registerMenuCommand and
// returns whatever that call returns.
function GM_registerMenuCommand(label, func) {
    return PRO_registerMenuCommand(label, func);
}

// Assigned without "var", so this does not create a binding local to this script.
// Logs the text through PRO_log with a newline appended.
GM_log=function(text) {
        return PRO_log(text+"\n");
};

} catch(e) { 
    PRO_log(" Caught error, GM funcs " + e.toString());
    throw(e);
}

// Debug-mode switch.  PRO_setValue converts bool to string, so the stored
// value is examined as text: it counts as "on" when it contains "true".
var storedDebugFlag = GM_getValue("debug", false);
var debugOn = String(storedDebugFlag).indexOf("true") !== -1;
// Running number prefixed to every debug line (see debugOut).
var dcount = 1;

// Writes one numbered line ("<count>:<message>") to the host log, but only
// while debug mode is switched on.  The counter advances once per line written.
function debugOut(message) {
    if ( ! debugOn)
	return;
    var numberedLine = dcount + ":" + message + "\n";
    PRO_log(numberedLine);
    dcount++;
}

//debugOut("Script Highlight punctuation Called");

////////
////////////////////// MAIN ///////////////////////////
////////(copy-able from firefox from here...
//
// User options....
//
// Crude browser sniff: the script takes its "Firefox" code paths when the
// first letter of navigator.appName is 'N', and its other paths otherwise.
var Firefox = (navigator.appName.charAt(0) == 'N');
	
// Reloads the current page so that a changed setting takes effect.
// On the Firefox path this is a plain reload(false); otherwise the page's
// own address is assigned back to window.location.href.
function GM_reload() {
    if ( ! Firefox) {
	var currentAddress = window.location.href;
	window.location.href = currentAddress;
	return;
    }
    document.location.reload(false);
}

GM_registerMenuCommand( "====== Highlight Punctuation ======", function(){});

// Scale factor applied to the width and height of every punctuation image.
// The menu commands below store it as a string, hence the Number() conversion.
var sizeScaler = Number(GM_getValue("sizeScaler", 1));
// A stored value that is not a positive, finite number (for instance "NaN"
// saved after a non-numeric entry) would produce unusable image sizes on
// every page, so fall back to the default scale.
if ( ! isFinite(sizeScaler) || sizeScaler <= 0)
    sizeScaler = 1;
var scaler = sizeScaler - 1;
// Equals 1 at the default scale; multiplies the vertical-align offsets of the
// comma, semicolon and bracket images.
var reverseScaler = 2*sizeScaler*(sizeScaler -1) + 1;

// One menu entry per preset size; the active one is shown with a check mark.
GM_registerMenuCommand((sizeScaler == 0.66 ? "\u2714 " : "    ") + "Small punctuation", 
		       function() { GM_setValue("sizeScaler", "0.66"); GM_reload(); });
GM_registerMenuCommand((sizeScaler == 1 ? "\u2714 " : "    ") + "Medium punctuation", 
		       function() { GM_setValue("sizeScaler", "1");  GM_reload(); });
GM_registerMenuCommand((sizeScaler == 1.33 ? "\u2714 " : "    ") + "Large punctuation" , 
		       function() { GM_setValue("sizeScaler", "1.33"); GM_reload(); });
// Free-form scale.  Checked whenever the current scale is none of the presets.
GM_registerMenuCommand((sizeScaler != 0.66 && sizeScaler != 1 && sizeScaler != 1.33) ? "\u2714 Customized scaler is x"+sizeScaler : "    Customize scale...", 
		       function() { 
			   var scaleVal = Number(prompt("Please set scaling factor for punctuation size: ", sizeScaler));
			   // Cancel and an empty answer both convert to 0 and are ignored, as are
			   // non-numeric (NaN), infinite and negative answers, none of which is a usable size.
			   if (isFinite(scaleVal) && scaleVal > 0) { 
			       GM_setValue("sizeScaler", scaleVal.toString());  
			       GM_reload();
			   }
		       });
// Toggles debug mode and reloads so the new setting is picked up.
GM_registerMenuCommand((debugOn ? "\u2714 " : "    ") + "Put tags-ignored info inside the HTML (debug mode)",
		       function() { 
			   GM_setValue("debug", ! debugOn);
			   GM_reload();
		       } );

// Whether to report timing statistics once the page has been processed.
// PRO_setValue converts booleans to strings, and the string "false" is truthy,
// so the stored value is examined as text, exactly as is done for the debug flag.
var reportPerformance = /true/.test(GM_getValue("reportPerformance", false));
GM_registerMenuCommand((reportPerformance ? "\u2714 " : "    " ) + "Report this script's performance",
		       function() { 
			   GM_setValue("reportPerformance", ! reportPerformance);
			   GM_reload();
		       } );

// Space-separated names of tags whose text is left untouched; "*" is a wildcard.
var defaultTags = "TEXTAREA  FORM  SCRIPT STYLE  ABBR H* A  DL  TH  NOSCRIPT  CODE  PRE KBD";
var excludeTags = GM_getValue("excludeTags", defaultTags);

// Menu entry that lets the user edit the list.  Cancelling the prompt keeps the
// current list; an empty answer is stored as an empty list.
GM_registerMenuCommand("    Set HTML tags to be ignored...", function() {
    var question = "Please edit space-separated list of tags to be excluded from punctuation highlighting.  An element whose \"parent\" matches will also be excluded.  Wildcards are allowed.  \n\nThe initial exclusion list had already been set as: " + defaultTags + "  Change the line below as required";
    var tags = prompt(question, excludeTags);
    if (tags == null)
	return;
    var trimmedTags = tags.replace(/(^\s+)|(\s+$)/g,""); // strips extra spaces
    GM_setValue("excludeTags", trimmedTags);
    GM_reload();
});

// Matcher tested against "<parent tag>@<grandparent tag>".  With an empty list
// the placeholder "NULL" is used, since an empty pattern would match everything.
var exclusionTags = "NULL";
if (excludeTags) {
    // "A  B*" becomes the alternation "A|B\w*".
    var tagAlternation = excludeTags.replace(/(\w|\*)\s+/g,"$1|").replace(/\*/g,"\\w*");
    exclusionTags = new RegExp("\\b(" + tagAlternation + ")\\b", "i");
}


GM_registerMenuCommand( "_________________________",function(){});

// Four things are needed in the code for each type of punctuation: a data image of it, 
// its HTML/CSS representation settings, an entry in the punctuation Regexp, 
// and, finally, a case in the replace function.

//
// Base64 images of punctuation, as "data:" URIs holding GIF images.
// Each one is used as the src of the <img> built for that punctuation mark.
//
var comma="data:image/gif;base64,R0lGODlhDAASABEAACH5BAEAAAIALAAAAAAMABIAgf8AAAAAAAAAAAAAAAIhlAV5m63s2onO0Iiu1DJry03XAlYkdqKiCbFl1z6kNwsFADs=";
var redDot = "data:image/gif;base64,R0lGODlhCwAKABEAACH5BAEAAAIALAAAAAALAAoAgf8AAP///wAAAAAAAAIQlIFom83toJy0VnGPXDqtAgA7";
var semicolon = "data:image/gif;base64,R0lGODlhCgAhABEAACH5BAEAAAIALAAAAAAKACEAgf8AAAAAAAAAAAAAAAIslIGmgg3qHIu02ovbqTCumXziSJamtnAbBbLZC8et1NFreONsjc4P/0vhFAUAOw==";
var colon = "data:image/gif;base64,R0lGODlhBQAMABEAACH5BAEAAAIALAAAAAAFAAwAgf8AAAAAAAAAAAAAAAINFG6nywkCo4TKsftqAQA7";

// One dash image serves both the en dash and the em dash; they differ only in the size given later.
var ndash="data:image/gif;base64,R0lGODlhCQAMABEAACH5BAEAAAIALAAAAAAJAAwAgf8AAP///wAAAAAAAAIMhIWmy+0Po5y0IaEKADs=";

var leftDoubleQuote = "data:image/gif;base64,R0lGODlhGQASABEAACH5BAEAAAIALAAAAAAZABIAgf8AAAAAAAAAAAAAAAI9lAV5uwmMnINRPmMnywrze3yP2FWkGJ5kqnINurKuKUGfXG6WXVM03wNqXkFdkQYyDnFJ4tLzdDahPpahAAA7";
var rightDoubleQuote="data:image/gif;base64,R0lGODlhGQAUABEAACH5BAEAAAIALAAAAAAZABQAgf8AAAAAAAAAAAAAAAJAlAV5uwmMnINRvmMny6rmzT2h5o0jZqboaTbsuXJGCNGuRJVXvNf4TQL9ZpbcB9jxDXXGIjJXogSlSaj1ir0WAAA7";

var questionMark="data:image/gif;base64,R0lGODlhCAASABEAACH5BAEAAAIALAAAAAAIABIAgQAAAP8AAAAAAAAAAAIalIJ2yLHt0nOqqVlZlnhfw2XfSF5hB2omZRQAOw==";
var exclamMark="data:image/gif;base64,R0lGODlhBwAVABEAACH5BAEAAAIALAAAAAAHABUAgf8AAAAAAAAAAAAAAAIYlIB2mOwPo5yLmadsTur0D4adVnHYxYUFADs=";

var leftBracket="data:image/gif;base64,R0lGODlhCAAVABEAACH5BAEAAAIALAAAAAAIABUAgf8AAP7+/gAAAAAAAAIdlA9wloq5mhPyNTtlroxTr2HbhUXeB5mptXTTUwAAOw==";
var rightBracket="data:image/gif;base64,R0lGODlhCAAVABEAACH5BAEAAAIALAAAAAAIABUAgf8AAP7+/gAAAAAAAAIdBIRpl7gtFlOPVnejtIZP+W3YBXKeOU6ammmQUAAAOw==";

var rightAngle="data:image/gif;base64,R0lGODlhDwALABEAACH5BAEAAAIALAAAAAAPAAsAgf0AAAAAAAAAAAAAAAIdhIVikLjsVlMynvZSxpVe3W0eNIKfiIbq53EkUgAAOw==";
var leftAngle="data:image/gif;base64,R0lGODlhDwALABEAACH5BAEAAAIALAAAAAAPAAsAgf0AAAAAAAAAAAAAAAIdlA15krDtUDuQTqlwZJdnv1VPJ1pfqZlbypojUAAAOw==";

var invertedQ = "data:image/gif;base64,R0lGODlhCAASABEAACH5BAEAAAIALAAAAAAIABIAgQAAAP8AAAAAAAAAAAIalBVxlrmtXly0UnfnaRIjzoCfN2pQWHKQCBYAOw==";
var invertedExclam = "data:image/gif;base64,R0lGODlhBAAPABEAACH5BAEAAAIALAAAAAAEAA8Agf8AAAAAAAAAAAAAAAINFGCnl+IPWZuSrataAQA7";

// The non-ASCII punctuation characters, written as escapes so the file itself stays ASCII:
// en dash, em dash, ellipsis, curly double quotes, angle quotes, inverted ? and !.
var ndashChar = "\u2013", mdashChar = "\u2014", ellipsisChar = "\u2026", leftDblQuoteChar = "\u201c", rightDblQuoteChar = "\u201d";
var leftAngleChar = "\u00ab", rightAngleChar = "\u00bb", invertedQChar = "\u00bf", invertedExclamChar = "\u00a1";

// Concatenated into the character class of characters permitted before a full stop.
var specialEndChars = rightDblQuoteChar + rightAngleChar + invertedExclamChar 
    + invertedQChar + ndashChar + mdashChar

// Index handed to GM_addStyle for each CSS rule; buildHTMLcss advances it.
var ruleno = 1;

// Build, once per punctuation type, the <img> HTML that replaces it in the page.
// Widths and heights are multiplied by sizeScaler inside buildHTMLcss.
// Arguments after the image and its alt text, all in em units:
//   left margin, right margin, height, width, vertical alignment -- then the CSS class name.
var commaHTML             = buildHTMLcss(comma, ",",                          .1,  .3,   .55,  .4,  -.25*reverseScaler, "comma");
var fullstopHTML          = buildHTMLcss(redDot, ".",                         .3,  .8,   .4,   .55, 0,                  "fullstop");
var semicolonHTML         = buildHTMLcss(semicolon, ";",                      .2,  .4,   .9,   .45, -.2*reverseScaler,  "semicolon");
var colonHTML             = buildHTMLcss(colon, ":",                          .2,  .4,   .8,   .35, 0,                  "colon");
var leftDoubleQuoteHTML   = buildHTMLcss(leftDoubleQuote, "\"",               .2,  .2,   .7,   .7,  .30,                "leftDoubleQuote");
var rightDoubleQuoteHTML  = buildHTMLcss(rightDoubleQuote, "\"",              .2,  .2,   .7,   .7,  .30,                "rightDoubleQuote");
// Variant with a negative left margin, used when the closing quote directly follows a '.' or '?'.
var rightDoubleQuote2HTML = buildHTMLcss(rightDoubleQuote, "\"",              -.5, .4,   .7,   .75, .30,                "rightDoubleQuote2");
var leftAngleHTML         = buildHTMLcss(leftAngle, leftAngleChar,            .2,  0,    .8,   .9,  -.1,                "leftAngleQuote");
var rightAngleHTML        = buildHTMLcss(rightAngle, rightAngleChar,          0,   .2,   .8,   .9,  -.1,                "rightAngleQuote");
var questionMarkHTML      = buildHTMLcss(questionMark, "?",                   .4,  .8,   .95,  .55, 0,                  "questionMark");
var invertedQHTML         = buildHTMLcss(invertedQ, invertedQChar,            .4,  .8,   .95,  .55, 0,                  "invertedQmark");
var exclamMarkHTML        = buildHTMLcss(exclamMark, "!",                     .4,  .8,   1.15, .4,  -.1,                "exclamMark");
// Left margin is .4em like the other question/exclamation marks (it used to read 4,
// which put a 4em gap in front of every inverted exclamation mark).
var invertedExclamHTML    = buildHTMLcss(invertedExclam, invertedExclamChar,  .4,  .8,   1.15, .35, -.1,                "invertedExclam");
var leftBracketHTML       = buildHTMLcss(leftBracket, "(",                    .3,  .2,   1.4,  .35, -.35*reverseScaler, "leftBracket");
var rightBracketHTML      = buildHTMLcss(rightBracket, ")",                   .2,  .3,   1.4,  .35, -.35*reverseScaler, "rightBracket");
// Both dashes reuse the same image at different sizes.
var ndashHTML             = buildHTMLcss(ndash, ndashChar,                    0,   -.15, .2,   2,   .15,                "ndash");
var mdashHTML             = buildHTMLcss(ndash, mdashChar,                    0,   -.1,  .3,   3,   .15,                "mdash");


// Registers the CSS rule for one punctuation image and returns the <img> HTML
// that stands in for that punctuation mark.
//
//   image         src of the image (a data: URI)
//   alt           alt text, i.e. the punctuation character itself
//   leftMargin, rightMargin, verticalAlign   layout values in em
//   picHeight, picWidth                      size in em, multiplied by sizeScaler
//   classname     class of the <img>; the CSS rule is ".puncrep img.<classname>"
//
// Side effects: one GM_addStyle call, and ruleno is advanced by one.
function buildHTMLcss(image, alt, leftMargin, rightMargin, picHeight, picWidth, verticalAlign, classname) {
    var cssRule = ".puncrep img." + classname + " { ";
    cssRule += "margin-left: " + leftMargin + "em ! important;";
    cssRule += "margin-right: " + rightMargin + "em ! important;";
    cssRule += "vertical-align: " + verticalAlign + "em ! important;";
    cssRule += "float: none ! important;";
    cssRule += "position: static ! important; display: inline-block ! important;";
    cssRule += "overflow: hidden ! important;";
    cssRule += "padding: 0em ! important; ";
    cssRule += "margin-top: 0px ! important;  margin-bottom: 0px ! important;";
    cssRule += "border-style: none ! important; ";
    cssRule += "}";
    GM_addStyle(cssRule, ruleno++);

    var inlineStyle = "width: " + (picWidth * sizeScaler) + "em; height: " + (picHeight * sizeScaler) + "em; ";
    if (Firefox) {
	inlineStyle += " ";
    }
    else {
	// It shouldn't be necessary to clog up the HTML with all this style information,
	// but on iexplorer it is necessary, so the layout is repeated inline.
	inlineStyle += "vertical-align: " + verticalAlign + "em; ";
	inlineStyle += "border-style: none ; ";
	inlineStyle += "margin-left: " + leftMargin + "em;";
	inlineStyle += "margin-right: " + rightMargin + "em;";
	inlineStyle += "position: static; display: inline-block;";
    }

    var openingAttributes = "<img class='" + classname + "' alt='" + alt + "' border='0' ";
    return openingAttributes + " style=' " + inlineStyle + "' src='" + image + "'/>";
}


// Start measurement of performance...
var startTime = (new Date()).getTime();
var no_of_matches =  0;  // punctuation characters replaced so far (counted in replacePunc)
var bytesProcessed = 0;  // total length of the non-blank text nodes examined (ProcessNode)
var bytesParsed = 0;     // total length of the strings handed to replacePunc
var ignored = 0;         // text nodes skipped because of the tag exclusion list

// Patterns, don't remove brackets since then the offset will be wrong in 
// the replace function.

// Pattern for fullstop: no abbrevs and not followed by a lower case letter....
// This is the inside of a character class.  Note that "\(" in a string literal is just "(".
var permittedEndCharacters = "0-9>\\u2019\\u0029\\u005D\(\\/\\-" + specialEndChars; 
// ...allowed before fullstop are: 0-9, >, single quote, end brackets )], and forward slash /and dash
var permttedBeforeDot = "[a-zA-Z\\u00c0-\\u00ef" + permittedEndCharacters + "]"; // includes accents.
var aLowerCase = "[a-z\\u00e0-\\u00ef]"; // lower case cant follow a fullstop.  Includes accented letters.

// Simple regexps, they match only one character with no context...
// (the lookaheads inspect the following text without making it part of the match)
var questionMarkRegexp =  "\\?(?! *" +aLowerCase+ ")";
var exclamMarkRegexp =  "\\!(?! *" +aLowerCase+ ")";
var commaRegexp = ",(?![0-9])"; // exclude commas as part of numbers.
var colonRegexp = ":(?![0-9])"; // exclude colons as part of numbers.

// Abbreviations...
// NOTE(review): the lookahead built from this list is not anchored at the end of the
// abbreviation, so it also appears to exempt longer words that merely start with one
// of these (e.g. "Street." because of "St") -- confirm whether that is intended.
// NOTE(review): " e\\.g" starts with a space; the others do not -- confirm.
var abbrevs = "etc|Mr| e\\.g|i\\.e|St"; // etc., e.g. and i.e. are not to be highlighted.
//var exceptAbbrevs = "((^|\\s)(?!" + abbrevs + ")(" + permttedBeforeDot + "{2,})|^)"; //<not abbreavs><at least 2 chars or a begin line>
var exceptAbbrevs = "((^|\\W)(?!" + abbrevs + ")(" + permttedBeforeDot + "{2,})|^)"; //<not abbrevs><at least 2 chars or a begin line>


// Complex regexps, they match the character and some surrounding context...
// Those with a preamble:
var fullstopRegexp = "((" + exceptAbbrevs + "\\.(?!(\\s*" + aLowerCase + ")|[0-9]))|\\.{2,})"; // <two characters, or begin line><fullstop>, or a run of two or more dots
// "\." in a string literal is just ".", so this pattern is "any character, then a semicolon".
var semicolonRegexp = "\.;" // <char><semicolon>, needs char to match when preceded by an escape char.

// With a postamble:
var leftDquoteRegexp = "[\"](\\w|$)"; // <quote><word>
var rightDquoteRegexp = "[\"]\\.*(?=\\W|$)"; // <quote><space, punctuation or line end>


// The full pattern: every alternative above, plus brackets and the special characters, OR-ed together.
var puncRegexp=new RegExp(commaRegexp + "|"
			  + fullstopRegexp + "|"
			  + "[)(\]|"
			  + semicolonRegexp + "|" + colonRegexp + "|"
			  + leftDquoteRegexp  + "|" + rightDquoteRegexp + "|" 
			  + questionMarkRegexp + "|" + exclamMarkRegexp + "|" 
			  + ndashChar + "|"  + mdashChar + "|" 
			  + invertedQChar + "|"  + invertedExclamChar + "|" 
			  + leftAngleChar + "|" + rightAngleChar + "|"
			  + leftDblQuoteChar + "|" + rightDblQuoteChar + "|"
			  + ellipsisChar + "|" + "--"
			  , "g"); // any character matched by one of the above "OR" clauses is excluded from entering into another match.

// The regexp is tried at the leftmost position first, and at that position the
// alternatives are tried in the order written above: the first one that succeeds
// is taken, even when a later one would have matched a longer string.

// State shared between ProcessNode and replacePunc.
var textnodes, match=false;  // match collects the text matched in the node being processed; textnodes is not referenced elsewhere in this script
var quotesOpen = false;      // true while a straight double quote has been opened and not yet closed
var new_element;
var prevChar = "", prevOffset = 0;  // last character of the previous match and its offset; reset for every node
var nodesProcessed = 0;
var callCount = 0;           // number of recurseTree calls

//
// Overview.  Main code starts here.  Recurse through the nodes
// and replace each punctuation found with its larger counterpart image using <img> tags within a
// <span> tag created for any suitable text node with punctuation in it..  
// Text nodes are all named "text" and with id=3,  so the parent node is checked to ensure we are replacing
// punctuation only in those places where there are blocks of text, ie, in <P> or
// equivalents as configured, not in forms or textareas.
//

try{

// Walks the subtree below parentNode.  Text nodes (nodeType 3) are handed to
// ProcessNode; every other kind of node is descended into.
//   parentNode    node whose children are visited
//   recurseLevel  depth of this call; only passed on, incremented, to nested calls
function recurseTree(parentNode, recurseLevel) {   
    callCount ++;
    var upcoming;
    for (var current = parentNode.firstChild; current; current = upcoming) {
	// Remember the sibling first: ProcessNode may replace the current node.
	upcoming = current.nextSibling;
	if (current.nodeType != 3) {
	    recurseTree(current, recurseLevel + 1);
	    continue;
	}
	ProcessNode(current);
    }
}

// Replaces the punctuation inside one text node.
//
// Nodes holding only whitespace are skipped.  A node is also skipped (and counted in
// "ignored") when "<parent tag>@<grandparent tag>" matches the exclusion list; in debug
// mode a <meta puncIgnored="..."> marker is inserted in front of such a node.
// Otherwise the text is run through puncRegexp/replacePunc and, if anything matched,
// the node is replaced by a <span class="puncrep"> holding the generated HTML.
function ProcessNode(node) {
    var s, parent, new_s, tag, grandParentTag;
    s = node.data;
    nodesProcessed++;
    if ( ! /[\S]/.test(s))
	return;
    bytesProcessed += s.length;
    parent = node.parentNode;
    tag = parent.nodeName;                      
    grandParentTag = parent.parentNode ? parent.parentNode.nodeName : "";
    tag += "@"+grandParentTag; // Need to check greatgrandparent too?
    if (tag.match(exclusionTags)) { // tag not wanted.
	ignored++;
	if (debugOn) { 
	    new_element = document.createElement("meta");
	    new_element.setAttribute("puncIgnored", tag);
	    parent.insertBefore(new_element, node);
	}
	return;
    }
    // The text is about to be parsed as HTML, so '&' and '<' must be escaped:
    // otherwise plain text such as "?a=1&copy=2" is decoded as a character reference.
    // '&' goes first so the '&' of "&lt;" is not escaped twice.  The semicolon of each
    // entity is written as "\;" so that replacePunc leaves it alone and drops the backslash.
    s = s.replace(/&/g, "&amp\\;").replace(/</g, "&lt\\;");
    //debugOut("___checking in parent tag: " + tag + ", the string: " + s);
    match = "";
    new_s = s.replace(puncRegexp, replacePunc);
    prevChar = ""; prevOffset =0;
    if (match) {
	new_element = document.createElement("span");
	new_element.innerHTML = new_s; // This processes the HTML we pass in.
	parent.replaceChild(new_element, node);
	// className rather than setAttribute("class", ...): the latter does not set
	// the class on Internet Explorer 7 and earlier.
	new_element.className = "puncrep";
	new_element.setAttribute("puncparents", tag);
	new_element.setAttribute("puncmatch", match);
    }
} // end function ProcessNode()
} catch(e) { 
    var lineno = 0;
    if (Firefox)
	lineno = e.lineNumber;
    window.status  = " Highlight punctuation, exception: " + e + ", line: " + (lineno -330) ; 
    //debugOut(" caught: "+ e + "line:"+(lineno-330)); // 330 is a guess, see lineFinder var in greasemonkey.js
    throw(e);
}

// Dont declare vars here, the following function is called before it and the vars are then undefined.

// Change of parenthesis in regexp means a change in parameters p2-p9 & offset:
// Replacement callback for puncRegexp: receives one matched string and returns
// the HTML to put in its place.
//
// The matched string may hold context characters as well as punctuation (for
// example the word in front of a full stop, or the escape backslash in front of a
// semicolon).  It is walked one character at a time: punctuation characters are
// swapped for their image HTML, everything else is copied through.
//
// Shared state updated here: bytesParsed, no_of_matches, match, quotesOpen,
// prevChar and prevOffset (the latter two let the next call see whether it
// directly follows this match).
function replacePunc(str) {
    // Function is called via the above replace() method call...
    // replace() passes (match, groups..., offset, whole string), so the offset
    // is the second-to-last argument however many groups the pattern has.
    var offset = arguments[arguments.length-2];
    //debugOut("_______replacePunc----FOR--" + str + "--offset--"+offset);
    var result = "", preamble = "",  postamble = "";
    // angleQuote is declared but never read in this function.
    var quoteChar = false, angleQuote = false, dots = 0;
    var noWrap = true;
    bytesParsed += str.length;
    for (var i = 0; i < str.length; i++) { 
	// Counted optimistically; the default case takes it back for non-punctuation.
	no_of_matches++;
	switch (str.charAt(i)) 
	{
	case ',': 
	    result += commaHTML;
	    break;
	case '.': 
	    dots++;
	    result += fullstopHTML;
	    break;
	case ';': 
	    // A semicolon right after the escape backslash (see ProcessNode's "&lt\;")
	    // belongs to an HTML entity and is kept as a plain character.
	    if (preamble != "\\")
		result += semicolonHTML;
	    else
		result += ";";
	    break;
	case ':': 
	    result += colonHTML;
	    break;
	case '?': 
	    result += questionMarkHTML;
	    break;
	case '!': 
	    result += exclamMarkHTML;
	   break;
	case '\\':
	    preamble += "\\";
	    noWrap = false;
	    break; // escape char, throw away.
	case ndashChar: 
	    result += ndashHTML;
	    break;
	case mdashChar: 
	    result += mdashHTML;
	    break;
        case '(':
		result += leftBracketHTML;
		break;
	case ')':
		result += rightBracketHTML;
		break;
	case leftAngleChar:
	    result += leftAngleHTML ;
	    break;
	case rightAngleChar: 
	    result += rightAngleHTML;
	    break;
	case leftDblQuoteChar:
	    result += leftDoubleQuoteHTML;
	    break;
	case rightDblQuoteChar:
	    // Directly after a '.' or '?' the tighter variant is used, and it is
	    // placed in front of what has been built so far.
	    if (/[.?]/.test(prevChar) && (offset - prevOffset == 1)) 
		result = rightDoubleQuote2HTML + result;
	    else
		result += rightDoubleQuoteHTML;
	    break;
	case '\"':
	    // Straight quote: whether it opens or closes is decided after the loop.
	    quoteChar = true;
	    break;
	case ellipsisChar:
	    result += fullstopHTML + fullstopHTML + fullstopHTML;
	    break;
	case invertedExclamChar: 
	    result += invertedExclamHTML;
	    break;
	case invertedQChar: 
	    result += invertedQHTML;
	    break;
	default: 
	    // Not punctuation: context that was matched along with it.
	    no_of_matches--;
	    if (quoteChar) // quotes are the only ones with a postamble.
		postamble += str.charAt(i);
	    else {
		preamble += str.charAt(i);
		result += str.charAt(i);
	    }
	    break;
	}
    } // end for().
    // Special cases..
    // Exactly two dots that do not directly follow another dot: show a plain "." ("\." is
    // just ".") followed by one dot image.  This overwrites whatever was built in the loop.
    if (dots == 2 && ! (prevChar == '.' && (offset - prevOffset == 1 )))  // Not a plain ellipsis "...", special ellipsis char is already above.
	result = "\." + fullstopHTML;
    // A double hyphen was copied through by the default case; show it as an en dash.
    if (/--/.test(result))
	result = result.replace(/--/g,ndashHTML);
    if (quoteChar) 
    {
	// Opening quote: a word character follows, or nothing follows and no quote is open.
	if (postamble.match(/[\w]/) || ( ! postamble && ! quotesOpen)) {
	    result = leftDoubleQuoteHTML + postamble + result;
	    quotesOpen = true;
	}
	else {
	    quotesOpen = false;
	    if (/[.?]/.test(prevChar) && (offset - prevOffset == 1)) { // test if the previous character was a dot or a ?
		result = rightDoubleQuote2HTML + postamble + result; // This places it closer to the dot or ?
	    }
	    else {
		result = rightDoubleQuoteHTML + postamble + result;
	    }
	}
    } // End if (quoteChar)
    match += str;
    // After the loop i equals str.length, so these record the last character of
    // this match and its offset in the text.
    prevChar = str.charAt(i-1);
    prevOffset = offset + i - 1;
    //debugOut("_______match return, preamble--" + preamble + "--postamble--" + postamble + "--prevChar--" + prevChar + "--prevOffset--"+prevOffset);
    // Keep the punctuation on the same line as its context, except for escaped entities.
    if (noWrap)
	result = "<nobr>" + result + "</nobr>";
    //debugOut("_______result: "+result);
    return result;
}

// Process the whole page, starting at the document node.
recurseTree(document, 0);

//
// Performance monitoring: when enabled, summarise the run in the status bar
// and in the log.
// 

if (reportPerformance) {
    var elapsedMs = Math.round((new Date()).getTime() - startTime);
    // The +1 keeps the division defined when nothing was matched.
    var msPerMatch = (elapsedMs / (no_of_matches + 1)).toPrecision(3);

    // Larger byte counts are shown in thousands.
    if (bytesProcessed > 2000) {
	bytesProcessed = Math.round(bytesProcessed / 1000) + " kbytes";
	bytesParsed = Math.round(bytesParsed / 1000) + "k";
    }

    var timingLine = "\n" + msPerMatch + " msecs for each of " + no_of_matches + " matches; total: " + elapsedMs + " msecs";
    var volumeLine = ".  \n" + bytesProcessed + " (" + bytesParsed + " parsed) in " + nodesProcessed + " nodes.\n" + ignored + " ignored -- nodes had tags on ignore list";
    var report = timingLine + volumeLine;

    // Set about:config value, "dom.disable_window_status_change" to false for 
    // this to show performance results in the status bar...
    window.status = "Highlight punctuation :" + report;

    var clockTime = String(Date()).match(/\d+:\d+:\d+/);
    GM_log("\nAt " + clockTime + ", performance stats were, for page: \n     " + window.location.href + "\n" + report);
}

})()

//
// End
//

