Title:
[ANeff] ER for: canonicalizeURL(inputString, restrictMultiple, restrictMixed[, throwOnError=false])
| View in Tracker
Status/Resolution/Reason: To Fix//
Reporter/Name(from Bugbase): Aaron Neff / Aaron Neff (Aaron Neff)
Created: 01/21/2015
Components: Language
Versions: 11.0
Failure Type: Enhancement Request
Found In Build/Fixed In Build: CF11_Final /
Priority/Frequency: Trivial / Unknown
Locale/System: English / Win All
Vote Count: 3
canonicalize(myURL) is broken because it incorrectly interprets the beginning of some query string parameters (for example, the &pi in &pid) as character entities and converts them to symbols.
Example:
writeOutput(canonicalize("http://www.domain.com/?foo=bar&pid=product_id", true, true, false))
returns: http://www.domain.com/?foo=barπd=product_id (note the ampersand is gone and there's a Pi symbol between 'r' and 'd')
Thus, URLs are a special case, and a URL-specific canonicalizeURL() function is needed that takes the same parameters as canonicalize(). Example:
<cfscript>
// Canonicalizes a URL b/c canonicalize() converts, for example, &pi to the Pi symbol in the query string ?foo=bar&pid=product_id
//
// The URL is split into scheme/host/path, query string and fragment. Each query-string name and value is
// canonicalized on its own, so the "?", "&", ";" and "=" delimiters are never handed to canonicalize()
// and therefore cannot be mistaken for the start of a character entity.
//
// Arguments:
//   inputString      - the URL to canonicalize; it is trimmed before validation
//   restrictMultiple - passed through unchanged to every canonicalize() call
//   restrictMixed    - passed through unchanged to every canonicalize() call
//   throwOnError     - passed through unchanged to every canonicalize() call; when true, an input that
//                      fails isValid("url") also throws
// Returns:
//   the canonicalized URL, or an empty string when the input fails isValid("url") and throwOnError is false
function udfCanonicalizeURL(required string inputString, required boolean restrictMultiple, required boolean restrictMixed, boolean throwOnError=false) {
    var canonicalizedURL = "";
    ARGUMENTS.inputString = trim(ARGUMENTS.inputString);
    if(isValid("url", ARGUMENTS.inputString)) {//note: has a bug per #3924581
        var pattern = "([^?##]*)?(\?([^##]*))?(##(.*))?";//parses the URL into schemeHostPath, querystring and fragment
        // With returnsubexpressions=true, index 1 is the whole match, so capture group N lives at index N+1.
        var parsedURL = reFind(pattern, ARGUMENTS.inputString, 1, true);
        if(parsedURL.len[2]) {//2=schemeHostPath
            canonicalizedURL &= canonicalize(mid(ARGUMENTS.inputString, parsedURL.pos[2], parsedURL.len[2]), ARGUMENTS.restrictMultiple, ARGUMENTS.restrictMixed, ARGUMENTS.throwOnError);
            if(parsedURL.len[4]) {//4=querystring (without the leading "?")
                var qs = mid(ARGUMENTS.inputString, parsedURL.pos[4], parsedURL.len[4]);
                var canonicalizedQS = "";
                // Each match is one name[=value] pair, optionally preceded by its "&" or ";" delimiter.
                var qsPairs = reMatch("[\&;]?[^\&;]+", qs);
                for(var qsPair in qsPairs) {
                    var qsPairNoDelim = listLast(qsPair, "&;");
                    // Re-emit the delimiter verbatim; it must never pass through canonicalize().
                    if(reFind("^[\&;]", qsPair)) {
                        canonicalizedQS &= left(qsPair, 1);
                    }
                    // Split on the FIRST "=" by position. listFirst() ignores empty list elements, so it
                    // would return the value as the name for a pair such as "=value".
                    var qsValueStartPos = find("=", qsPairNoDelim);
                    var qsName = qsPairNoDelim;
                    if(qsValueStartPos eq 1) {
                        qsName = "";
                    } else if(qsValueStartPos gt 1) {
                        qsName = left(qsPairNoDelim, qsValueStartPos - 1);
                    }
                    if(len(qsName)) {
                        canonicalizedQS &= canonicalize(qsName, ARGUMENTS.restrictMultiple, ARGUMENTS.restrictMixed, ARGUMENTS.throwOnError);
                    }
                    if(qsValueStartPos) {
                        // Keep the "=" even when the value is empty so "foo=" is not rewritten as "foo".
                        canonicalizedQS &= "=";
                        if(len(qsPairNoDelim) gt qsValueStartPos) {
                            canonicalizedQS &= canonicalize(right(qsPairNoDelim, len(qsPairNoDelim) - qsValueStartPos), ARGUMENTS.restrictMultiple, ARGUMENTS.restrictMixed, ARGUMENTS.throwOnError);
                        }
                    }
                }
                if(len(canonicalizedQS)) {
                    canonicalizedURL &= ('?' & canonicalizedQS);
                }
            }
            if(parsedURL.len[6]) {//6=fragment (without the leading hash sign)
                canonicalizedURL &= ("##" & canonicalize(mid(ARGUMENTS.inputString, parsedURL.pos[6], parsedURL.len[6]), ARGUMENTS.restrictMultiple, ARGUMENTS.restrictMixed, ARGUMENTS.throwOnError));
            }
        }
    } else if(ARGUMENTS.throwOnError) {
        throw(message = "URL is not valid");
    }
    return canonicalizedURL;
}
// Demo: print the result of the built-in canonicalize() and of udfCanonicalizeURL() for the same URL,
// separated by a line break, so the two outputs can be compared.
theURL = "http://www.domain.com/?foo=bar&pid=product_id";
writeOutput(canonicalize(theURL, true, true, false));
writeOutput('<br> ');
writeOutput(udfCanonicalizeURL(theURL, true, true, false));
</cfscript>
----------------------------- Additional Watson Details -----------------------------
Watson Bug ID: 3924625
Reason: BugVerified
External Customer Info:
External Company:
External Customer Name: Aaron
External Customer Email:
Attachments:
- January 21, 2015 00:00:00: 1_3924625.cfm
Comments: