Move urlencode/urldecode functions to core lib

This commit is contained in:
Andrew Janke 2015-08-09 16:28:47 -04:00
parent 88f42b6132
commit 14b4ba83c3
2 changed files with 135 additions and 32 deletions

View file

@ -73,3 +73,137 @@ function env_default() {
env | grep -q "^$1=" && return 0 env | grep -q "^$1=" && return 0
export "$1=$2" && return 3 export "$1=$2" && return 3
} }
# Load the zsh/langinfo module. It is required for the $langinfo parameter;
# the URL helpers in this file read $langinfo[CODESET] to learn the character
# encoding of the current locale.
zmodload zsh/langinfo
# URL-encode a string
#
# Encodes a string using RFC 2396 URL-encoding (%-escaped).
# See: https://www.ietf.org/rfc/rfc2396.txt
#
# By default, reserved characters and unreserved "mark" characters are
# not escaped by this function. This allows the common usage of passing
# an entire URL in, and encoding just special characters in it, with
# the expectation that reserved and mark characters are used appropriately.
# The -r and -m options turn on escaping of the reserved and mark characters,
# respectively, which allows arbitrary strings to be fully escaped for
# embedding inside URLs, where reserved characters might be misinterpreted.
#
# Prints the encoded string on stdout.
# Returns nonzero if encoding failed.
#
# Usage:
#  omz_urlencode [-r] [-m] [-P] <string>
#
#    -r causes reserved characters (;/?:@&=+$,) to be escaped
#
#    -m causes "mark" characters (_.!~*'()-) to be escaped
#
#    -P causes spaces to be encoded as '%20' instead of '+'
function omz_urlencode() {
  emulate -L zsh
  # Declare the option array locally; otherwise zparseopts would create (or
  # clobber) a global array named "opts" in the caller's shell.
  local -a opts
  zparseopts -D -E -a opts r m P
  local str=$1
  local spaces_as_plus
  if [[ -z $opts[(r)-P] ]]; then spaces_as_plus=1; fi

  # URLs must use UTF-8 encoding; convert str to UTF-8 if required
  local encoding=$langinfo[CODESET]
  local safe_encodings
  safe_encodings=(UTF-8 utf8 US-ASCII)
  if [[ -z ${safe_encodings[(r)$encoding]} ]]; then
    # print -r -- is used instead of echo so that a value such as "-n" is
    # treated as data and not as an option.
    str=$(print -r -- "$str" | iconv -f $encoding -t UTF-8)
    if [[ $? != 0 ]]; then
      echo "Error converting string from $encoding to UTF-8" >&2
      return 1
    fi
  fi

  # Use LC_ALL=C to process text byte-by-byte
  local i byte ord LC_ALL=C
  export LC_ALL
  local reserved=';/?:@&=+$,'
  # Double-quoted so the apostrophe really is part of the set; inside single
  # quotes a doubled '' is just an empty concatenation (RC_QUOTES is off).
  local mark="_.!~*'()-"
  local dont_escape="[A-Za-z0-9"
  if [[ -z $opts[(r)-r] ]]; then
    dont_escape+=$reserved
  fi
  # $mark must be last because of the "-"
  if [[ -z $opts[(r)-m] ]]; then
    dont_escape+=$mark
  fi
  dont_escape+="]"

  # Implemented with shell arithmetic only, avoiding subshells in the loop,
  # for performance (primarily on Windows).
  local url_str=""
  for (( i = 1; i <= ${#str}; ++i )); do
    byte="$str[i]"
    if [[ "$byte" =~ "$dont_escape" ]]; then
      url_str+="$byte"
    elif [[ "$byte" == " " && -n $spaces_as_plus ]]; then
      url_str+="+"
    else
      ord=$(( [##16] #byte ))
      # Left-pad to two hex digits so bytes below 0x10 become e.g. %09, not %9
      url_str+="%${(l:2::0:)ord}"
    fi
  done
  print -r -- "$url_str"
}
# URL-decode a string
#
# Decodes a RFC 2396 URL-encoded (%-escaped) string.
# This decodes the '+' and '%' escapes in the input string, and leaves
# other characters unchanged. Does not enforce that the input is a
# valid URL-encoded string. This is a convenience to allow callers to
# pass in a full URL or similar strings and decode them for human
# presentation.
#
# Outputs the encoded string on stdout.
# Returns nonzero if encoding failed.
#
# Usage:
# omz_urldecode <urlstring> - prints decoded string followed by a newline
function omz_urldecode {
emulate -L zsh
local encoded_url=$1
echo -e input $1
# Work bytewise, since URLs escape UTF-8 octets
local caller_encoding=$langinfo[CODESET]
local LC_ALL=C
export LC_ALL
# Change + back to ' '
local tmp=${encoded_url:gs/+/ /}
# Protect other escapes to pass through the printf unchanged
tmp=${tmp:gs/\\/\\\\/}
# Handle %-escapes by turning them into `\xXX` printf escapes
tmp=${tmp:gs/%/\\x/}
echo -E "before decode $tmp"
local decoded
eval "decoded=\$'$tmp'"
# Now we have a UTF-8 encoded string in the variable. We need to re-encode
# it if caller is in a non-UTF-8 locale.
local safe_encodings
safe_encodings=(UTF-8 utf8 US-ASCII)
if [[ -z ${safe_encodings[(r)$caller_encoding]} ]]; then
decoded=$(echo -E "$decoded" | iconv -f UTF-8 -t $caller_encoding)
if [[ $? != 0 ]]; then
echo "Error converting string from UTF-8 to $caller_encoding" >&2
return 1
fi
fi
echo -E "$decoded"
}

View file

@ -59,44 +59,13 @@ preexec_functions+=(omz_termsupport_preexec)
if [[ "$TERM_PROGRAM" == "Apple_Terminal" ]] && [[ -z "$INSIDE_EMACS" ]]; then if [[ "$TERM_PROGRAM" == "Apple_Terminal" ]] && [[ -z "$INSIDE_EMACS" ]]; then
# URL-encodes a string
# Outputs the encoded string on stdout
# Returns nonzero if encoding failed
function _omz_urlencode() {
local str=$1
local url_str=""
# URLs must use UTF-8 encoding; convert if required
local encoding=${LC_CTYPE/*./}
if [[ -n $encoding && $encoding != UTF-8 && $encoding != utf8 ]]; then
str=$(echo $str | iconv -f $encoding -t UTF-8)
if [[ $? != 0 ]]; then
echo "Error converting string from $encoding to UTF-8" >&2
return 1
fi
fi
# Use LC_CTYPE=C to process text byte-by-byte
local i ch hexch LC_CTYPE=C
for ((i = 1; i <= ${#str}; ++i)); do
ch="$str[i]"
if [[ "$ch" =~ [/._~A-Za-z0-9-] ]]; then
url_str+="$ch"
else
hexch=$(printf "%02X" "'$ch")
url_str+="%$hexch"
fi
done
echo $url_str
}
# Emits the control sequence to notify Terminal.app of the cwd # Emits the control sequence to notify Terminal.app of the cwd
function update_terminalapp_cwd() { function update_terminalapp_cwd() {
# Identify the directory using a "file:" scheme URL, including # Identify the directory using a "file:" scheme URL, including
# the host name to disambiguate local vs. remote paths. # the host name to disambiguate local vs. remote paths.
# Percent-encode the pathname. # Percent-encode the pathname.
local URL_PATH=$(_omz_urlencode $PWD) local URL_PATH=$(omz_urlencode -P $PWD)
[[ $? != 0 ]] && return 1 [[ $? != 0 ]] && return 1
local PWD_URL="file://$HOST$URL_PATH" local PWD_URL="file://$HOST$URL_PATH"
# Undocumented Terminal.app-specific control sequence # Undocumented Terminal.app-specific control sequence