changelog shortlog tags branches changeset files revisions annotate raw help

Mercurial > hg > werc / bin/contrib/urlencode.awk

changeset 672: cd59fedffc2e
parent: 3c404e092908
author: sl
date: Fri, 02 Dec 2016 20:35:58 -0500
permissions: -rwxr-xr-x
description: etc/initrc: change default markdown formatter from markdown.pl (no modern perl on plan 9) to md2html.awk (works everywhere)
1 # Taken from http://www.shelldorado.com/scripts/cmds/urlencode
2 ##########################################################################
3 # Title : urlencode - encode URL data
4 # Author : Heiner Steven (heiner.steven@odn.de)
5 # Date : 2000-03-15
6 # Requires : awk
7 # Categories : File Conversion, WWW, CGI
8 # SCCS-Id. : @(#) urlencode 1.4 06/10/29
9 ##########################################################################
10 # Description
11 # Encode data according to
12 # RFC 1738: "Uniform Resource Locators (URL)" and
13 # RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
14 #
15 # This encoding is used i.e. for the MIME type
16 # "application/x-www-form-urlencoded"
17 #
18 # Notes
19 # o The default behaviour is not to encode the line endings. This
20 # may not be what was intended, because the result will be
21 # multiple lines of output (which cannot be used in a URL or an
22 # HTTP "POST" request). If the desired output should be one
23 # line, use the "-l" option.
24 #
25 # o The "-l" option assumes that the end-of-line is denoted by
26 # the character LF (ASCII 10). This is not true for Windows,
27 # where the end of a line is denoted by the two characters
28 # CR LF (ASCII 13 10), or for classic Mac OS (CR, ASCII 13).
29 # We use this for symmetry; data processed in the following way:
30 # cat | urlencode -l | urldecode -l
31 # should (and will) result in the original data
32 #
33 # o Large lines (or binary files) will break many AWK
34 # implementations. If you get the message
35 # awk: record `...' too long
36 # record number xxx
37 # consider using GNU AWK (gawk).
38 #
39 # o urlencode will always terminate its output with an EOL
40 # character
41 #
42 # Thanks to Stefan Brozinski for pointing out a bug related to non-standard
43 # locales.
44 #
45 # See also
46 # urldecode
47 ##########################################################################
48 
# Identification strings used in the usage text and in diagnostics.
VER=1.4
PN=$(basename "$0")		# name this script was invoked under

# awk interpreter to run.  A value of AWK that is already set (even to
# the empty string) is left untouched; otherwise plain "awk" is used.
AWK=${AWK-awk}
53 
# Usage - write the help text to standard error and terminate the
# script with exit status 1.  Reads the globals PN (program name) and
# VER (version string).
Usage () {
    cat >&2 <<EOF
$PN - encode URL data, $VER
usage: $PN [-l] [file ...]
 -l: encode line endings (result will be one line of output)

The default is to encode each input line on its own.
EOF
    exit 1
}
62 
# Msg - print every argument as its own diagnostic line on standard
# error, each prefixed with the program name ($PN) and ": ".
# With no arguments nothing is printed.
#
# printf is used rather than echo: echo implementations disagree on
# whether backslash sequences in the text are expanded, while
# printf "%s" always emits the message verbatim.
Msg () {
    for MsgLine
    do printf '%s: %s\n' "$PN" "$MsgLine" >&2
    done
}
68 
# Fatal - hand all arguments to Msg for printing, then terminate the
# script with exit status 1.
Fatal () {
    Msg "$@"
    exit 1
}
70 
# Parse the command line.
#   -l   encode line endings (sets EncodeEOL=yes)
#   -h   print the usage text and exit
# Any unknown option also prints the usage text and exits.
#
# The getopts builtin is used instead of the external getopt command:
#   o "set -- `getopt ...` || Usage" tests the exit status of "set",
#     not of getopt, so a getopt failure was never detected there;
#   o the unquoted command substitution split file names containing
#     white space (and expanded glob characters in them).
# The leading ":" in the option string keeps getopts silent on unknown
# options, so the only diagnostic is the usage text, as before.
#
# After this block "$@" holds just the file operands (possibly none,
# in which case awk reads standard input).
EncodeEOL=no
while getopts :hl Option
do
    case "$Option" in
	l)  EncodeEOL=yes;;
	*)  Usage;;			# -h, or an option we do not know
    esac
done
shift $((OPTIND - 1))
86 
# Run awk in the plain "C" locale, so that every input byte is one
# character and the a-z/A-Z/0-9 ranges below cover ASCII only.
# LC_ALL takes precedence over LANG, so setting LANG alone does nothing
# for callers that have LC_ALL exported; it has to be overridden too.
# Otherwise a multibyte-aware awk (e.g. gawk in a UTF-8 locale) hands us
# characters that are missing from ord[], and those come out as "%00".
LANG=C LC_ALL=C
export LANG LC_ALL

# Encode each input line (from the files in "$@", or standard input):
#   letters, digits, "." and "-"  are copied unchanged
#   a space                       becomes "+"
#   anything else                 becomes "%" plus two hex digits
# With EncodeEOL=yes every line end is written as "%0A" and no raw
# newline is emitted; otherwise each encoded line ends with a newline.
$AWK '
    BEGIN {
	# We assume an awk implementation that is just plain dumb.
	# We will convert a character to its ASCII value with the
	# table ord[], and produce two-digit hexadecimal output
	# without the printf("%02X") feature.

	EOL = "%0A"		# "end of line" string (encoded)

	# hextab[n] is the hex digit for n = 0..15; split() fills
	# indexes 1..15, index 0 is added by hand.
	split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
	hextab [0] = 0

	# ord[c] is the numeric code (1..255) of the character c.
	for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0

	# The shell variable is spliced into the program text here.
	if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
	encoded = ""
	len = length ($0)	# loop invariant, computed once per line
	for ( i=1; i<=len; ++i ) {
	    c = substr ($0, i, 1)
	    if ( c ~ /[a-zA-Z0-9.-]/ ) {
		encoded = encoded c		# safe character
	    } else if ( c == " " ) {
		encoded = encoded "+"		# special handling
	    } else {
		# unsafe character, encode it as a two-digit hex-number
		lo = ord [c] % 16
		hi = int (ord [c] / 16)
		encoded = encoded "%" hextab [hi] hextab [lo]
	    }
	}
	if ( EncodeEOL ) {
	    printf ("%s", encoded EOL)
	} else {
	    print encoded
	}
    }
' "$@"
126