# http://www.cs.dal.ca/~vlado/srcperl/snip/encode_S
# 2003 Vlado Keselj
#
# This is an attempt to define encoding of a string into a \S+
# sequence in a "natural" and compact way.  The inverse function is
# decode_S.
#
# "Natural" and compact are not precisely defined.  "Natural" refers
# to a kind of user-friendliness when reading directly an encoded
# string.  This is translated into the following list of requirements:
# - digits and letters should stay the same
# - space should be replaced by _ (underscore)
# - each byte should not be replaced by more than two bytes
# - a prefix of the original string has to correspond to a prefix of
#   the encoded string
#
# encode_S Oct  Dec  Hex  Char   C       encode_S Oct  Dec  Hex  Char  C
# -------------------------------------------------------------------------
# \0       000  0    00   NUL    '\0'    @        100  64   40   @
# \1       001  1    01   SOH            A        101  65   41   A
# \2       002  2    02   STX            B        102  66   42   B
# \3       003  3    03   ETX            C        103  67   43   C
# \4       004  4    04   EOT            D        104  68   44   D
# \5       005  5    05   ENQ            E        105  69   45   E
# \A       006  6    06   ACK            F        106  70   46   F
# \a       007  7    07   BEL    '\a'    G        107  71   47   G
# \b       010  8    08   BS     '\b'    H        110  72   48   H
# \t       011  9    09   HT     '\t'    I        111  73   49   I
# \n       012  10   0A   LF     '\n'    J        112  74   4A   J
# \v       013  11   0B   VT     '\v'    K        113  75   4B   K
# \f       014  12   0C   FF     '\f'    L        114  76   4C   L
# \r       015  13   0D   CR     '\r'    M        115  77   4D   M
# \o       016  14   0E   SO             N        116  78   4E   N
# \i       017  15   0F   SI             O        117  79   4F   O
# \l       020  16   10   DLE            P        120  80   50   P
# \6       021  17   11   DC1            Q        121  81   51   Q
# \7       022  18   12   DC2            R        122  82   52   R
# \8       023  19   13   DC3            S        123  83   53   S
# \9       024  20   14   DC4            T        124  84   54   T
# \N       025  21   15   NAK            U        125  85   55   U
# \S       026  22   16   SYN            V        126  86   56   V
# \T       027  23   17   ETB            W        127  87   57   W
# \c       030  24   18   CAN            X        130  88   58   X
# \E       031  25   19   EM             Y        131  89   59   Y
# \s       032  26   1A   SUB            Z        132  90   5A   Z
# \e       033  27   1B   ESC            [        133  91   5B   [
# \F       034  28   1C   FS             \\       134  92   5C   \     '\\'
# \G       035  29   1D   GS             ]        135  93   5D   ]
# \R       036  30   1E   RS             \^       136  94   5E   ^
# \U       037  31   1F   US             \_       137  95   5F   _
# _        040  32   20   SPACE          \`       140  96   60   `
# !        041  33   21   !              a        141  97   61   a
# "        042  34   22   "              b        142  98   62   b
# #        043  35   23   #              c        143  99   63   c
# $        044  36   24   $              d        144  100  64   d
# %        045  37   25   %              e        145  101  65   e
# &        046  38   26   &              f        146  102  66   f
# '        047  39   27   '              g        147  103  67   g
# (        050  40   28   (              h        150  104  68   h
# )        051  41   29   )              i        151  105  69   i
# *        052  42   2A   *              j        152  106  6A   j
# +        053  43   2B   +              k        153  107  6B   k
# ,        054  44   2C   ,              l        154  108  6C   l
# -        055  45   2D   -              m        155  109  6D   m
# .        056  46   2E   .              n        156  110  6E   n
# /        057  47   2F   /              o        157  111  6F   o
# 0        060  48   30   0              p        160  112  70   p
# 1        061  49   31   1              q        161  113  71   q
# 2        062  50   32   2              r        162  114  72   r
# 3        063  51   33   3              s        163  115  73   s
# 4        064  52   34   4              t        164  116  74   t
# 5        065  53   35   5              u        165  117  75   u
# 6        066  54   36   6              v        166  118  76   v
# 7        067  55   37   7              w        167  119  77   w
# 8        070  56   38   8              x        170  120  78   x
# 9        071  57   39   9              y        171  121  79   y
# :        072  58   3A   :              z        172  122  7A   z
# ;        073  59   3B   ;              {        173  123  7B   {
# <        074  60   3C   <              |        174  124  7C   |
# =        075  61   3D   =              }        175  125  7D   }
# >        076  62   3E   >              ~        176  126  7E   ~
# ?        077  63   3F   ?              \d       177  127  7F   DEL
#
# For bytes higher than 127:
# 1. map into lower than 128 by removing the leading bit, let c be the
#    character.
# 2. if c corresponds to itself replace with ^c
# 3. otherwise, if c corresponds to \x replace with `x

# encode_S($string)
#
# Returns $string rewritten according to the table above.  Each character
# is translated on its own, so the output is simply the concatenation of
# the per-character encodings:
#   - 0x00-0x1F and 0x7F become a backslash followed by a mnemonic letter
#     or digit; backslash, ^, _ and ` become a backslash followed by the
#     character itself;
#   - space becomes _; every other 7-bit character is copied as is;
#   - 0x80-0xFF are encoded like the same byte with the top bit cleared,
#     except that the escape character is ` instead of backslash, and
#     characters that need no escape are prefixed with ^ (so 0xA0, the
#     high-bit space, becomes ^_);
#   - characters above 0xFF are copied unchanged.
# An undefined argument yields an undefined result.
sub encode_S {
    my $text = shift;

    # Nothing to translate: hand an undefined argument straight back.
    return $text unless defined $text;

    # Stand-ins for the control bytes 0x00-0x1F, indexed by byte value.
    my @control_letter = split //, '012345Aabtnvfroil6789NSTcEseFGRU';

    my $encoded = '';
    for my $char (split //, $text) {
        my $code = ord $char;

        # Only byte-range characters take part in the encoding.
        if ($code > 0xFF) {
            $encoded .= $char;
            next;
        }

        my $high_bit = $code >= 0x80;
        my $low      = $code & 0x7F;    # the character with the top bit removed

        # $symbol is the printable character that represents $low;
        # $needs_escape says whether it must be introduced by an escape.
        my ($symbol, $needs_escape);
        if ($low < 0x20) {
            ($symbol, $needs_escape) = ($control_letter[$low], 1);
        }
        elsif ($low == 0x7F) {
            ($symbol, $needs_escape) = ('d', 1);
        }
        elsif ($low == 0x5C || ($low >= 0x5E && $low <= 0x60)) {
            # backslash, ^, _ and ` carry a meaning in the encoded form
            ($symbol, $needs_escape) = (chr($low), 1);
        }
        elsif ($low == 0x20) {
            ($symbol, $needs_escape) = ('_', 0);
        }
        else {
            ($symbol, $needs_escape) = (chr($low), 0);
        }

        # Escape character: \ (7-bit) or ` (8-bit); unescaped 8-bit
        # characters are flagged with ^ instead.
        my $prefix = $needs_escape ? ($high_bit ? '`' : '\\')
                   : $high_bit     ? '^'
                   :                 '';

        $encoded .= $prefix . $symbol;
    }

    return $encoded;
}

1;