00001
00002
00003
00004
00005 #include "wvbackslash.h"
00006 #include "wvbuf.h"
00007 #include "wvstream.h"
00008 #include "wvstring.h"
00009 #include "wvstringmask.h"
00010 #include "wvtclstring.h"
00011
// File-scope character masks, each constructed from the matching
// WVTCL_*_STR macro (the macros are defined outside this file).
// NOTE(review): presumably these serve as the default `nasties` and
// `splitchars` arguments of the wvtcl_* functions -- confirm against the
// declarations in wvtclstring.h.
00012 const WvStringMask WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR);
00013 const WvStringMask WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR);
00014 const WvStringMask WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR);
00015
00016 static size_t wvtcl_escape(char *dst, const char *s, size_t s_len,
00017 const WvStringMask &nasties, bool *verbatim = NULL)
00018 {
00019 if (verbatim) *verbatim = false;
00020
00021
00022 if (s == NULL)
00023 return 0;
00024
00025 if (s_len == 0)
00026 {
00027 if (dst)
00028 {
00029 dst[0] = '{';
00030 dst[1] = '}';
00031 }
00032 return 2;
00033 }
00034
00035 bool backslashify = false, inescape = false;
00036 int len = 0, unprintables = 0, bracecount = 0;
00037 const char *cptr, *cptr_end = s + s_len;
00038
00039
00040
00041
00042 for (cptr = s; cptr != cptr_end; cptr++)
00043 {
00044
00045 if (dst) dst[len] = *cptr;
00046 ++len;
00047
00048 if (!inescape && *cptr == '{')
00049 bracecount++;
00050 else if (!inescape && *cptr == '}')
00051 bracecount--;
00052 if (bracecount < 0)
00053 backslashify = true;
00054
00055 bool doit = false;
00056 switch (*cptr)
00057 {
00058 case WVTCL_ALWAYS_NASTY_CASE:
00059 doit = true;
00060 break;
00061 default:
00062 if (nasties[*cptr])
00063 doit = true;
00064 }
00065 if (doit)
00066 unprintables++;
00067
00068 if (*cptr == '\\')
00069 inescape = !inescape;
00070 else
00071 inescape = false;
00072 }
00073
00074
00075 if (bracecount != 0 || inescape)
00076 backslashify = true;
00077
00078 if (!backslashify && !unprintables)
00079 {
00080 if (verbatim) *verbatim = true;
00081 return len;
00082 }
00083
00084 if (backslashify)
00085 {
00086 if (dst)
00087 {
00088 len = 0;
00089 for (cptr = s; cptr != cptr_end; ++cptr)
00090 {
00091 bool doit = false;
00092 switch (*cptr)
00093 {
00094 case WVTCL_ALWAYS_NASTY_CASE:
00095 doit = true;
00096 break;
00097 default:
00098 if (nasties[*cptr])
00099 doit = true;
00100 }
00101 if (doit)
00102 dst[len++] = '\\';
00103
00104 dst[len++] = *cptr;
00105 }
00106 return len;
00107 }
00108 else return len+unprintables;
00109 }
00110 else
00111 {
00112
00113 if (dst)
00114 {
00115 len = 0;
00116 dst[len++] = '{';
00117 for (cptr = s; cptr != cptr_end; ++cptr)
00118 dst[len++] = *cptr;
00119 dst[len++] = '}';
00120 return len;
00121 }
00122 else return len+2;
00123 }
00124 }
00125
00126
00127 WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties)
00128 {
00129 size_t s_len = s.len();
00130
00131 bool verbatim;
00132 size_t len = wvtcl_escape(NULL, s, s_len, nasties, &verbatim);
00133 if (verbatim) return s;
00134
00135 WvString result;
00136 result.setsize(len);
00137 char *e = result.edit();
00138 e += wvtcl_escape(e, s, s_len, nasties);
00139 *e = '\0';
00140 return result;
00141 }
00142
00143
// Undo the escaping of a single word.
//
//   dst      - output buffer, or NULL to only compute the required size.
//              No terminating NUL is written.
//   s, s_len - the escaped word.
//   verbatim - if non-NULL, set to true when 's' is NULL (nothing to do),
//              false otherwise.
//
// Returns the number of bytes written (or that would be written) to dst.
//
// A word wrapped in {} is copied without its braces and without any further
// processing.  Otherwise an enclosing pair of double quotes is dropped and
// each backslash is removed, keeping the character that follows it.
static size_t wvtcl_unescape(char *dst, const char *s, size_t s_len,
                             bool *verbatim = NULL)
{
    if (!s)
    {
        if (verbatim) *verbatim = true;
        return 0;
    }

    if (verbatim) *verbatim = false;

    // An opening and a closing delimiter need two distinct characters.
    // Without this check a lone '"' would count as both, leaving
    // start > end below and sending the copy loop off the buffer; a
    // zero-length input would index s[-1].
    const bool can_be_wrapped = (s_len >= 2);

    // braced: take the inside literally
    if (can_be_wrapped && s[0] == '{' && s[s_len-1] == '}')
    {
        if (dst) memcpy(dst, &s[1], s_len-2);
        return s_len - 2;
    }

    const char *start = s, *end = &s[s_len];

    // quoted: strip the quotes, then process backslashes as usual
    if (can_be_wrapped && s[0] == '"' && s[s_len-1] == '"')
    {
        ++start;
        --end;
    }

    size_t len = 0;
    bool inescape = false;
    for (; start != end; ++start)
    {
        if (*start == '\\' && !inescape)
        {
            // swallow the backslash; the next character is kept as-is
            inescape = true;
            continue;
        }

        inescape = false;
        if (dst) dst[len] = *start;
        len++;
    }
    return len;
}
00201
00202
00203 WvString wvtcl_unescape(WvStringParm s)
00204 {
00205 size_t s_len = s.len();
00206
00207 bool verbatim;
00208 size_t len = wvtcl_unescape(NULL, s, s_len, &verbatim);
00209 if (verbatim) return s;
00210
00211 WvString result;
00212 result.setsize(len+1);
00213 char *e = result.edit();
00214 e += wvtcl_unescape(e, s, s_len);
00215 *e = '\0';
00216 return result;
00217 }
00218
00219
00220 WvString wvtcl_encode(WvList<WvString> &l, const WvStringMask &nasties,
00221 const WvStringMask &splitchars)
00222 {
00223 int size = 0;
00224
00225 WvList<WvString>::Iter i(l);
00226 int count = 0;
00227 for (i.rewind(); i.next(); )
00228 {
00229 size += wvtcl_escape(NULL, *i, i->len(), nasties);
00230 ++count;
00231 }
00232
00233 WvString result;
00234 result.setsize(size+(count-1)+1);
00235
00236 char *p = result.edit();
00237 int j;
00238 for (i.rewind(), j=0; i.next(); ++j)
00239 {
00240 p += wvtcl_escape(p, *i, i->len(), nasties);
00241 if (j < count - 1)
00242 *p++ = splitchars.first();
00243 }
00244 *p = '\0';
00245
00246 return result;
00247 }
00248
// Sentinel returned by the low-level wvtcl_getword() when no complete word
// is available in the input.
const size_t WVTCL_GETWORD_NONE = UINT_MAX;
00250
00251 static size_t wvtcl_getword(char *dst, const char *s, size_t s_len,
00252 const WvStringMask &splitchars,
00253 bool do_unescape, size_t *end = NULL)
00254 {
00255
00256 if (!s_len) return WVTCL_GETWORD_NONE;
00257
00258 bool inescape = false, inquote = false, incontinuation = false;
00259 int bracecount = 0;
00260 const char *origend = s + s_len;
00261 const char *sptr, *eptr;
00262
00263
00264 for (sptr = s; sptr != origend; sptr++)
00265 {
00266 if (!splitchars[*sptr])
00267 break;
00268 }
00269
00270 if (sptr == origend)
00271 return WVTCL_GETWORD_NONE;
00272
00273
00274 if (*sptr == '"')
00275 {
00276 inquote = true;
00277 eptr = sptr+1;
00278 }
00279 else
00280 eptr = sptr;
00281
00282
00283 for (; eptr != origend; eptr++)
00284 {
00285 char ch = *eptr;
00286
00287 incontinuation = false;
00288
00289 if (inescape)
00290 {
00291 if (ch == '\n')
00292 {
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304 incontinuation = true;
00305 }
00306 inescape = false;
00307 }
00308 else if (ch == '\\')
00309 {
00310 inescape = true;
00311
00312 }
00313 else
00314 {
00315
00316 if (bracecount == 0)
00317 {
00318 if (inquote)
00319 {
00320 if (ch == '"')
00321 {
00322 eptr++;
00323 break;
00324 }
00325 }
00326 else if (splitchars[ch])
00327 break;
00328 }
00329
00330
00331 if (!inquote)
00332 {
00333 if (ch == '{')
00334 bracecount++;
00335 else if (bracecount > 0 && ch == '}')
00336 bracecount--;
00337 }
00338 }
00339 }
00340
00341 if (bracecount || sptr==eptr || inquote || inescape || incontinuation)
00342
00343 return WVTCL_GETWORD_NONE;
00344
00345
00346 if (end) *end = eptr - s;
00347
00348 if (do_unescape)
00349 return wvtcl_unescape(dst, sptr, eptr-sptr);
00350 else
00351 {
00352 if (dst) memcpy(dst, sptr, eptr-sptr);
00353 return eptr - sptr;
00354 }
00355 }
00356
00357
00358 WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars,
00359 bool do_unescape)
00360 {
00361 int origsize = buf.used();
00362 const char *origptr = (const char *)buf.get(origsize);
00363
00364 size_t end;
00365 size_t len = wvtcl_getword(NULL, origptr, origsize,
00366 splitchars, do_unescape, &end);
00367 if (len == WVTCL_GETWORD_NONE)
00368 {
00369 buf.unget(origsize);
00370 return WvString::null;
00371 }
00372
00373 WvString result;
00374 result.setsize(len+1);
00375 char *e = result.edit();
00376 e += wvtcl_getword(e, origptr, origsize, splitchars, do_unescape);
00377 *e = '\0';
00378
00379 buf.unget(origsize - end);
00380
00381 return result;
00382 }
00383
00384
00385 void wvtcl_decode(WvList<WvString> &l, WvStringParm _s,
00386 const WvStringMask &splitchars, bool do_unescape)
00387 {
00388 const char *s = _s;
00389 size_t s_len = _s.len();
00390 for (;;)
00391 {
00392 size_t end;
00393 size_t len = wvtcl_getword(NULL, s, s_len,
00394 splitchars, do_unescape, &end);
00395 if (len == WVTCL_GETWORD_NONE)
00396 break;
00397
00398 WvString *word = new WvString();
00399 word->setsize(len+1);
00400
00401 char *e = word->edit();
00402 e += wvtcl_getword(e, s, s_len, splitchars, do_unescape);
00403 *e = '\0';
00404 l.append(word, true);
00405
00406 s += end;
00407 s_len -= end;
00408 }
00409 }