//Written in the D programming language
/*
 * Serializes data to and from strings.
 *
 * It does not support the ability to read/write objects, but does
 * support arrays and associative arrays.
 *
 * To serialize a value, call "serialize!(Type)(value)".
 *
 * Unserializing is a bit more tricky. You need to know the type beforehand
 * and unserialize using it. An alternative would be to use Variant, but I
 * have not got that working.
 *
 * To unserialize a value call "unserialize!(Type)(ref string str)".
 * str will be updated to the remainder of the string and the decoded
 * value is returned.
 *
 * Future direction: Make it all work with ranges, maybe.
 *
 * Copyright 2013 Jaypha
 *
 * Distributed under the Boost Software License, Version 1.0.
 * (See http://www.boost.org/LICENSE_1_0.txt)
 *
 * Authors: Jason den Dulk
 */

module jaypha.io.serialize;

import std.regex;
import std.format;
import std.array;
import std.traits;
import std.string;
import std.conv;
import std.utf;
import std.variant;

//----------------------------------------------------------------------------
//
// serialize
//
//----------------------------------------------------------------------------
//
// Various serialize functions of the general format
// string serialize(T)(T value) where T is a supported type.
// Returns the encoded value.
//
//----------------------------------------------------------------------------

// Encodes a single character as a length-prefixed UTF-8 string, i.e.
// "s<byte count>:<bytes>". The length is the number of UTF-8 code units, not
// the number of characters.
// std.utf.encode is used to fill the fixed buffer; it returns how many of the
// four code units were written.

string serialize(T)(T value) if (isSomeChar!T)
{
  char[4] buf;
  immutable used = encode(buf, value);
  return format("s%d:%s", used, buf[0..used]);
}

//----------------------------------------------------------------------------

// Encodes a boolean as "b1" (true) or "b0" (false).

string serialize(T:bool)(T value)
{
  return value ? "b1" : "b0";
}

//----------------------------------------------------------------------------

// Encodes any integral value as 'i' followed by its decimal representation.

string serialize(T)(T value) if (isIntegral!(T))
{
  return format("i%d", value);
}

//----------------------------------------------------------------------------

// Encodes a floating point value as 'f' followed by its "%f" representation.
// NOTE(review): "%f" prints a fixed six decimal places, so values needing more
// precision do not round-trip exactly. The format is kept as is because
// existing serialized data and the unit tests depend on it.

string serialize(T)(T value) if (isFloatingPoint!(T))
{
  return format("f%f", value);
}

//----------------------------------------------------------------------------

// Encodes an array.
//  - char arrays become "s<byte count>:<bytes>".
//  - wchar/dchar arrays are transcoded to UTF-8 first and then encoded the
//    same way, so the length prefix always counts UTF-8 code units.
//  - any other array becomes "a<element count>" followed by each element's
//    own serialization.

string serialize(T:T[])(T[] value)
{
  static if (is(Unqual!T == char))
    return format("s%d:%s", value.length, value);
  else static if (is(Unqual!T == wchar) || is(Unqual!T == dchar))
    return serialize!(const(char)[])(toUTF8(value));
  else
  {
    auto sink = appender!string();
    formattedWrite(sink, "a%d", value.length);
    foreach (element; value)
      sink.put(serialize!(T)(element));
    return sink.data;
  }
}

//----------------------------------------------------------------------------

// Encodes an associative array as "o<entry count>" followed by the
// serialization of each key and then its value. Entries are written in the
// associative array's own iteration order.

string serialize(T:T[U],U)(T[U] value)
{
  auto sink = appender!string();
  formattedWrite(sink, "o%d", value.length);
  foreach (key, element; value)
  {
    sink.put(serialize!(U)(key));
    sink.put(serialize!(T)(element));
  }
  return sink.data;
}

//----------------------------------------------------------------------------

// Encodes an array like serialize does ("a<element count>" + elements), but
// each element is converted by the caller supplied function F, which must
// return something that can be appended to a string.

string customSerialize(alias F,T:T[])(T[] value)
{
  auto sink = appender!string();
  formattedWrite(sink, "a%d", value.length);
  foreach (element; value)
    sink.put(F(element));
  return sink.data;
}

//----------------------------------------------------------------------------

// Encodes an associative array as "o<entry count>" followed by each key
// (encoded with serialize) and each value (encoded with the caller supplied
// function F).

string customSerialize(alias F,T:T[U],U)(T[U] value)
{
  auto sink = appender!string();
  formattedWrite(sink, "o%d", value.length);
  foreach (key, element; value)
  {
    sink.put(serialize!(U)(key));
    sink.put(F(element));
  }
  return sink.data;
}

//----------------------------------------------------------------------------
// checkLengthTypeStart
//----------------------------------------------------------------------------
// Used to parse the start of s, a and o types. Returns the extracted length.
//
// str must start with the given type character followed by an unsigned
// decimal number. On success str is advanced past both. Throws Exception if
// the type character or the number is missing.
//----------------------------------------------------------------------------

uint checkLengthTypeStart(ref string str, dchar type)
{
  if (str.length == 0 || str[0] != type)
    throw new Exception(format("Malformed serialize string. Expecting '%c', got %s",type,str));

  auto m = match(str[1..$],php_z_uint);
  if (!m)
    throw new Exception(format("Malformed serialize string. Expecting integer, got %s",str));

  // Skip the type character (1) plus the digits that were matched.
  str = str[m.hit.length+1..$];
  return to!uint(m.hit);
}

//----------------------------------------------------------------------------
//
// unserialize
//
//----------------------------------------------------------------------------
//
// Various unserialize functions of the general format
// T unserialize(T)(ref string str) where T is a supported type.
// Returns the decoded value, and str is updated to the remainder of the
// string. Malformed input is reported by throwing Exception.
//
//----------------------------------------------------------------------------

// All patterns are anchored so that they only match at the current position.
enum php_z_int = ctRegex!(r"^-?\d+");
enum php_z_uint = ctRegex!(r"^\d+");
enum php_z_float = ctRegex!(r"^-?\d+(\.\d+)?((E|e)(\+|-)?\d+)?");

// Decodes "b0"/"b1". str is only advanced when decoding succeeds.

T unserialize(T:bool)(ref string str)
{
  if (str.length < 2)
    throw new Exception(format("Malformed serialize string. Expecting \"b0\"/\"b1\", got \"%s\"",str));

  auto token = str[0..2];
  if (token != "b1" && token != "b0")
    throw new Exception(format("Malformed serialize string. Expecting \"b0\"/\"b1\", got \"%s\"",token));

  str = str[2..$];
  return token == "b1";
}

//----------------------------------------------------------------------------

// Decodes 'i' followed by an optionally negative decimal integer.
// Conversion problems (e.g. a value that does not fit T) surface as the
// exception thrown by std.conv.to.

T unserialize(T)(ref string str) if (isIntegral!T)
{
  // The emptiness test keeps an exhausted input from indexing out of bounds.
  if (str.length == 0 || str[0] != 'i')
    throw new Exception(format("Malformed serialize string. Expecting 'i', got %s",str));

  auto m = match(str[1..$],php_z_int);
  if (!m)
    throw new Exception(format("Malformed serialize string. Expecting integer, got %s",str));

  const(char)[] digits = m.hit;

  str = str[digits.length+1..$];
  return to!T(digits);
}

//----------------------------------------------------------------------------

// Decodes 'f' followed by a decimal number with optional fraction and
// exponent. php_z_float requires at least one leading digit, so textual
// non-finite values are not accepted.

T unserialize(T)(ref string str) if (isFloatingPoint!T)
{
  // The emptiness test keeps an exhausted input from indexing out of bounds.
  if (str.length == 0 || str[0] != 'f')
    throw new Exception(format("Malformed serialize string. Expecting 'f', got %s",str));

  auto m = match(str[1..$],php_z_float);
  if (!m)
    throw new Exception(format("Malformed serialize string. Expecting float, got %s",str));

  const(char)[] digits = m.hit;

  str = str[digits.length+1..$];
  return to!T(digits);
}

//----------------------------------------------------------------------------

// Decodes an array.
//  - For character element types the input must be "s<byte count>:<bytes>".
//    The payload is UTF-8; it is copied for char targets and transcoded for
//    wchar/dchar targets.
//  - For every other element type the input must be "a<element count>"
//    followed by that many serialized elements.

T[] unserialize(T:T[])(ref string str)
{
  static if (isSomeChar!T)
  {
    // string values.

    size_t len = checkLengthTypeStart(str, 's');

    if (str.length == 0 || str[0] != ':')
      throw new Exception(format("Malformed serialize string. Expecting ':', got %s",str));

    // str still holds the ':' separator, so len payload bytes require
    // len+1 bytes in total. Written as a subtraction so that a huge len
    // cannot overflow.
    if (str.length - 1 < len)
      throw new Exception(format("Malformed serialize string. Expecting string of length %d, got %s",len,str));

    auto s = str[1..len+1];
    str = str[len+1..$];

    static if (is(Unqual!T == char))
    {
      static if (is(T == immutable))
        return s.idup;
      else static if (is(T == const))
        return s.idup;
      else
        return s.dup;
    }
    else
    {
      // wchar/dchar targets: the payload is UTF-8 and must be transcoded.
      return to!(T[])(s);
    }
  }
  else
  {
    // ordinary arrays
    auto len = checkLengthTypeStart(str, 'a');

    T[] va;
    foreach (i; 0..len)
      va ~= unserialize!(T)(str);
    return va;
  }
}

//----------------------------------------------------------------------------

// Decodes an associative array: "o<entry count>" followed by that many
// key/value pairs. A key that occurs more than once keeps its last value.

T[U] unserialize(T:T[U],U)(ref string str)
{
  auto len = checkLengthTypeStart(str, 'o');

  T[U] va;
  foreach (i; 0..len)
  {
    U key = unserialize!(U)(str);
    va[key] = unserialize!(T)(str);
  }
  return va;
}

//----------------------------------------------------------------------------
// Allows custom unserialize functions to be used.
// F is called once per element with str (by reference) and must consume
// exactly one element from it.

T[] customUnserialize(alias F,T:T[])(ref string str)
{
  auto len = checkLengthTypeStart(str, 'a');

  T[] va;
  foreach (i; 0..len)
    va ~= F(str);
  return va;
}

//----------------------------------------------------------------------------
// Allows custom unserialize functions to be used.
// Keys are decoded with unserialize, values with F.

T[U] customUnserialize(alias F,T:T[U],U)(ref string str)
{
  auto len = checkLengthTypeStart(str, 'o');

  T[U] va;
  foreach (i; 0..len)
  {
    U key = unserialize!(U)(str);
    va[key] = F(str);
  }
  return va;
}

//----------------------------------------------------------------------------

/*
 * Will unserialize a general string, but associative arrays can only have
 * simple types for keys, and all keys in an associative array must be of the
 * same type.
 *
 * The type of each value is taken from its leading type character. Integers
 * are decoded as long, floats as double, strings as string, arrays as
 * Variant[] and associative arrays as Variant[K] where K is the type of the
 * first key. An empty associative array carries no key to inspect and is
 * returned as an empty Variant[string].
 */

Variant unserialize(V:Variant)(ref string str)
{
  if (str.length == 0)
    throw new Exception("Malformed serialize string. Expecting a type character, got an empty string");

  switch (str[0])
  {
    case 'b':
      return Variant(unserialize!(bool)(str));
    case 'i':
      return Variant(unserialize!(long)(str));
    case 'f':
      return Variant(unserialize!(double)(str));
    case 's':
      return Variant(unserialize!(string)(str));
    case 'a':
      auto len = checkLengthTypeStart(str, 'a');

      Variant[] va;
      foreach (i; 0..len)
      {
        va ~= unserialize!(Variant)(str);
      }
      return Variant(va);
    case 'o':
      auto leno = checkLengthTypeStart(str, 'o');

      // With no entries, whatever follows belongs to the next value and
      // must not be interpreted as a key type.
      if (leno == 0)
      {
        Variant[string] none;
        return Variant(none);
      }

      if (str.length == 0)
        throw new Exception("Malformed serialize string. Expecting a key, got an empty string");

      // The first key decides the key type for the whole associative array.
      switch (str[0])
      {
        case 'b':
          return Variant(unserializeV!(bool)(str,leno));
        case 'i':
          return Variant(unserializeV!(long)(str,leno));
        case 'f':
          return Variant(unserializeV!(double)(str,leno));
        case 's':
          return Variant(unserializeV!(string)(str,leno));
        default:
          throw new Exception(format("Serialize type '%c' not allowed as keys",str[0]));
      }
    default:
      throw new Exception(format("Unknown serialize type '%c'",str[0]));
  }
}

//----------------------------------------------------------------------------

// Decodes len key/value pairs into an associative array whose keys have type
// T and whose values are decoded generically as Variant.

Variant[T] unserializeV(T)(ref string str, uint len)
{
  Variant[T] vaa;
  foreach (j; 0..len)
  {
    auto key = unserialize!(T)(str);
    vaa[key] = unserialize!(Variant)(str);
  }
  return vaa;
}

//----------------------------------------------------------------------------

unittest
{
  // Some alias to help make code more legible (and less bugprone).

  alias const(char)[] cstring;
  alias char[] mstring;

  import std.stdio;
  /* Caution, with associative arrays, the order is not necessarily the same
   * as given. */
  assert(serialize!(bool)(true) == "b1");
  assert(serialize!(bool)(false) == "b0");

  assert(serialize!(int)(42) == "i42");
  assert(serialize!(int)(33) == "i33");
  assert(serialize!(uint)(12) == "i12");
  assert(serialize!(ubyte)(16) == "i16");
  assert(serialize!(ulong)(183) == "i183");
  assert(serialize!(float)(2.5) == "f2.500000");
  assert(serialize!(float)(.5) == "f0.500000");
  assert(serialize!(int)(-101) == "i-101");

  mstring ss = "xyz".dup;
  assert(serialize!(cstring)("abc") == "s3:abc");
  assert(serialize!(cstring)("abc\"xf\nl") == "s8:abc\"xf\nl");
  assert(serialize!(cstring)(ss) == "s3:xyz");
  assert(serialize!(mstring)(ss) == "s3:xyz");
  //assert(serialize!string(ss) == "s3:xyz");

  int[] x = [1,2,3];
  string x_s = serialize!(int[])(x);
  assert(x_s == "a3i1i2i3",x_s);

  // Malformed input must be reported with an Exception, not a range error.
  {
    import std.exception : assertThrown;

    string bad = "s5:abcd";   // one byte short of the announced length
    assertThrown!Exception(unserialize!(string)(bad));

    bad = "";
    assertThrown!Exception(unserialize!(int)(bad));
    assertThrown!Exception(unserialize!(double)(bad));

    bad = "s3";               // separator missing
    assertThrown!Exception(unserialize!(string)(bad));
  }
  /+
  int[cstring] y;
  y["a"] = 1;
  y["b"] = 2;
  string y_s = serialize!(int[cstring])(y);
  assert(y_s == `o2s1:ai1s1:bi2`,y_s);

  mstring[cstring] z;
  z["a"] = "x1".dup;
  z["b"] = "y2".dup;
  z["c"] = "z3".dup;
  string z_s = serialize!(mstring[cstring])(z);

  assert(z_s == `o3s1:as2:x1s1:bs2:y2s1:cs2:z3`,z_s);

  cstring[cstring] zc;
  zc["a"] = "x1";
  zc["b"] = "y2";
  zc["c"] = "z3";
  string zc_s = serialize!(cstring[cstring])(zc);
  assert(zc_s == `o3s1:as2:x1s1:bs2:y2s1:cs2:z3`,zc_s);

  cstring[cstring] zi;
  zi["a"] = "x1";
  zi["b"] = "y2";
  zi["c"] = "z3";
  string zi_s = serialize!(cstring[cstring])(zi);
  assert(zi_s == `o3s1:as2:x1s1:bs2:y2s1:cs2:z3`, zi_s);

  int[][] td = [[1,2,3],[4,5,6,10],[7,8,9]];
  string td_s = serialize!(int[][])(td);
  assert(td_s == "a3a3i1i2i3a4i4i5i6i10a3i7i8i9",td_s);

  int[cstring] tcd1;
  tcd1["a"] = 1;
  tcd1["b"] = 2;
  tcd1["c"] = 3;

  int[cstring] tcd2;
  tcd2["d"] = 4;
  tcd2["e"] = 5;
  tcd2["f"] = 6;

  int[cstring][] tcd = [ ["a":1,"b":2,"c":3], ["d":4,"e":5,"f":6] ];
  // tcd ~= tcd1;
  //tcd ~= tcd2;

  string tcd_s = serialize!(int[cstring][])(tcd);
  // AA! assert(tcd_s == `a2o3s1:bi2s1:ai1s1:ci3o3s1:fi6s1:ei5s1:di4`,tcd_s);

  string str = "b0b1xx";
  assert(unserialize!(bool)(str) == false);
  assert(unserialize!(bool)(str) == true);
  assert(str == "xx");

  str = "a23s241o13yy";
  assert(checkLengthTypeStart(str,'a') == 23);
  assert(checkLengthTypeStart(str,'s') == 241);
  assert(checkLengthTypeStart(str,'o') == 13);
  assert(str == "yy");

  str = "s5:abcdefg";
  assert(unserialize!(mstring)(str) == "abcde");
  assert(str == "fg");

  str = "s3:2bcs4:defg";
  assert(unserialize!(const(char)[])(str) == "2bc");
  assert(unserialize!(string)(str) == "defg");
  assert(str == "");

  str = "i15f2.6i-22i00f-1e5zz";
  assert(unserialize!(int)(str) == 15);
  auto f = unserialize!(float)(str);
  assert(f == 2.6f, to!string(f));
  assert(unserialize!(long)(str) == -22);
  assert(unserialize!(uint)(str) == 0);
  auto d = unserialize!(double)(str);
  assert(d == -100000.0, to!string(d));
  assert(str == "zz");

  assert(unserialize!(int[])(x_s) == x);
  assert(unserialize!(int[][])(td_s) == td);

  auto y_uns = unserialize!(int[cstring])(y_s);
  assert(y_uns.length == 2);
  assert("a" in y_uns);
  assert("b" in y_uns);
  assert(y_uns["a"] == 1);
  assert(y_uns["b"] == 2);
  //assert(y_uns == y); This doesn't work for some reason.

  auto z_uns = unserialize!(mstring[cstring])(z_s);
  assert(z_uns.length == 3);
  assert("a" in z_uns);
  assert("b" in z_uns);
  assert("c" in z_uns);
  assert(z_uns["a"] == "x1");
  assert(z_uns["b"] == "y2");
  assert(z_uns["c"] == "z3");

  auto tcd_uns = unserialize!(int[cstring][])(tcd_s);
  assert(tcd_uns.length == 2);
  auto tcd1_uns = tcd_uns[0];
  assert(tcd1_uns.length == 3);
  assert("a" in tcd1_uns);
  assert("b" in tcd1_uns);
  assert("c" in tcd1_uns);
  assert(tcd1_uns["a"] == 1);
  assert(tcd1_uns["b"] == 2);
  assert(tcd1_uns["c"] == 3);
  auto tcd2_uns = tcd_uns[1];
  assert(tcd2_uns.length == 3);
  assert("d" in tcd2_uns);
  assert("e" in tcd2_uns);
  assert("f" in tcd2_uns);
  assert(tcd2_uns["d"] == 4);
  assert(tcd2_uns["e"] == 5);
  assert(tcd2_uns["f"] == 6);


  string tcd_ss = "a2o3s1:ai1s1:bi2s1:ci3o3s1:di4s1:ei5s1:fi6";
  auto tcd_v = unserialize!(Variant)(tcd_ss);
  assert(tcd_v.type() == typeid(Variant[]));
  assert(tcd_v.length == 2);
  auto tcd1_v = tcd_v[0];
  assert(tcd1_v.length == 3);
  assert(tcd1_v.type() == typeid(Variant[string]));
  auto tcd1_vn = tcd1_v.get!(Variant[string]);
  assert("a" in tcd1_vn);
  assert("b" in tcd1_vn);
  assert("c" in tcd1_vn);
  assert(tcd1_v["a"] == 1);
  assert(tcd1_v["b"] == 2);
  assert(tcd1_v["c"] == 3);
  auto tcd2_v = tcd_v[1];
  assert(tcd2_v.length == 3);
  assert(tcd2_v.type() == typeid(Variant[string]));
  auto tcd2_vn = tcd2_v.get!(Variant[string]);
  assert("d" in tcd2_vn);
  assert("e" in tcd2_vn);
  assert("f" in tcd2_vn);
  assert(tcd2_v["d"] == 4);
  assert(tcd2_v["e"] == 5);
  assert(tcd2_v["f"] == 6);

  +/
}