1 //Written in the D programming language
2 /*
3  * Serializes data to and from strings.
4  *
5  * It does not support the ability to read/write objects, but does
6  * support arrays and associative arrays.
7  *
8  * To serialize a value, call "serialize!(Type)(value)".
9  *
 * Unserializing is a bit trickier. You need to know the type beforehand
 * and unserialize using it. An alternative would be to use Variant, but I
 * have not got that working.
13  *
 * To unserialize a value, call "unserialize!(Type)(str)", where str is a
 * string variable passed by reference. str will be updated to the remainder
 * of the string and the decoded value is returned.
17  *
18  * Future direction: Make it all work with ranges, maybe.
19  *
20  * Copyright 2013 Jaypha
21  *
22  * Distributed under the Boost Software License, Version 1.0.
23  * (See http://www.boost.org/LICENSE_1_0.txt)
24  *
25  * Authors: Jason den Dulk
26  */
27 
28 module jaypha.io.serialize;
29 
30 import std.regex;
31 import std.format;
32 import std.array;
33 import std.traits;
34 import std.string;
35 import std.conv;
36 import std.utf;
37 import std.variant;
38 
39 //----------------------------------------------------------------------------
40 //
41 // serialize
42 //
43 //----------------------------------------------------------------------------
44 //
45 // Various serialize functions of the general format
46 // string serialize(T)(T value) where T is a supported type.
47 // Returns the encoded value.
48 //
49 //----------------------------------------------------------------------------
50 
// Serializes a single character as a string value.
//
// The character is encoded as UTF-8 and written as "s<len>:<bytes>", where
// <len> is the number of UTF-8 code units the character occupies.
//
// Params:  value = the char, wchar or dchar to serialize.
// Returns: the encoded value.
string serialize(T)(T value) if (isSomeChar!T)
{
  // std.utf.encode fills the buffer with the UTF-8 form of the character and
  // returns how many code units it wrote. It replaces the older
  // toUTF8(char[4], dchar) overload, which newer Phobos releases no longer
  // provide.
  char[4] buffer;
  immutable used = encode(buffer, value);
  return format("s%d:%s", used, buffer[0 .. used]);
}
57 
58 //----------------------------------------------------------------------------
59 
// Serializes a boolean: true becomes "b1", false becomes "b0".
string serialize(T:bool)(T value)
{
  if (value)
    return "b1";
  return "b0";
}
64 
65 //----------------------------------------------------------------------------
66 
// Serializes an integral value as 'i' followed by its decimal digits
// (with a leading '-' for negative values), e.g. 42 becomes "i42".
string serialize(T)(T value) if (isIntegral!(T))
{
  auto encoded = appender!string();
  encoded.put('i');
  formattedWrite(encoded, "%d", value);
  return encoded.data;
}
71 
72 //----------------------------------------------------------------------------
73 
// Serializes a floating point value as 'f' followed by the value formatted
// with "%f", e.g. 2.5 becomes "f2.500000".
string serialize(T)(T value) if (isFloatingPoint!(T))
{
  immutable digits = format("%f", value);
  return "f" ~ digits;
}
78 
79 //----------------------------------------------------------------------------
80 
// Serializes an array.
//
// Arrays of char are written as strings: "s<length>:<contents>".
// Arrays of wchar or dchar are converted to UTF-8 and then written as strings.
// Any other array is written as "a<length>" followed by each element
// serialized in order.
string serialize(T:T[])(T[] value)
{
  alias Unqual!T Elem;

  static if (is(Elem == wchar) || is(Elem == dchar))
  {
    // Transcode first, then reuse the UTF-8 string path.
    return serialize!(const(char)[])(toUTF8(value));
  }
  else static if (is(Elem == char))
  {
    return format("s%d:%s",value.length,value);
  }
  else
  {
    auto sink = appender!(string);
    sink.put(format("a%d",value.length));
    foreach (element; value)
      sink.put(serialize!(T)(element));
    return sink.data;
  }
}
96 
97 //----------------------------------------------------------------------------
98 
// Serializes an associative array as "o<length>" followed by each key and
// its value, both serialized, in the order the array is iterated.
string serialize(T:T[U],U)(T[U] value)
{
  auto sink = appender!string();
  sink.put(format("o%d",value.length));
  foreach (key, item; value)
  {
    sink.put(serialize!(U)(key));
    sink.put(serialize!(T)(item));
  }
  return sink.data;
}
110 
111 //----------------------------------------------------------------------------
112 
// Serializes an array as "a<length>" followed by the result of calling F on
// each element in order. F supplies the encoding of the individual elements.
string customSerialize(alias F,T:T[])(T[] value)
{
  auto sink = appender!(string);
  sink.put(format("a%d",value.length));
  foreach (element; value)
    sink.put(F(element));
  return sink.data;
}
121 
122 //----------------------------------------------------------------------------
123 
// Serializes an associative array as "o<length>" followed by each key,
// encoded with serialize, and its value, encoded by calling F.
string customSerialize(alias F,T:T[U],U)(T[U] value)
{
  auto sink = appender!(string);
  sink.put(format("o%d",value.length));
  foreach (key, item; value)
  {
    sink.put(serialize!(U)(key));
    sink.put(F(item));
  }
  return sink.data;
}
135 
136 //----------------------------------------------------------------------------
137 // checkLengthTypeStart
138 //----------------------------------------------------------------------------
139 // Used to parse the start of s, a and o types. Returns the extracted length.
140 //----------------------------------------------------------------------------
141 
// Parses a type character followed by an unsigned length from the front of
// str.
//
// Params:
//   str  = the input; on success it is advanced past the type character and
//          the digits of the length.
//   type = the type character that str must start with.
// Returns: the length that followed the type character.
// Throws:  Exception if str is empty, does not start with type, or has no
//          digits after the type character.
uint checkLengthTypeStart(ref string str, dchar type)
{
  immutable bool startsWithType = str.length != 0 && str[0] == type;
  if (!startsWithType)
    throw new Exception(format("Malformed serialize string. Expecting '%c', got %s",type,str));

  auto found = match(str[1..$],php_z_uint);
  if (!found)
    throw new Exception(format("Malformed serialize string. Expecting integer, got %s",str));

  auto digits = found.hit;
  // Skip the type character (1) plus the digits just read.
  str = str[digits.length+1..$];
  return to!uint(digits);
}
154 
155 //----------------------------------------------------------------------------
156 //
157 // unserialize
158 //
159 //----------------------------------------------------------------------------
160 //
161 // Various unserialize functions of the general format
// T unserialize(T)(ref string str) where T is a supported type.
163 // Returns the decoded value, and str is updated to the remainder of the
164 // string.
165 //
166 //----------------------------------------------------------------------------
167 
// Patterns for the numeric text that follows a type character. All are
// anchored to the start of the input.
//
// The digit class is spelled [0-9] rather than \d: in std.regex \d matches
// any Unicode digit, but the matched text is then handed to std.conv.to,
// which only understands ASCII digits.
enum php_z_int = ctRegex!(r"^-?[0-9]+");
enum php_z_uint = ctRegex!(r"^[0-9]+");
enum php_z_float = ctRegex!(r"^-?[0-9]+(\.[0-9]+)?((E|e)(\+|-)?[0-9]+)?");
171 
// Decodes a boolean from the front of str.
//
// "b1" decodes to true and "b0" to false. On success str is advanced past
// the two characters read; when an exception is thrown str is left as it was.
//
// Throws: Exception if str is shorter than two characters or does not start
//         with "b0" or "b1".
T unserialize(T:bool)(ref string str)
{
  if (str.length <2)
    throw new Exception(format("Malformed serialize string. Expecting \"b0\"/\"b1\", got \"%s\"",str));

  immutable tag = str[0..2];
  bool decoded;
  switch (tag)
  {
    case "b1":
      decoded = true;
      break;
    case "b0":
      decoded = false;
      break;
    default:
      throw new Exception(format("Malformed serialize string. Expecting \"b0\"/\"b1\", got \"%s\"",tag));
  }

  str = str[2..$];
  return decoded;
}
180 
181 //----------------------------------------------------------------------------
182 
// Decodes an integral value from the front of str.
//
// The expected form is 'i' followed by an optional '-' and decimal digits.
// On success str is advanced past the text that was read; when an exception
// is thrown str is left as it was.
//
// Returns: the decoded value converted to T.
// Throws:  Exception if str is empty, does not start with 'i', or has no
//          integer after the 'i'. Conversion failures from std.conv.to
//          (for example a value out of range for T) propagate to the caller.
T unserialize(T)(ref string str) if (isIntegral!T)
{
  // Check for empty input before indexing, so that truncated data is reported
  // as a malformed string rather than as a range violation.
  if (str.length == 0 || str[0] != 'i')
    throw new Exception(format("Malformed serialize string. Expecting 'i', got %s",str));

  auto m = match(str[1..$],php_z_int);
  if (!m)
    throw new Exception(format("Malformed serialize string. Expecting integer, got %s",str));

  const(char)[] val = m.hit;

  // Convert before advancing so a failed conversion leaves str untouched.
  T result = to!T(val);
  str = str[val.length+1..$];
  return result;
}
197 
198 //----------------------------------------------------------------------------
199 
// Decodes a floating point value from the front of str.
//
// The expected form is 'f' followed by a decimal number with an optional
// fraction and an optional exponent. On success str is advanced past the
// text that was read; when an exception is thrown str is left as it was.
//
// Returns: the decoded value converted to T.
// Throws:  Exception if str is empty, does not start with 'f', or has no
//          number after the 'f'. Conversion failures from std.conv.to
//          propagate to the caller.
T unserialize(T)(ref string str) if (isFloatingPoint!T)
{
  // Check for empty input before indexing, so that truncated data is reported
  // as a malformed string rather than as a range violation.
  if (str.length == 0 || str[0] != 'f')
    throw new Exception(format("Malformed serialize string. Expecting 'f', got %s",str));

  auto m = match(str[1..$],php_z_float);
  if (!m)
    throw new Exception(format("Malformed serialize string. Expecting float, got %s",str));

  const(char)[] val = m.hit;

  // Convert before advancing so a failed conversion leaves str untouched.
  T result = to!T(val);
  str = str[val.length+1..$];
  return result;
}
214 
215 //----------------------------------------------------------------------------
216 
// Decodes an array from the front of str.
//
// When the element type is a character type the input must be a string
// value, "s<length>:<contents>", where <length> counts UTF-8 code units.
// For wchar and dchar element types the contents are converted from UTF-8.
// For any other element type the input must be "a<length>" followed by
// <length> serialized elements, each decoded with unserialize!(T).
//
// On success str is advanced past everything that was read.
//
// Throws: Exception if the input does not have the expected form, including
//         when fewer than <length> code units follow the ':' of a string.
T[] unserialize(T:T[])(ref string str)
{
  static if (isSomeChar!T)
  {
    // string values.

    // Work on a copy so str is only updated once the whole value is valid.
    string rest = str;
    auto len = checkLengthTypeStart(rest, 's');

    if (rest.length == 0 || rest[0] != ':')
      throw new Exception(format("Malformed serialize string. Expecting ':', got %s",rest));

    // rest still starts with the ':' separator, so only rest.length - 1
    // code units are available for the contents.
    if (rest.length - 1 < len)
      throw new Exception(format("Malformed serialize string. Expecting string of length %d, got %s",len,rest));

    immutable size_t stop = cast(size_t) len + 1;
    auto s = rest[1..stop];
    str = rest[stop..$];

    static if (is(Unqual!T == char))
    {
      // The slice is immutable, so const and immutable results can share it;
      // a mutable result needs its own copy.
      static if (is(T == immutable) || is(T == const))
        return s;
      else
        return s.dup;
    }
    else
    {
      // wchar / dchar targets: transcode from UTF-8.
      return to!(T[])(s);
    }
  }
  else
  {
    // ordinary arrays
    auto len = checkLengthTypeStart(str, 'a');

    T[] va;
    foreach (i; 0..len)
      va ~= unserialize!(T)(str);
    return va;
  }
}
252 
253 //----------------------------------------------------------------------------
254 
// Decodes an associative array from the front of str.
//
// The expected form is "o<length>" followed by <length> pairs, each a
// serialized key of type U and a serialized value of type T. str is advanced
// as the pairs are read.
T[U] unserialize(T:T[U],U)(ref string str)
{
  immutable count = checkLengthTypeStart(str, 'o');

  T[U] result;
  foreach (n; 0..count)
  {
    U key = unserialize!(U)(str);
    result[key] = unserialize!(T)(str);
  }
  return result;
}
267 
268 //----------------------------------------------------------------------------
269 // Allows custom unserialize functions to be used.
270 
// Decodes an array from the front of str, using F to decode each element.
//
// The expected form is "a<length>" followed by <length> elements. F is called
// with str once per element and its results are collected in order.
T[] customUnserialize(alias F,T:T[])(ref string str)
{
  immutable count = checkLengthTypeStart(str, 'a');

  T[] result;
  foreach (n; 0..count)
    result ~= F(str);
  return result;
}
280 
281 //----------------------------------------------------------------------------
282 // Allows custom unserialize functions to be used.
283 
// Decodes an associative array from the front of str, using F to decode each
// value.
//
// The expected form is "o<length>" followed by <length> pairs. Each key is
// decoded with unserialize!(U) and each value by calling F with str.
T[U] customUnserialize(alias F,T:T[U],U)(ref string str)
{
  immutable count = checkLengthTypeStart(str, 'o');

  T[U] result;
  foreach (n; 0..count)
  {
    U key = unserialize!(U)(str);
    result[key] = F(str);
  }
  return result;
}
296 
297 //----------------------------------------------------------------------------
298 
299 /*
300  * Will unserialize a general string, but associative arrays can only have
301  * simple types for keys, and all keys in an associative array must be of the
302  * same type.
303  */
304 
// Decodes whatever value is at the front of str into a Variant, choosing the
// type from the leading type character:
//
//   'b' -> bool, 'i' -> long, 'f' -> double, 's' -> string,
//   'a' -> Variant[], with each element decoded recursively,
//   'o' -> Variant[K], where K is bool, long, double or string according to
//          the type character of the first key.
//
// An associative array with no entries has no key to take a type from; it is
// returned as an empty Variant[string].
//
// Throws: Exception if str is empty, starts with an unknown type character,
//         or an associative array has a key type other than those listed.
Variant unserialize(V:Variant)(ref string str)
{
  // Check for empty input before indexing, so that truncated data is reported
  // as a malformed string rather than as a range violation.
  if (str.length == 0)
    throw new Exception("Malformed serialize string. Expecting a value, got end of input");

  switch (str[0])
  {
    case 'b':
      return Variant(unserialize!(bool)(str));
    case 'i':
      return Variant(unserialize!(long)(str));
    case 'f':
      return Variant(unserialize!(double)(str));
    case 's':
      return Variant(unserialize!(string)(str));
    case 'a':
      auto len = checkLengthTypeStart(str, 'a');

      Variant[] va;
      foreach (i; 0..len)
      {
        va ~= unserialize!(Variant)(str);
      }
      return Variant(va);
    case 'o':
      auto leno = checkLengthTypeStart(str, 'o');

      // With no entries, whatever follows belongs to the enclosing value (or
      // the input has ended), so it must not be inspected as a key.
      if (leno == 0)
      {
        Variant[string] none;
        return Variant(none);
      }

      if (str.length == 0)
        throw new Exception("Malformed serialize string. Expecting a key, got end of input");

      // The first key's type character decides the key type of the whole array.
      switch (str[0])
      {
        case 'b':
          return Variant(unserializeV!(bool)(str,leno));
        case 'i':
          return Variant(unserializeV!(long)(str,leno));
        case 'f':
          return Variant(unserializeV!(double)(str,leno));
        case 's':
          return Variant(unserializeV!(string)(str,leno));
        default:
          throw new Exception(format("Serialize type '%c' not allowed as keys",str[0]));
      }
    default:
      throw new Exception(format("Unknown serialize type '%c'",str[0]));
  }
}
345 
346 //----------------------------------------------------------------------------
347 
// Decodes len key/value pairs from the front of str into an associative
// array. Keys are decoded as T and values as Variant. The "o<length>" header
// is not read here; the caller passes the length in as len.
Variant[T] unserializeV(T)(ref string str, uint len)
{
  Variant[T] result;
  for (uint n = 0; n < len; ++n)
  {
    auto key = unserialize!(T)(str);
    result[key] = unserialize!(Variant)(str);
  }
  return result;
}
358 
359 //----------------------------------------------------------------------------
360 
// Module self-test. Only the scalar, string and int[] serialize checks at the
// top are active; everything from the nesting comment onwards is disabled.
unittest
{
  // Some alias to help make code more legible (and less bugprone).

  alias const(char)[] cstring;
  alias char[] mstring;

  import std.stdio;
  /* Caution, with associative arrays, the order is not necessarily the same
   * as given. */
  assert(serialize!(bool)(true) == "b1");
  assert(serialize!(bool)(false) == "b0");

  assert(serialize!(int)(42) == "i42");
  assert(serialize!(int)(33) == "i33");
  assert(serialize!(uint)(12) == "i12");
  assert(serialize!(ubyte)(16) == "i16");
  assert(serialize!(ulong)(183) == "i183");
  assert(serialize!(float)(2.5) == "f2.500000");
  assert(serialize!(float)(.5) == "f0.500000");
  assert(serialize!(int)(-101) == "i-101");
  
  mstring ss = "xyz".dup;
  assert(serialize!(cstring)("abc") == "s3:abc");
  assert(serialize!(cstring)("abc\"xf\nl") == "s8:abc\"xf\nl");
  assert(serialize!(cstring)(ss) == "s3:xyz");
  assert(serialize!(mstring)(ss) == "s3:xyz");
  //assert(serialize!string(ss) == "s3:xyz");

  int[] x = [1,2,3];
  string x_s = serialize!(int[])(x);
  assert(x_s == "a3i1i2i3",x_s);
  // NOTE(review): the rest of this test (associative array serialization, all
  // unserialize checks and the Variant checks) is commented out below; the
  // reason is not recorded here.
/+
  int[cstring] y;
  y["a"] = 1;
  y["b"] = 2;
  string y_s = serialize!(int[cstring])(y);
  assert(y_s == `o2s1:ai1s1:bi2`,y_s);

  mstring[cstring] z;
  z["a"] = "x1".dup;
  z["b"] = "y2".dup;
  z["c"] = "z3".dup;
  string z_s = serialize!(mstring[cstring])(z);
  
  assert(z_s == `o3s1:as2:x1s1:bs2:y2s1:cs2:z3`,z_s);

  cstring[cstring] zc;
  zc["a"] = "x1";
  zc["b"] = "y2";
  zc["c"] = "z3";
  string zc_s = serialize!(cstring[cstring])(zc);
  assert(zc_s == `o3s1:as2:x1s1:bs2:y2s1:cs2:z3`,zc_s);

  cstring[cstring] zi;
  zi["a"] = "x1";
  zi["b"] = "y2";
  zi["c"] = "z3";
  string zi_s = serialize!(cstring[cstring])(zi);
  assert(zi_s == `o3s1:as2:x1s1:bs2:y2s1:cs2:z3`, zi_s);

  int[][] td = [[1,2,3],[4,5,6,10],[7,8,9]];
  string td_s = serialize!(int[][])(td);
  assert(td_s == "a3a3i1i2i3a4i4i5i6i10a3i7i8i9",td_s);

  int[cstring] tcd1;
  tcd1["a"] = 1;
  tcd1["b"] = 2;
  tcd1["c"] = 3;

  int[cstring] tcd2;
  tcd2["d"] = 4;
  tcd2["e"] = 5;
  tcd2["f"] = 6;

  int[cstring][] tcd = [ ["a":1,"b":2,"c":3], ["d":4,"e":5,"f":6] ];
//  tcd ~= tcd1;
  //tcd ~= tcd2;

  string tcd_s = serialize!(int[cstring][])(tcd);
  // AA! assert(tcd_s == `a2o3s1:bi2s1:ai1s1:ci3o3s1:fi6s1:ei5s1:di4`,tcd_s);

  string str = "b0b1xx";
  assert(unserialize!(bool)(str) == false);
  assert(unserialize!(bool)(str) == true);
  assert(str == "xx");

  str = "a23s241o13yy";
  assert(checkLengthTypeStart(str,'a') == 23);
  assert(checkLengthTypeStart(str,'s') == 241);
  assert(checkLengthTypeStart(str,'o') == 13);
  assert(str == "yy");

  str = "s5:abcdefg";
  assert(unserialize!(mstring)(str) == "abcde");
  assert(str == "fg");

  str = "s3:2bcs4:defg";
  assert(unserialize!(const(char)[])(str) == "2bc");
  assert(unserialize!(string)(str) == "defg");
  assert(str == "");

  str = "i15f2.6i-22i00f-1e5zz";
  assert(unserialize!(int)(str) == 15);
  auto f = unserialize!(float)(str);
  assert(f == 2.6f, to!string(f));
  assert(unserialize!(long)(str) == -22);
  assert(unserialize!(uint)(str) == 0);
  auto d = unserialize!(double)(str);
  assert(d == -100000.0, to!string(d));
  assert(str == "zz");

  assert(unserialize!(int[])(x_s) == x);
  assert(unserialize!(int[][])(td_s) == td);

  auto y_uns = unserialize!(int[cstring])(y_s);
  assert(y_uns.length == 2);
  assert("a" in y_uns);
  assert("b" in y_uns);
  assert(y_uns["a"] == 1);
  assert(y_uns["b"] == 2);
  //assert(y_uns == y);    This doesn't work for some reason.

  auto z_uns = unserialize!(mstring[cstring])(z_s);
  assert(z_uns.length == 3);
  assert("a" in z_uns);
  assert("b" in z_uns);
  assert("c" in z_uns);
  assert(z_uns["a"] == "x1");
  assert(z_uns["b"] == "y2");
  assert(z_uns["c"] == "z3");

  auto tcd_uns = unserialize!(int[cstring][])(tcd_s);
  assert(tcd_uns.length == 2);
  auto tcd1_uns = tcd_uns[0];
  assert(tcd1_uns.length == 3);
  assert("a" in tcd1_uns);
  assert("b" in tcd1_uns);
  assert("c" in tcd1_uns);
  assert(tcd1_uns["a"] == 1);
  assert(tcd1_uns["b"] == 2);
  assert(tcd1_uns["c"] == 3);
  auto tcd2_uns = tcd_uns[1];
  assert(tcd2_uns.length == 3);
  assert("d" in tcd2_uns);
  assert("e" in tcd2_uns);
  assert("f" in tcd2_uns);
  assert(tcd2_uns["d"] == 4);
  assert(tcd2_uns["e"] == 5);
  assert(tcd2_uns["f"] == 6);


  string tcd_ss = "a2o3s1:ai1s1:bi2s1:ci3o3s1:di4s1:ei5s1:fi6";
  auto tcd_v = unserialize!(Variant)(tcd_ss);
  assert(tcd_v.type() == typeid(Variant[]));
  assert(tcd_v.length == 2);
  auto tcd1_v = tcd_v[0];
  assert(tcd1_v.length == 3);
  assert(tcd1_v.type() == typeid(Variant[string]));
  auto tcd1_vn = tcd1_v.get!(Variant[string]);
  assert("a" in tcd1_vn);
  assert("b" in tcd1_vn);
  assert("c" in tcd1_vn);
  assert(tcd1_v["a"] == 1);
  assert(tcd1_v["b"] == 2);
  assert(tcd1_v["c"] == 3);
  auto tcd2_v = tcd_v[1];
  assert(tcd2_v.length == 3);
  assert(tcd2_v.type() == typeid(Variant[string]));
  auto tcd2_vn = tcd2_v.get!(Variant[string]);
  assert("d" in tcd2_vn);
  assert("e" in tcd2_vn);
  assert("f" in tcd2_vn);
  assert(tcd2_v["d"] == 4);
  assert(tcd2_v["e"] == 5);
  assert(tcd2_v["f"] == 6);

+/
}