JavaScript: Sets, Objects and Arrays

JavaScript has a new (well well) fancy Set datastructure (that does not come with functions for union, intersection and the likes, but whatever). A little while ago I tested Binary Search (also not in the standard library) and I was quite impressed with the performance.

When I code JavaScript I often hesitate about using an Array or an Object. And I have not started using Set much.

I decided to make some tests. Lets say we have pseudo-random natural numbers (like 10000 of them). We then want to check if a number is among the 10000 numbers or not (if it is a member of the set). A JavaScript Set does exactly that. A JavaScript Object just requires you to do: set[314] = true and you are basically done (it gets converted to a string, though). For an Array you just push(314), sort the array, and then use binary search to see if the value is there.

Obviously, if you often add or remove value, (re)sorting the Array will be annoying and costly. But quite often this is not the case.

The test
My test consists of generating N=10000 random unique numbers (with distance 1 or 2 between them). I then insert them (in a kind of pseudo-random order) into an Array (and sorts it), into an Object, and into a Set. I measure this time as an initiation time (for each data structure).

I repeat. So now I have 2xArrays, 2xObjects, 2xSets.

This way I can test both iterating and searching with all combinations of data structures (and check that the results are the same and thus correct).

Output of a single run: 100 iterations, N=10000, on a Linux Intel i5 and Node.js 8.9.1 looks like this:

                         ====== Search Structure ======
(ms)                        Array     Object      Set
     Initiate                1338        192      282
===== Iterate =====    
        Array                 800         39       93
       Object                 853        122      170
          Set                1147         82      131

By comparing columns you can compare the cost of searching (and initiating the structure before searching it). By comparing rows you can compare the cost of iterating over the different data structures (for example, iterating over Set while searching Array took 1147ms).

These results are quite consistent on this machine.

Findings
Some findings are very clear (I guess they are quite consistent across systems):

  • Putting values in an Array, to sort it, and the search it, is much slower and makes little sense compared to using an Object (or a Set)
  • Iterating an Array is a bit faster than iterating an Object or Set, so if you are never going to search an Array is faster
  • The newer and more specialized Set offers little advantage to good old Objects

What is more unclear is why iterating over Objects is faster when searching Arrays, but iterating over Sets if faster when searching Objects or Sets. What I find is:

  • Sets seem to perform comparably to Objects on Raspberry Pi, ARMv7.
  • Sets seem to underperform more on Mac OS X

Obviusly, all this is very unclear and can vary depending on CPU-cache, Node-version, OS and other factors.

Smaller and Larger sets
These findings hold quite well for smaller N=100 and larger N=1000000. The Array, despite being O(n log n), does not get much more worse for N=1000000 than it already was for N=10000.

Conclusions and Recommendation
I think the conservative choice is to use Arrays when order is important or you know you will not look for a member based on its unique id. If members have unique IDs and are not ordered, use Object. I see no reason to use Set, especially if you target browsers (support in IE is still limited in early 2018).

The Code
Here follows the source code. Output is not quite as pretty as the table above.

var lodash = require('lodash');

function randomarray(size) {
  var a = new Array(size);
  var x = 0;
  var i, r;
  var j = 0;
  var prime = 3;

  if ( 50   < size ) prime = 31;
  if ( 500  < size ) prime = 313;
  if ( 5000 < size ) prime = 3109;

  for ( i=0 ; i<size ; i++ ) {
    r = 1 + Math.floor(2 * Math.random());
    x += r;
    a[j] = '' + x;
    j += prime;
    if ( size <= j ) j-=size;
  }
  return a;
}

var times = {
  arr : {
    make : 0,
    arr  : 0,
    obj  : 0,
    set  : 0
  },
  obj : {
    make : 0,
    arr  : 0,
    obj  : 0,
    set  : 0
  },
  set : {
    make : 0,
    arr  : 0,
    obj  : 0,
    set  : 0
  }
}

function make_array(a) {
  times.arr.make -= Date.now();
  var i;
  var r = new Array(a.length);
  for ( i=a.length-1 ; 0<=i ; i-- ) {
    r[i] = a[i];
  }
  r.sort();
  times.arr.make += Date.now();
  return r;
}

function make_object(a) {
  times.obj.make -= Date.now();
  var i;
  var r = {};
  for ( i=a.length-1 ; 0<=i ; i-- ) {
    r[a[i]] = true;
  }
  times.obj.make += Date.now();
  return r;
}

function make_set(a) {
  times.set.make -= Date.now();
  var i;
  var r = new Set();
  for ( i=a.length-1 ; 0<=i ; i-- ) {
    r.add(a[i]);
  }
  times.set.make += Date.now();
  return r;
}

function make_triplet(n) {
  var r = randomarray(n);
  return {
    arr : make_array(r),
    obj : make_object(r),
    set : make_set(r)
  };
}

function match_triplets(t1,t2) {
  var i;
  var m = [];
  m.push(match_array_array(t1.arr , t2.arr));
  m.push(match_array_object(t1.arr , t2.obj));
  m.push(match_array_set(t1.arr , t2.set));
  m.push(match_object_array(t1.obj , t2.arr));
  m.push(match_object_object(t1.obj , t2.obj));
  m.push(match_object_set(t1.obj , t2.set));
  m.push(match_set_array(t1.set , t2.arr));
  m.push(match_set_object(t1.set , t2.obj));
  m.push(match_set_set(t1.set , t2.set));
  for ( i=1 ; i<m.length ; i++ ) {
    if ( m[0] !== m[i] ) {
      console.log('m[0]=' + m[0] + ' != m[' + i + ']=' + m[i]);
    }
  }
}

function match_array_array(a1,a2) {
  times.arr.arr -= Date.now();
  var r = 0;
  var i, v;
  for ( i=a1.length-1 ; 0<=i ; i-- ) {
    v = a1[i];
    if ( v === a2[lodash.sortedIndex(a2,v)] ) r++;
  }
  times.arr.arr += Date.now();
  return r;
}

function match_array_object(a1,o2) {
  times.arr.obj -= Date.now();
  var r = 0;
  var i;
  for ( i=a1.length-1 ; 0<=i ; i-- ) {
    if ( o2[a1[i]] ) r++;
  }
  times.arr.obj += Date.now();
  return r;
}

function match_array_set(a1,s2) {
  times.arr.set -= Date.now();
  var r = 0;
  var i;
  for ( i=a1.length-1 ; 0<=i ; i-- ) {
    if ( s2.has(a1[i]) ) r++;
  }
  times.arr.set += Date.now();
  return r;
}

function match_object_array(o1,a2) {
  times.obj.arr -= Date.now();
  var r = 0;
  var v;
  for ( v in o1 ) {
    if ( v === a2[lodash.sortedIndex(a2,v)] ) r++;
  }
  times.obj.arr += Date.now();
  return r;
}

function match_object_object(o1,o2) {
  times.obj.obj -= Date.now();
  var r = 0;
  var v;
  for ( v in o1 ) {
    if ( o2[v] ) r++;
  }
  times.obj.obj += Date.now();
  return r;
}

function match_object_set(o1,s2) {
  times.obj.set -= Date.now();
  var r = 0;
  var v;
  for ( v in o1 ) {
    if ( s2.has(v) ) r++;
  }
  times.obj.set += Date.now();
  return r;
}

function match_set_array(s1,a2) {
  times.set.arr -= Date.now();
  var r = 0;
  var v;
  var iter = s1[Symbol.iterator]();
  while ( ( v = iter.next().value ) ) {
    if ( v === a2[lodash.sortedIndex(a2,v)] ) r++;
  }
  times.set.arr += Date.now();
  return r;
}

function match_set_object(s1,o2) {
  times.set.obj -= Date.now();
  var r = 0;
  var v;
  var iter = s1[Symbol.iterator]();
  while ( ( v = iter.next().value ) ) {
    if ( o2[v] ) r++;
  }
  times.set.obj += Date.now();
  return r;
}

function match_set_set(s1,s2) {
  times.set.set -= Date.now();
  var r = 0;
  var v;
  var iter = s1[Symbol.iterator]();
  while ( ( v = iter.next().value ) ) {
    if ( s2.has(v) ) r++;
  }
  times.set.set += Date.now();
  return r;
}

function main() {
  var i;
  var t1;
  var t2;

  for ( i=0 ; i<100 ; i++ ) {
    t1 = make_triplet(10000);
    t2 = make_triplet(10000);
    match_triplets(t1,t2);
    match_triplets(t2,t1);
  }

  console.log('TIME=' + JSON.stringify(times,null,4));
}

main();

Leave a Comment


NOTE - You can use these HTML tags and attributes:
<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>

Time limit is exhausted. Please reload CAPTCHA.