Daniel Clifford

V8 Performance Tuning Tricks





Please enter questions in Google Moderator at goo.gl/rVf9s

Care about performance?

Before you optimize...

Example: protocol parser

 

data length\u0000data bytes
12\u0000gddberlin.de

 

/**
 * Baseline parser for a stream of length-prefixed packets.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload. This
 * version accumulates the length as a string, copies the payload with
 * substr, and recurses on a copy of the remaining input.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = '';
  for (var i = 0; i < s.length; i++) {
    var ch = s[i];
    if (ch != '\u0000') {
      // Still inside the length prefix: collect one more digit.
      length += ch;
      continue;
    }
    // Separator reached: the digits collected so far are the payload size.
    length = Number(length);
    var payloadStart = i + 1;
    this.emit('data', s.substr(payloadStart, length));
    // Hand the rest of the input to a fresh invocation.
    return this.parse(s.substr(payloadStart + length));
  }
};

Prepare fake input data

Prepare fake input data

/**
 * Builds one synthetic packet: a payload length chosen by randomBetween
 * (bounded by MIN_PACKET_LENGTH and MAX_PACKET_LENGTH), a '\u0000'
 * separator, and a payload made of that many space characters.
 *
 * @returns {string} The packet text.
 */
function makeFakePacket() {
  var length = randomBetween(MIN_PACKET_LENGTH, MAX_PACKET_LENGTH);
  var packet = length + '\u0000';
  // Append the payload one space at a time.
  var remaining = length;
  while (remaining > 0) {
    packet += ' ';
    remaining--;
  }
  return packet;
}

// Benchmark input: NOF_PACKETS generated packets concatenated into a
// single string, which is later passed to the parser as one stream.
var fakeInput = '';
for (var i = 0; i < NOF_PACKETS; i++) {
  fakeInput += makeFakePacket();
}

Stress it

// Benchmark parameters.
// Bounds handed to randomBetween when makeFakePacket picks a payload length.
var MIN_PACKET_LENGTH = 10;
var MAX_PACKET_LENGTH = 100;
// Number of packets concatenated into fakeInput.
var NOF_PACKETS = 1000;
// Number of times the whole input is parsed in the timed loop.
var NOF_RUNS = 1000;

Stress it

// Timing harness: parse the same input NOF_RUNS times and report the
// elapsed wall-clock time and the resulting throughput.
var p = new Parser();

// Only the parse loop sits between the two timestamps.
var start = Date.now();
for (var j = 0; j < NOF_RUNS; j++) {
  p.parse(fakeInput);
}
var end = Date.now();

var timeSpent = end - start;
// Total characters handed to the parser across all runs.
var totalBytes = fakeInput.length * NOF_RUNS;

print(timeSpent + ' ms');
print(totalBytes/timeSpent + ' bytes/ms');

Stress it

% out/ia32.release/d8 parser.js
319 ms
357680.25078369904 bytes/ms

Profile it

% out/ia32.release/d8 --prof parser.js
319 ms
357680.25078369904 bytes/ms

Built-in sampling profiler

Profile it

% out/ia32.release/d8 --prof parser.js
319 ms
357680.25078369904 bytes/ms
% tools/mac-tick-processor
 [JavaScript]:
   ticks  total  nonlib   name
    105   34.2%   41.0%  LazyCompile: *Parser.parse parser.js:54
     57   18.6%   22.3%  LazyCompile: *substr native string.js:702
     26    8.5%   10.2%  Stub: SubStringStub
     18    5.9%    7.0%  LazyCompile: ToNumber native runtime.js:526
     11    3.6%    4.3%  Stub: StringAddStub

 [GC]:
   ticks  total  nonlib   name
      5    3.2%

 

/**
 * Baseline parser for a stream of length-prefixed packets.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload. This
 * version accumulates the length as a string, copies the payload with
 * substr, and recurses on a copy of the remaining input.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = '';
  for (var i = 0; i < s.length; i++) {
    var ch = s[i];
    if (ch != '\u0000') {
      // Still inside the length prefix: collect one more digit.
      length += ch;
      continue;
    }
    // Separator reached: the digits collected so far are the payload size.
    length = Number(length);
    var payloadStart = i + 1;
    this.emit('data', s.substr(payloadStart, length));
    // Hand the rest of the input to a fresh invocation.
    return this.parse(s.substr(payloadStart + length));
  }
};
/**
 * Iterative parser for a stream of length-prefixed packets.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload.
 * Instead of recursing, this version cuts the consumed packet off the front
 * of the input and restarts the scan at index 0.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = '', i = 0;
  while (i < s.length) {
    if (s[i] == '\u0000') {
      length = Number(length);
      this.emit('data', s.substr(i + 1, length));
      // Drop the separator and the payload just emitted, then start over
      // on the remaining input.
      s = s.substr(i + 1 + length);
      i = 0;
      length = '';
    } else {
      // Still inside the length prefix: collect one more digit.
      length += s[i++];
    }
  }
};

Profile it (again)

% out/ia32.release/d8 --prof parser.js
289 ms
394809.68858131487 bytes/ms
% tools/mac-tick-processor
 [JavaScript]:
   ticks  total  nonlib   name
     88   28.9%   34.0%  LazyCompile: *Parser.parse parser.js:35
     62   20.4%   23.9%  LazyCompile: *substr native string.js:702
     35   11.5%   13.5%  Stub: SubStringStub
     18    5.9%    6.9%  LazyCompile: ToNumber native runtime.js:526
     12    3.9%    4.6%  Stub: StringAddStub

 [GC]:
   ticks  total  nonlib   name
      6    2.0%

 

/**
 * Iterative parser for a stream of length-prefixed packets.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload.
 * Instead of recursing, this version cuts the consumed packet off the front
 * of the input and restarts the scan at index 0.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = '', i = 0;
  while (i < s.length) {
    if (s[i] == '\u0000') {
      length = Number(length);
      this.emit('data', s.substr(i + 1, length));
      // Drop the separator and the payload just emitted, then start over
      // on the remaining input.
      s = s.substr(i + 1 + length);
      i = 0;
      length = '';
    } else {
      // Still inside the length prefix: collect one more digit.
      length += s[i++];
    }
  }
};
/**
 * Iterative parser for a stream of length-prefixed packets that never
 * copies the remaining input.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload; the
 * scan index is then moved past the payload instead of slicing the input.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = '';
  var i = 0;
  while (i < s.length) {
    var ch = s[i];
    if (ch != '\u0000') {
      // Still inside the length prefix: collect one more digit.
      length += ch;
      i++;
      continue;
    }
    length = Number(length);
    this.emit('data', s.substr(i + 1, length));
    // Skip the separator and the payload in one step.
    i += length + 1;
    length = '';
  }
};

Profile it (yet again)

% out/ia32.release/d8 --prof parser.js
204 ms
559313.725490196 bytes/ms
% tools/mac-tick-processor
 [JavaScript]:
   ticks  total  nonlib   name
     56   28.0%   33.7%  LazyCompile: *Parser.parse parser.js:74
     25   12.5%   15.1%  LazyCompile: *substr native string.js:702
     21   10.5%   12.7%  LazyCompile: ToNumber native runtime.js:526
     10    5.0%    6.0%  Stub: StringAddStub
     11    5.5%    6.6%  Stub: SubStringStub

 [GC]:
   ticks  total  nonlib   name
      5    2.5%

 

/**
 * Iterative parser for a stream of length-prefixed packets that never
 * copies the remaining input.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload; the
 * scan index is then moved past the payload instead of slicing the input.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = '';
  var i = 0;
  while (i < s.length) {
    var ch = s[i];
    if (ch != '\u0000') {
      // Still inside the length prefix: collect one more digit.
      length += ch;
      i++;
      continue;
    }
    length = Number(length);
    this.emit('data', s.substr(i + 1, length));
    // Skip the separator and the payload in one step.
    i += length + 1;
    length = '';
  }
};
/**
 * Parser for a stream of length-prefixed packets that works on character
 * codes and keeps the length as a number throughout.
 *
 * Each packet is a decimal length, a '\u0000' separator, and then that many
 * characters of payload. A 'data' event is emitted for every payload.
 * The length is accumulated arithmetically from the digit character codes,
 * so no intermediate strings or string-to-number conversions are needed.
 *
 * @param {string} s Input holding zero or more complete packets.
 */
Parser.prototype.parse = function (s) {
  var length = 0, i = 0, ch0 = "0".charCodeAt(0);
  while (i < s.length) {
    var ch = s.charCodeAt(i);
    if (ch === 0) {
      this.emit('data', s.substr(i + 1, length));
      // Skip the separator and the payload in one step.
      i += length + 1;
      length = 0;
    } else {
      // Fold the next decimal digit into the running length.
      length = length * 10 + (ch - ch0);
      // Advance past the digit; without this the loop never terminates.
      i++;
    }
  }
};

So how much did it help?

% out/ia32.release/d8 parser.js
319 ms
357680 bytes/ms

vs.

% out/ia32.release/d8 parser.js
103 ms
1107766 bytes/ms

Understanding the Optimizing Compiler

NOF_RUNS = 10

% out/ia32.release/d8 --prof parser.js
6 ms
95568.33333333333 bytes/ms
% tools/mac-tick-processor
[JavaScript]:
3   15.8%   37.5%  Stub: StringAddStub
3   15.8%   37.5%  LazyCompile: substr native string.js:702
1    5.3%   12.5%  Stub: SubStringStub

NOF_RUNS = 1000

% out/ia32.release/d8 --prof parser.js
319 ms
357680.25078369904 bytes/ms
% tools/mac-tick-processor
[JavaScript]:
47  20.4%   20.4%  LazyCompile: *Parser.parse
33  14.3%   14.3%  Stub: SubString
21   9.1%    9.1%  LazyCompile: *substr native
14   6.1%    6.1%  LazyCompile: ~ToNumber native
12   5.2%    5.2%  Stub: StringAdd

NOF_RUNS = 1000

% out/ia32.release/d8 --prof parser.js
319 ms
357680.25078369904 bytes/ms
% tools/mac-tick-processor
[JavaScript]:
47 20.4% 20.4% LazyCompile: *Parser.parse
33 14.3% 14.3% Stub: SubString
21 9.1% 9.1% LazyCompile: *substr native
14 6.1% 6.1% LazyCompile: ~ToNumber native
12 5.2% 5.2% Stub: StringAdd

Code starts unoptimized

Parser.parse

String.substr

makeFakePacket

randomBetween

V8 profiles for hotspots

Parser.parse

String.substr

makeFakePacket

randomBetween

Optimizes hot functions

*Parser.parse

*String.substr

makeFakePacket

randomBetween

 

d8 --trace-opt parser.js


log names of optimized functions to stdout

 

Not all constructs are supported by optimizing compiler

In hot code

Avoid "with" and "try {} catch (e) {}" for now

 

d8 --trace-bailout parser.js


log optimizing compiler bailouts

 

Optimizations are:

 

d8 --trace-deopt parser.js


log deoptimizations

 

chrome --js-flags='--trace-opt
--trace-deopt --trace-bailout'


Can be used in Chrome, too

Understanding Objects

// Constructor that assigns the same two properties, x then y, on every
// instance it creates.
function Point(x, y) {
this.x = x;
this.y = y;
}
var p1 = new Point(11, 22);
var p2 = new Point(33, 44);
// Only p2 receives an additional property after construction.
p2.z = 55;
// p1 and p2 now have
// different hidden classes
// A property load as it appears in JavaScript source.
v = obj.x
// NOTE(review): presumably the same load expressed as a generic runtime
// lookup (conceptual pseudo-code) — confirm against the talk.
v = Runtime_GetProperty(obj, 'x');
/**
 * Generic property load (conceptual pseudo-code): obtains the object's
 * hidden class, asks it for the slot index of the named field, and reads
 * that slot from the object.
 *
 * @param {Object} obj Object to read from.
 * @param {string} f   Field name.
 * @returns {*} Whatever is stored in the slot the hidden class reports.
 */
function Runtime_GetProperty(obj, f) {
  var fieldIndex = HiddenClass(obj).IndexOfField(f);
  return obj[fieldIndex];
}
// Cached property load (conceptual pseudo-code): compare the object's
// hidden class with the one remembered in the cache.
if (HiddenClass(obj) == cache.clazz) {
  // Hit: read the slot directly using the remembered index.
  v = obj[cache.index];
} else {
  // Miss: fall back to the miss handler for field 'x'.
  v = Runtime_LoadCache_Miss(cache, obj, 'x');
}
/**
 * Cache-miss handler (conceptual pseudo-code): performs the full lookup,
 * records the hidden class and slot index in the cache, and returns the
 * loaded value.
 *
 * @param {Object} cache Cache record; its clazz and index fields are overwritten.
 * @param {Object} obj   Object to read from.
 * @param {string} f     Field name.
 * @returns {*} Whatever is stored in the slot the hidden class reports.
 */
function Runtime_LoadCache_Miss(cache, obj, f) {
  var hiddenClass = HiddenClass(obj);
  var slot = hiddenClass.IndexOfField(f);
  // Update the cache only after both lookups have succeeded.
  cache.clazz = hiddenClass;
  cache.index = slot;
  return obj[slot];
}

 

Monomorphic sites are better than polymorphic

Dictionary mode for Properties

For hot objects:

Understanding Numbers

Number representations

 

double value assignments to properties cause allocation

e.g. o.x = 2.5; a[0] = 4.5;

Understanding Elements

Elements are Optimized

Dictionary Elements

Fast Elements

In hot code

Prefer Fast Elements over Dictionary Elements

In hot code

Pre-allocate sized arrays

use new Array(num)

In hot code

Avoid accessing uninitialized elements

e.g. a = new Array(); a[0] |= b;

In hot code

WebGL typed arrays avoid HeapNumber allocation

Float32Array, Float64Array

Variables & Scopes

Two types of variables

// Illustrates the two kinds of variables: v1 is not referenced by any
// inner function, while v2 and v3 are each captured by one (g and h).
function f() {
  var v1;  // real local variable
  var v2;  // context allocated
  var v3;  // context allocated

  // Each inner function closes over exactly one of the outer variables.
  function g() { use(v2); }
  function h() { use(v3); }
}

 

Contexts are created eagerly

// Illustrates eager context creation: arg2 is captured by a closure that
// is only created when almostAlwaysFalse() returns a truthy value, yet
// (per the notes below) the context is set up on entry either way.
function foo(arg1, arg2, arg3) {
  /* V8 allocates context, copies arg2 to it */

  if (almostAlwaysFalse()) {
    // The only place where arg2 is captured.
    return function () { return arg2; }
  }

  /* Context is not used */
}

 

Thank you!

... and a big thanks to Vyacheslav Egorov for much of the material for this presentation

Questions?