Skip to content
This repository was archived by the owner on Aug 15, 2019. It is now read-only.

WebGL backend hands off kernel execution to CPU if inputs are small and on the CPU #1371

Merged
merged 18 commits into from
Nov 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion scripts/test-travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ if [[ $(node -v) = *v10* ]]; then
"run-browserstack --browsers=bs_ios_11 --backend webgl" \
"run-browserstack --browsers=bs_ios_11 --features '{\"HAS_WEBGL\": false}' --backend cpu" \
"run-browserstack --browsers=bs_firefox_mac" \
"run-browserstack --browsers=bs_chrome_mac"
"run-browserstack --browsers=bs_chrome_mac" \
"run-browserstack --browsers=bs_chrome_mac --features '{\"WEBGL_CPU_FORWARD\": true}' --backend webgl"
fi
2 changes: 2 additions & 0 deletions src/environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ export class Environment {
(typeof process.versions.node !== 'undefined');
} else if (feature === 'IS_CHROME') {
return isChrome();
} else if (feature === 'WEBGL_CPU_FORWARD') {
return true;
} else if (feature === 'WEBGL_PACK_BATCHNORMALIZATION') {
return false;
} else if (feature === 'WEBGL_CONV_IM2COL') {
Expand Down
3 changes: 3 additions & 0 deletions src/environment_util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export interface Features {
'IS_BROWSER'?: boolean;
// Whether we are in the Node.js environment.
'IS_NODE'?: boolean;
// Whether the WebGL backend will sometimes forward ops to the CPU.
'WEBGL_CPU_FORWARD'?: boolean;
// Whether we will pack the batchnormalization op.
'WEBGL_PACK_BATCHNORMALIZATION'?: boolean;
// Whether we will use the im2col algorithm to speed up convolutions.
Expand Down Expand Up @@ -83,6 +85,7 @@ export enum Type {
export const URL_PROPERTIES: URLProperty[] = [
{name: 'DEBUG', type: Type.BOOLEAN},
{name: 'IS_BROWSER', type: Type.BOOLEAN},
{name: 'WEBGL_CPU_FORWARD', type: Type.BOOLEAN},
{name: 'WEBGL_PACK_BATCHNORMALIZATION', type: Type.BOOLEAN},
{name: 'WEBGL_CONV_IM2COL', type: Type.BOOLEAN},
{name: 'WEBGL_MAX_TEXTURE_SIZE', type: Type.NUMBER},
Expand Down
4 changes: 2 additions & 2 deletions src/jasmine_util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ export let TEST_ENVS: TestEnv[] = [
{
name: 'test-webgl1',
factory: () => new MathBackendWebGL(),
features: {'WEBGL_VERSION': 1}
features: {'WEBGL_VERSION': 1, 'WEBGL_CPU_FORWARD': false}
},
{
name: 'test-webgl2',
factory: () => new MathBackendWebGL(),
features: {'WEBGL_VERSION': 2}
features: {'WEBGL_VERSION': 2, 'WEBGL_CPU_FORWARD': false}
},
{
name: 'test-cpu',
Expand Down
70 changes: 70 additions & 0 deletions src/kernels/backend_webgl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import {DataId, Scalar, setTensorTracker, Tensor, Tensor1D, Tensor2D, Tensor3D,
import {DataType, DataTypeMap, Rank, RecursiveArray, ShapeMap, sumOutType, TypedArray, upcastType} from '../types';
import * as util from '../util';
import {getTypedArrayFromDType, sizeFromShape} from '../util';

import {DataMover, DataStorage, KernelBackend} from './backend';
import * as backend_util from './backend_util';
import {mergeRealAndImagArrays} from './complex_util';
Expand Down Expand Up @@ -128,6 +129,9 @@ export interface TensorHandle {
dtype: DataType;
}

// Empirically determined constant used as the default size threshold for
// handing off execution to the CPU. shouldExecuteOnCPU() only forwards a kernel
// when every input has no texture and its `size` is strictly below this value.
// NOTE(review): `size` is presumably the tensor's element count - confirm.
const CPU_HANDOFF_SIZE_THRESHOLD = 10;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an explanation for this hard-coded number.

// Empirically determined constant used to decide the number of bytes on GPU
// before we start paging. The bytes are this constant * screen area * dpi.
const BEFORE_PAGING_CONSTANT = 300;
Expand Down Expand Up @@ -160,6 +164,7 @@ export class MathBackendWebGL implements KernelBackend {
private uploadWaitMs = 0;
// Accumulated time spent (including blocking) in downloading data from webgl.
private downloadWaitMs = 0;
private cpuBackend: KernelBackend;

register(dataId: DataId, shape: number[], dtype: DataType): void {
if (this.texData.has(dataId)) {
Expand Down Expand Up @@ -504,6 +509,33 @@ export class MathBackendWebGL implements KernelBackend {
this.textureManager = new TextureManager(this.gpgpu);
}

/**
 * Returns the backend that small kernels may be forwarded to, or null when
 * the 'WEBGL_CPU_FORWARD' feature is disabled.
 *
 * The backend is looked up with ENV.findBackend('cpu') the first time it is
 * needed and stored in `this.cpuBackend`; later calls reuse the stored value.
 * NOTE(review): if findBackend() yields null/undefined the lookup is retried
 * on every call and that value is returned as-is.
 */
private getCPUBackend() {
  const forwardingEnabled = ENV.get('WEBGL_CPU_FORWARD');
  if (forwardingEnabled) {
    // Lazily resolve the CPU backend so nothing is looked up unless
    // forwarding is actually switched on.
    if (this.cpuBackend == null) {
      this.cpuBackend = ENV.findBackend('cpu');
    }
    return this.cpuBackend;
  }

  return null;
}

/**
 * Heuristic for deciding when a kernel would run faster on the CPU: it checks
 * that every input to the op is small and still on the CPU. WebGL kernels opt
 * in by calling this check and forwarding to the CPU backend when it passes.
 *
 * TODO(https://github.com/tensorflow/tfjs/issues/872): Develop a more
 * sustainable strategy for optimizing backend execution of ops.
 *
 * @param inputs The tensors the kernel is about to consume.
 * @param sizeThreshold Exclusive upper bound on each input's `size`; defaults
 *     to CPU_HANDOFF_SIZE_THRESHOLD.
 * @returns true only if getCPUBackend() returns a backend and every input has
 *     no texture recorded in `texData` and a `size` below `sizeThreshold`.
 */
private shouldExecuteOnCPU(
    inputs: Tensor[], sizeThreshold = CPU_HANDOFF_SIZE_THRESHOLD): boolean {
  // Without a CPU backend (or with forwarding disabled) there is nowhere to
  // hand the kernel off to. This call also populates `this.cpuBackend`.
  if (this.getCPUBackend() == null) {
    return false;
  }

  const isSmallAndOnCPU = (input: Tensor): boolean => {
    const {texture} = this.texData.get(input.dataId);
    // A missing texture means the data has not been uploaded to the GPU.
    return texture == null && input.size < sizeThreshold;
  };

  return inputs.every(isSmallAndOnCPU);
}

/** Returns the GPGPUContext held in `this.gpgpu`. */
getGPGPUContext(): GPGPUContext {
return this.gpgpu;
}
Expand Down Expand Up @@ -534,6 +566,10 @@ export class MathBackendWebGL implements KernelBackend {
}

slice<T extends Tensor>(x: T, begin: number[], size: number[]): T {
if (this.shouldExecuteOnCPU([x])) {
return this.cpuBackend.slice(x, begin, size);
}

const program = new SliceProgram(size);
const customSetup = program.getCustomSetupFunc(begin);
return this.compileAndRun(program, [x], null, customSetup);
Expand All @@ -543,6 +579,12 @@ export class MathBackendWebGL implements KernelBackend {
x: T, begin: number[], end: number[], strides: number[],
beginMask: number, endMask: number, ellipsisMask: number,
newAxisMask: number, shrinkAxisMask: number): T {
if (this.shouldExecuteOnCPU([x])) {
return this.cpuBackend.stridedSlice(
x, begin, end, strides, beginMask, endMask, ellipsisMask, newAxisMask,
shrinkAxisMask);
}

const [beginIndex, size, shrinkAxis] = getStridedSlicedInfo(
x.shape, begin, end, strides, beginMask, endMask, ellipsisMask,
newAxisMask, shrinkAxisMask);
Expand Down Expand Up @@ -579,6 +621,10 @@ export class MathBackendWebGL implements KernelBackend {
}

concat(tensors: Tensor[], axis: number): Tensor {
if (this.shouldExecuteOnCPU(tensors)) {
return this.cpuBackend.concat(tensors, axis);
}

if (tensors.length === 1) {
return tensors[0];
}
Expand Down Expand Up @@ -644,6 +690,10 @@ export class MathBackendWebGL implements KernelBackend {
return complex;
}

if (this.shouldExecuteOnCPU([a, b])) {
return this.cpuBackend.multiply(a, b);
}

const program = new BinaryOpProgram(binaryop_gpu.MUL, a.shape, b.shape);
const output = this.makeOutputArray(program.outputShape, a.dtype) as Tensor;
return this.compileAndRun(program, [a, b], output) as Tensor;
Expand Down Expand Up @@ -926,6 +976,10 @@ export class MathBackendWebGL implements KernelBackend {
}

less(a: Tensor, b: Tensor): Tensor {
if (this.shouldExecuteOnCPU([a, b])) {
return this.cpuBackend.less(a, b);
}

const program = new BinaryOpProgram(binaryop_gpu.LESS, a.shape, b.shape);
const output = this.makeOutputArray(program.outputShape, 'bool');
return this.compileAndRun(program, [a, b], output);
Expand All @@ -939,6 +993,10 @@ export class MathBackendWebGL implements KernelBackend {
}

greater(a: Tensor, b: Tensor): Tensor {
if (this.shouldExecuteOnCPU([a, b])) {
return this.cpuBackend.greater(a, b);
}

const program = new BinaryOpProgram(binaryop_gpu.GREATER, a.shape, b.shape);
const output = this.makeOutputArray(program.outputShape, 'bool');
return this.compileAndRun(program, [a, b], output);
Expand Down Expand Up @@ -1000,6 +1058,10 @@ export class MathBackendWebGL implements KernelBackend {
}

/**
 * Runs the MIN binary op on `a` and `b`.
 *
 * When shouldExecuteOnCPU() accepts both inputs the call is delegated to the
 * CPU backend's `minimum`; otherwise a BinaryOpProgram is compiled and run on
 * the WebGL backend.
 *
 * @param a First operand.
 * @param b Second operand.
 * @returns The tensor produced by whichever backend executed the op.
 */
minimum(a: Tensor, b: Tensor): Tensor {
  if (!this.shouldExecuteOnCPU([a, b])) {
    const minProgram =
        new BinaryOpProgram(binaryop_gpu.MIN, a.shape, b.shape);
    return this.compileAndRun(minProgram, [a, b]);
  }

  // shouldExecuteOnCPU() has already resolved `this.cpuBackend`.
  return this.cpuBackend.minimum(a, b);
}
Expand All @@ -1020,6 +1082,10 @@ export class MathBackendWebGL implements KernelBackend {
}

/**
 * Runs the MAX binary op on `a` and `b`.
 *
 * When shouldExecuteOnCPU() accepts both inputs the call is delegated to the
 * CPU backend's `maximum`; otherwise a BinaryOpProgram is compiled and run on
 * the WebGL backend.
 *
 * @param a First operand.
 * @param b Second operand.
 * @returns The tensor produced by whichever backend executed the op.
 */
maximum(a: Tensor, b: Tensor): Tensor {
  if (!this.shouldExecuteOnCPU([a, b])) {
    const maxProgram =
        new BinaryOpProgram(binaryop_gpu.MAX, a.shape, b.shape);
    return this.compileAndRun(maxProgram, [a, b]);
  }

  // shouldExecuteOnCPU() has already resolved `this.cpuBackend`.
  return this.cpuBackend.maximum(a, b);
}
Expand Down Expand Up @@ -1132,6 +1198,10 @@ export class MathBackendWebGL implements KernelBackend {
return this.complexSeparableBinaryOp(a, b, binaryop_gpu.SUB);
}

if (this.shouldExecuteOnCPU([a, b])) {
return this.cpuBackend.subtract(a, b);
}

const program = new BinaryOpProgram(binaryop_gpu.SUB, a.shape, b.shape);
const output =
this.makeOutputArray(
Expand Down