Skip to content

Commit 4e7b780

Browse files
astojilj authored and annxingyuan committed
Packed pad operation. (tensorflow#1498)
PERF: Packed pad operation. Adds packed input sampling and packed output coordinate calculation for arbitrary tensor rank; tests verify 4-D, 5-D and 6-D. Introduces the WEBGL_PACK_ARRAY_OPERATIONS flag to configure usage of packed SpaceToBatchND, BatchToSpaceND, slice, pad and transpose.
1 parent 72ca174 commit 4e7b780

File tree

5 files changed: +137 / -46 lines changed

src/environment.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,8 @@ export class Environment {
318318
return this.get('WEBGL_PACK');
319319
} else if (feature === 'WEBGL_PACK_BINARY_OPERATIONS') {
320320
return this.get('WEBGL_PACK');
321+
} else if (feature === 'WEBGL_PACK_ARRAY_OPERATIONS') {
322+
return this.get('WEBGL_PACK');
321323
} else if (feature === 'WEBGL_LAZILY_UNPACK') {
322324
return this.get('WEBGL_PACK');
323325
} else if (feature === 'WEBGL_CONV_IM2COL') {

src/environment_util.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ export interface Features {
3838
'WEBGL_PACK_DEPTHWISECONV'?: boolean;
3939
// Whether we will pack binary operations.
4040
'WEBGL_PACK_BINARY_OPERATIONS'?: boolean;
41+
// Whether we will pack SpaceToBatchND, BatchToSpaceND, slice, pad, transpose.
42+
'WEBGL_PACK_ARRAY_OPERATIONS'?: boolean;
4143
// Whether we will use the im2col algorithm to speed up convolutions.
4244
'WEBGL_CONV_IM2COL'?: boolean;
4345
// The maximum texture dimension.
@@ -108,6 +110,7 @@ export const URL_PROPERTIES: URLProperty[] = [
108110
{name: 'WEBGL_PACK_CLIP', type: Type.BOOLEAN},
109111
{name: 'WEBGL_PACK_DEPTHWISECONV', type: Type.BOOLEAN},
110112
{name: 'WEBGL_PACK_BINARY_OPERATIONS', type: Type.BOOLEAN},
113+
{name: 'WEBGL_PACK_ARRAY_OPERATIONS', type: Type.BOOLEAN},
111114
{name: 'WEBGL_CONV_IM2COL', type: Type.BOOLEAN},
112115
{name: 'WEBGL_MAX_TEXTURE_SIZE', type: Type.NUMBER},
113116
{name: 'WEBGL_NUM_MB_BEFORE_PAGING', type: Type.NUMBER},

src/kernels/backend_webgl.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ import {MultinomialProgram} from './webgl/multinomial_gpu';
8484
import {OneHotProgram} from './webgl/onehot_gpu';
8585
import {PackProgram} from './webgl/pack_gpu';
8686
import {PadProgram} from './webgl/pad_gpu';
87+
import {PadPackedProgram} from './webgl/pad_packed_gpu';
8788
import {Pool2DProgram} from './webgl/pool_gpu';
8889
import {ReduceProgram} from './webgl/reduce_gpu';
8990
import {ReshapePackedProgram} from './webgl/reshape_packed_gpu';
@@ -915,7 +916,9 @@ export class MathBackendWebGL implements KernelBackend {
915916

916917
pad<T extends Tensor>(
917918
x: T, paddings: Array<[number, number]>, constantValue: number): T {
918-
const program = new PadProgram(x.shape, paddings, constantValue);
919+
const program = ENV.get('WEBGL_PACK_ARRAY_OPERATIONS') ?
920+
new PadPackedProgram(x.shape, paddings, constantValue) :
921+
new PadProgram(x.shape, paddings, constantValue);
919922
return this.compileAndRun(program, [x]);
920923
}
921924

src/kernels/webgl/pad_packed_gpu.ts

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
/**
2+
* @license
3+
* Copyright 2019 Google Inc. All Rights Reserved.
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
* =============================================================================
16+
*/
17+
18+
import {GPGPUProgram} from './gpgpu_math';
19+
import {getCoordsDataType} from './shader_compiler';
20+
import {getChannels} from '../packing_util';
21+
22+
export class PadPackedProgram implements GPGPUProgram {
23+
variableNames = ['x'];
24+
usesPackedTextures = true;
25+
outputShape: number[];
26+
userCode: string;
27+
28+
constructor(
29+
xShape: number[], paddings: Array<[number, number]>,
30+
constantValue: number) {
31+
this.outputShape = paddings.map(
32+
(p, i) => p[0] /* beforePad */ + xShape[i] + p[1] /* afterPad */);
33+
const rank = xShape.length;
34+
const dtype = getCoordsDataType(rank);
35+
36+
const start = paddings.map(p => p[0]).join(',');
37+
const end = paddings.map((p, i) => p[0] + xShape[i]).join(',');
38+
const coords = getChannels('rc', rank);
39+
const source = getChannels('source', rank);
40+
const cLimit = `${coords[rank - 1]} < ${this.outputShape[rank - 1]}`;
41+
const innerDims =
42+
rank === 1 ? 'source' : `vec2(${source.slice(-2).join()})`;
43+
44+
const componentSetup = [
45+
`${dtype} rc = outputLoc;`,
46+
`${coords[rank - 1]} += 1;
47+
if(${cLimit}) {
48+
`,
49+
rank === 1 ? '' :
50+
`}
51+
rc = outputLoc;
52+
${coords[rank - 2]} += 1;
53+
if(${coords[rank - 2]} < ${this.outputShape[rank - 2]}) {`,
54+
rank === 1 ? '' :
55+
` ${coords[rank - 1]} += 1;
56+
if(${cLimit}) {`
57+
];
58+
59+
const paddingArea = rank === 1 ?
60+
'rc < start || rc >= end' :
61+
'any(lessThan(rc, start)) || any(greaterThanEqual(rc, end))';
62+
let mainLoop = '';
63+
for (let i = 0, j = rank === 1 ? 2 : 4; i < j; i++) {
64+
mainLoop += `
65+
${componentSetup[i]}
66+
if (${paddingArea}) {
67+
result[${i}] = float(${constantValue});
68+
} else {
69+
${dtype} source = rc - start;
70+
result[${i}] = getChannel(getX(${source.join()}), ${innerDims});
71+
}
72+
`;
73+
}
74+
mainLoop += (rank === 1 ? `} ` : `}}`);
75+
76+
this.userCode = `
77+
const ${dtype} start = ${dtype}(${start});
78+
const ${dtype} end = ${dtype}(${end});
79+
80+
void main() {
81+
${dtype} outputLoc = getOutputCoords();
82+
vec4 result = vec4(0.);
83+
${mainLoop}
84+
setOutput(result);
85+
}
86+
`;
87+
}
88+
}

src/kernels/webgl/shader_compiler.ts

Lines changed: 40 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -119,12 +119,8 @@ function getPackedSamplerFromInInfo(inInfo: InputInfo): string {
119119
return getPackedSampler2D(inInfo);
120120
case 3:
121121
return getPackedSampler3D(inInfo);
122-
case 4:
123-
return getPackedSampler4D(inInfo);
124122
default:
125-
throw new Error(
126-
`Packed ${shape.length}-D input sampling` +
127-
` is not yet supported`);
123+
return getPackedSamplerND(inInfo);
128124
}
129125
}
130126

@@ -165,13 +161,8 @@ function getPackedOutputSamplingSnippet(
165161
case 3:
166162
return getOutputPacked3DCoords(
167163
outShape as [number, number, number], outTexShape);
168-
case 4:
169-
return getOutputPacked4DCoords(
170-
outShape as [number, number, number, number], outTexShape);
171164
default:
172-
throw new Error(
173-
`${outShape.length}-D packed output ` +
174-
`coordinate fetching is not yet supported`);
165+
return getOutputPackedNDCoords(outShape, outTexShape);
175166
}
176167
}
177168

@@ -312,7 +303,6 @@ function getShaderPrefix(glsl: GLSL): string {
312303
${SAMPLE_1D_SNIPPET}
313304
${SAMPLE_2D_SNIPPET}
314305
${SAMPLE_3D_SNIPPET}
315-
${SAMPLE_4D_SNIPPET}
316306
${SAMPLE_5D_SNIPPET}
317307
${SAMPLE_6D_SNIPPET}
318308
`;
@@ -355,18 +345,6 @@ vec2 packedUVfrom3D(int texNumR, int texNumC,
355345
}
356346
`;
357347

358-
const SAMPLE_4D_SNIPPET = `
359-
vec2 packedUVfrom4D(int texNumR, int texNumC, int texelsInBatch2,
360-
int texelsInBatch, int texelsInLogicalRow, int b2, int b,
361-
int row, int col) {
362-
int index = b2 * texelsInBatch2 + b * texelsInBatch +
363-
(row / 2) * texelsInLogicalRow + (col / 2);
364-
int texR = index / texNumC;
365-
int texC = index - texR * texNumC;
366-
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
367-
}
368-
`;
369-
370348
const SAMPLE_5D_SNIPPET = `
371349
vec2 UVfrom5D(int texNumR, int texNumC, int stride0,
372350
int stride1, int stride2, int stride3, int row, int col, int depth,
@@ -508,32 +486,42 @@ function getOutput3DCoords(
508486
`;
509487
}
510488

511-
function getOutputPacked4DCoords(
512-
shape: [number, number, number, number],
513-
texShape: [number, number]): string {
489+
function getOutputPackedNDCoords(
490+
shape: number[], texShape: [number, number]): string {
514491
const packedTexShape =
515492
[Math.ceil(texShape[0] / 2), Math.ceil(texShape[1] / 2)];
516493

517-
const texelsInLogicalRow = Math.ceil(shape[3] / 2);
518-
const texelsInBatch = texelsInLogicalRow * Math.ceil(shape[2] / 2);
519-
const texelsInBatch2 = texelsInBatch * shape[1];
494+
const texelsInLogicalRow = Math.ceil(shape[shape.length - 1] / 2);
495+
const texelsInBatch =
496+
texelsInLogicalRow * Math.ceil(shape[shape.length - 2] / 2);
497+
let texelsInBatchN = texelsInBatch;
498+
let batches = ``;
499+
let coords = 'b, r, c';
500+
501+
for (let b = 2; b < shape.length - 1; b++) {
502+
texelsInBatchN *= shape[shape.length - b - 1];
503+
batches = `
504+
int b${b} = index / ${texelsInBatchN};
505+
index -= b${b} * ${texelsInBatchN};
506+
` + batches;
507+
coords = `b${b}, ` + coords;
508+
}
520509

521510
return `
522-
ivec4 getOutputCoords() {
511+
ivec${shape.length} getOutputCoords() {
523512
ivec2 resTexRC = ivec2(resultUV.yx *
524513
vec2(${packedTexShape[0]}, ${packedTexShape[1]}));
525514
int index = resTexRC.x * ${packedTexShape[1]} + resTexRC.y;
526-
527-
int b2 = index / ${texelsInBatch2};
528-
index -= b2 * ${texelsInBatch2};
515+
516+
${batches}
529517
530518
int b = index / ${texelsInBatch};
531519
index -= b * ${texelsInBatch};
532520
533521
int r = 2 * (index / ${texelsInLogicalRow});
534522
int c = imod(index, ${texelsInLogicalRow}) * 2;
535523
536-
return ivec4(b2, b, r, c);
524+
return ivec${shape.length}(${coords});
537525
}
538526
`;
539527
}
@@ -996,8 +984,9 @@ function getSampler3D(inputInfo: InputInfo): string {
996984
`;
997985
}
998986

999-
function getPackedSampler4D(inputInfo: InputInfo): string {
987+
function getPackedSamplerND(inputInfo: InputInfo): string {
1000988
const shape = inputInfo.shapeInfo.logicalShape;
989+
const rank = shape.length;
1001990
const texName = inputInfo.name;
1002991
const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
1003992
const texShape = inputInfo.shapeInfo.texShape;
@@ -1006,17 +995,23 @@ function getPackedSampler4D(inputInfo: InputInfo): string {
1006995
const texNumR = packedTexShape[0];
1007996
const texNumC = packedTexShape[1];
1008997

1009-
const valuesPerRow = Math.ceil(shape[3] / 2);
1010-
const texelsInBatch = valuesPerRow * Math.ceil(shape[2] / 2);
1011-
const texelsInBatch2 = texelsInBatch * shape[1];
998+
const valuesPerRow = Math.ceil(shape[rank - 1] / 2);
999+
let texelsInBatch = valuesPerRow * Math.ceil(shape[rank - 2] / 2);
1000+
let params = `int b, int row, int col`;
1001+
let index = `b * ${texelsInBatch} + (row / 2) * ${valuesPerRow} + (col / 2)`;
1002+
for (let b = 2; b < rank - 1; b++) {
1003+
params = `int b${b}, ` + params;
1004+
texelsInBatch *= shape[rank - b - 1];
1005+
index = `b${b} * ${texelsInBatch} + ` + index;
1006+
}
10121007
const glsl = getGlslDifferences();
1013-
10141008
return `
1015-
vec4 ${funcName}(int b2, int b, int row, int col) {
1016-
vec2 uv = packedUVfrom4D(
1017-
${texNumR}, ${texNumC}, ${texelsInBatch2},
1018-
${texelsInBatch}, ${valuesPerRow}, b2, b, row, col);
1019-
return ${glsl.texture2D}(${texName}, uv);
1009+
vec4 ${funcName}(${params}) {
1010+
int index = ${index};
1011+
int texR = index / ${texNumC};
1012+
int texC = index - texR * ${texNumC};
1013+
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${texNumC}, ${texNumR});
1014+
return ${glsl.texture2D}(${texName}, uv);
10201015
}
10211016
`;
10221017
}

0 commit comments

Comments
 (0)