piercus
diff --git a/‎src/environment.ts
Lines changed: 2 additions & 0 deletions b/‎src/environment.ts
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/environment_util.ts
Lines changed: 3 additions & 0 deletions b/‎src/environment_util.ts
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/kernels/backend_webgl.ts
Lines changed: 4 additions & 1 deletion b/‎src/kernels/backend_webgl.ts
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/kernels/webgl/pad_packed_gpu.ts
Lines changed: 88 additions & 0 deletions b/‎src/kernels/webgl/pad_packed_gpu.ts
Lines changed: 88 additions & 0 deletions
diff --git a/‎src/kernels/webgl/shader_compiler.ts
Lines changed: 40 additions & 45 deletions b/‎src/kernels/webgl/shader_compiler.ts
Lines changed: 40 additions & 45 deletions
@@ -318,6 +318,8 @@ export class Environment {
       return this.get('WEBGL_PACK');
     } else if (feature === 'WEBGL_PACK_BINARY_OPERATIONS') {
       return this.get('WEBGL_PACK');
+    } else if (feature === 'WEBGL_PACK_ARRAY_OPERATIONS') {
+      return this.get('WEBGL_PACK');
     } else if (feature === 'WEBGL_LAZILY_UNPACK') {
       return this.get('WEBGL_PACK');
     } else if (feature === 'WEBGL_CONV_IM2COL') {
 
@@ -38,6 +38,8 @@ export interface Features {
   'WEBGL_PACK_DEPTHWISECONV'?: boolean;
   // Whether we will pack binary operations.
   'WEBGL_PACK_BINARY_OPERATIONS'?: boolean;
+  // Whether we will pack SpaceToBatchND, BatchToSpaceND, slice, pad, transpose.
+  'WEBGL_PACK_ARRAY_OPERATIONS'?: boolean;
   // Whether we will use the im2col algorithm to speed up convolutions.
   'WEBGL_CONV_IM2COL'?: boolean;
   // The maximum texture dimension.
@@ -108,6 +110,7 @@ export const URL_PROPERTIES: URLProperty[] = [
   {name: 'WEBGL_PACK_CLIP', type: Type.BOOLEAN},
   {name: 'WEBGL_PACK_DEPTHWISECONV', type: Type.BOOLEAN},
   {name: 'WEBGL_PACK_BINARY_OPERATIONS', type: Type.BOOLEAN},
+  {name: 'WEBGL_PACK_ARRAY_OPERATIONS', type: Type.BOOLEAN},
   {name: 'WEBGL_CONV_IM2COL', type: Type.BOOLEAN},
   {name: 'WEBGL_MAX_TEXTURE_SIZE', type: Type.NUMBER},
   {name: 'WEBGL_NUM_MB_BEFORE_PAGING', type: Type.NUMBER},
 
@@ -84,6 +84,7 @@ import {MultinomialProgram} from './webgl/multinomial_gpu';
 import {OneHotProgram} from './webgl/onehot_gpu';
 import {PackProgram} from './webgl/pack_gpu';
 import {PadProgram} from './webgl/pad_gpu';
+import {PadPackedProgram} from './webgl/pad_packed_gpu';
 import {Pool2DProgram} from './webgl/pool_gpu';
 import {ReduceProgram} from './webgl/reduce_gpu';
 import {ReshapePackedProgram} from './webgl/reshape_packed_gpu';
@@ -915,7 +916,9 @@ export class MathBackendWebGL implements KernelBackend {
 
   pad<T extends Tensor>(
       x: T, paddings: Array<[number, number]>, constantValue: number): T {
-    const program = new PadProgram(x.shape, paddings, constantValue);
+    const program = ENV.get('WEBGL_PACK_ARRAY_OPERATIONS') ?  // choose the pad program from the WEBGL_PACK_ARRAY_OPERATIONS flag
+        new PadPackedProgram(x.shape, paddings, constantValue) :  // flag set: program that declares usesPackedTextures
+        new PadProgram(x.shape, paddings, constantValue);  // flag unset: the program that was used unconditionally before
     return this.compileAndRun(program, [x]);
   }
 
 
@@ -0,0 +1,88 @@
+/**
+ * @license
+ * Copyright 2019 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {GPGPUProgram} from './gpgpu_math';
+import {getCoordsDataType} from './shader_compiler';
+import {getChannels} from '../packing_util';
+
+export class PadPackedProgram implements GPGPUProgram {  // Builds the shader source (userCode) that pads input 'x' with a constant value.
+  variableNames = ['x'];  // the single shader input; userCode reads it through getX(...)
+  usesPackedTextures = true;  // presumably read by the backend to select packed-texture handling -- TODO confirm against GPGPUProgram consumers
+  outputShape: number[];  // per-dimension size after padding, set in the constructor
+  userCode: string;  // generated GLSL containing main()
+
+  constructor(
+      xShape: number[], paddings: Array<[number, number]>,  // paddings[i] is [before, after] for dimension i
+      constantValue: number) {  // value written to every position that falls in the padding area
+    this.outputShape = paddings.map(
+        (p, i) => p[0] /* beforePad */ + xShape[i] + p[1] /* afterPad */);
+    const rank = xShape.length;
+    const dtype = getCoordsDataType(rank);  // used below as the GLSL type of rc, source, start and end
+
+    const start = paddings.map(p => p[0]).join(',');  // first output coordinate, per dimension, that maps onto the input
+    const end = paddings.map((p, i) => p[0] + xShape[i]).join(',');  // exclusive upper bound of that region (tested with >= below)
+    const coords = getChannels('rc', rank);  // per-dimension expressions addressing components of `rc`
+    const source = getChannels('source', rank);  // same, for the `source` coordinate declared inside the shader
+    const cLimit = `${coords[rank - 1]} < ${this.outputShape[rank - 1]}`;  // GLSL test: last-dimension coordinate is still inside the output
+    const innerDims =
+        rank === 1 ? 'source' : `vec2(${source.slice(-2).join()})`;  // second argument to getChannel: the two innermost source coordinates (or `source` itself for rank 1)
+
+    const componentSetup = [  // GLSL prelude for result[i], indexed by the loop counter below
+      `${dtype} rc = outputLoc;`,  // component 0: the output coordinate itself
+      `${coords[rank - 1]} += 1;
+       if(${cLimit}) {
+      `,  // component 1: one step along the last dimension; opens an if that is closed later
+      rank === 1 ? '' :  // components 2 and 3 are only generated when rank > 1
+      `}
+       rc = outputLoc;
+       ${coords[rank - 2]} += 1;
+       if(${coords[rank - 2]} < ${this.outputShape[rank - 2]}) {`,  // component 2: closes component 1's if, resets rc, steps along the second-to-last dimension
+      rank === 1 ? '' :
+      `  ${coords[rank - 1]} += 1;
+         if(${cLimit}) {`  // component 3: additionally steps along the last dimension, nested inside component 2's if
+    ];
+    
+    const paddingArea = rank === 1 ?  // GLSL condition that is true when rc lies outside [start, end)
+        'rc < start || rc >= end' :  // rank 1: rc, start and end are compared directly
+        'any(lessThan(rc, start)) || any(greaterThanEqual(rc, end))';  // higher ranks: component-wise vector comparison
+    let mainLoop = '';
+    for (let i = 0, j = rank === 1 ? 2 : 4; i < j; i++) {  // two result components for rank 1, four otherwise
+      mainLoop += `
+        ${componentSetup[i]}
+        if (${paddingArea}) {
+          result[${i}] = float(${constantValue});
+        } else {
+          ${dtype} source = rc - start;
+          result[${i}] = getChannel(getX(${source.join()}), ${innerDims});
+        }
+      `;  // each component is either the constant or the input sampled at rc - start
+    }  // NOTE(review): constantValue is interpolated as text; a non-finite number would emit `float(NaN)` / `float(Infinity)` -- confirm callers never pass one
+    mainLoop += (rank === 1 ? `} ` : `}}`);  // closes the if blocks left open by componentSetup: one for rank 1, two nested ones otherwise
+
+    this.userCode = `
+      const ${dtype} start = ${dtype}(${start});
+      const ${dtype} end = ${dtype}(${end});
+
+      void main() {
+        ${dtype} outputLoc = getOutputCoords();
+        vec4 result = vec4(0.);
+        ${mainLoop}
+        setOutput(result);
+      }
+    `;  // components whose guarding if fails keep the 0. from the vec4(0.) initialiser
+  }
+}
@@ -119,12 +119,8 @@ function getPackedSamplerFromInInfo(inInfo: InputInfo): string {
       return getPackedSampler2D(inInfo);
     case 3:
       return getPackedSampler3D(inInfo);
-    case 4:
-      return getPackedSampler4D(inInfo);
     default:
-      throw new Error(
-          `Packed ${shape.length}-D input sampling` +
-          ` is not yet supported`);
+      return getPackedSamplerND(inInfo);
   }
 }
 
@@ -165,13 +161,8 @@ function getPackedOutputSamplingSnippet(
     case 3:
       return getOutputPacked3DCoords(
           outShape as [number, number, number], outTexShape);
-    case 4:
-      return getOutputPacked4DCoords(
-          outShape as [number, number, number, number], outTexShape);
     default:
-      throw new Error(
-          `${outShape.length}-D packed output ` +
-          `coordinate fetching is not yet supported`);
+      return getOutputPackedNDCoords(outShape, outTexShape);
   }
 }
 
@@ -312,7 +303,6 @@ function getShaderPrefix(glsl: GLSL): string {
     ${SAMPLE_1D_SNIPPET}
     ${SAMPLE_2D_SNIPPET}
     ${SAMPLE_3D_SNIPPET}
-    ${SAMPLE_4D_SNIPPET}
     ${SAMPLE_5D_SNIPPET}
     ${SAMPLE_6D_SNIPPET}
   `;
@@ -355,18 +345,6 @@ vec2 packedUVfrom3D(int texNumR, int texNumC,
 }
 `;
 
-const SAMPLE_4D_SNIPPET = `
-vec2 packedUVfrom4D(int texNumR, int texNumC, int texelsInBatch2,
-    int texelsInBatch, int texelsInLogicalRow, int b2, int b,
-    int row, int col) {
-  int index = b2 * texelsInBatch2 + b * texelsInBatch +
-    (row / 2) * texelsInLogicalRow + (col / 2);
-  int texR = index / texNumC;
-  int texC = index - texR * texNumC;
-  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
-}
-`;
-
 const SAMPLE_5D_SNIPPET = `
 vec2 UVfrom5D(int texNumR, int texNumC, int stride0,
     int stride1, int stride2, int stride3, int row, int col, int depth,
@@ -508,32 +486,42 @@ function getOutput3DCoords(
   `;
 }
 
-function getOutputPacked4DCoords(
-    shape: [number, number, number, number],
-    texShape: [number, number]): string {
+function getOutputPackedNDCoords(  // returns GLSL source for getOutputCoords(); replaces the fixed 4-D version with one driven by shape.length
+    shape: number[], texShape: [number, number]): string {
   const packedTexShape =
       [Math.ceil(texShape[0] / 2), Math.ceil(texShape[1] / 2)];
 
-  const texelsInLogicalRow = Math.ceil(shape[3] / 2);
-  const texelsInBatch = texelsInLogicalRow * Math.ceil(shape[2] / 2);
-  const texelsInBatch2 = texelsInBatch * shape[1];
+  const texelsInLogicalRow = Math.ceil(shape[shape.length - 1] / 2);  // last dimension halved, rounded up
+  const texelsInBatch =
+      texelsInLogicalRow * Math.ceil(shape[shape.length - 2] / 2);  // texels in one innermost 2-D slice (second-to-last dimension halved as well)
+  let texelsInBatchN = texelsInBatch;  // running stride, multiplied by one more dimension size on each loop pass
+  let batches = ``;  // GLSL statements that decode the additional outer indices b2, b3, ...
+  let coords = 'b, r, c';  // argument list for the returned ivec; outer index names are prepended in the loop
+
+  for (let b = 2; b < shape.length - 1; b++) {  // one pass per dimension outside the three covered by b, r, c
+    texelsInBatchN *= shape[shape.length - b - 1];  // stride of index b<b>: previous stride times the size of the next-inner dimension
+    batches = `
+      int b${b} = index / ${texelsInBatchN};
+      index -= b${b} * ${texelsInBatchN};
+    ` + batches;  // prepended so the largest stride is divided out of index first
+    coords = `b${b}, ` + coords;  // prepended to keep the arguments outermost-first
+  }
 
   return `
-    ivec4 getOutputCoords() {
+    ivec${shape.length} getOutputCoords() {
       ivec2 resTexRC = ivec2(resultUV.yx *
                              vec2(${packedTexShape[0]}, ${packedTexShape[1]}));
       int index = resTexRC.x * ${packedTexShape[1]} + resTexRC.y;
-
-      int b2 = index / ${texelsInBatch2};
-      index -= b2 * ${texelsInBatch2};
+      
+      ${batches}
 
       int b = index / ${texelsInBatch};
       index -= b * ${texelsInBatch};
 
       int r = 2 * (index / ${texelsInLogicalRow});
       int c = imod(index, ${texelsInLogicalRow}) * 2;
 
-      return ivec4(b2, b, r, c);
+      return ivec${shape.length}(${coords});
     }
   `;
 }
@@ -996,8 +984,9 @@ function getSampler3D(inputInfo: InputInfo): string {
   `;
 }
 
-function getPackedSampler4D(inputInfo: InputInfo): string {
+function getPackedSamplerND(inputInfo: InputInfo): string {  
   const shape = inputInfo.shapeInfo.logicalShape;
+  const rank = shape.length;
   const texName = inputInfo.name;
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
   const texShape = inputInfo.shapeInfo.texShape;
@@ -1006,17 +995,23 @@ function getPackedSampler4D(inputInfo: InputInfo): string {
   const texNumR = packedTexShape[0];
   const texNumC = packedTexShape[1];
 
-  const valuesPerRow = Math.ceil(shape[3] / 2);
-  const texelsInBatch = valuesPerRow * Math.ceil(shape[2] / 2);
-  const texelsInBatch2 = texelsInBatch * shape[1];
+  const valuesPerRow = Math.ceil(shape[rank - 1] / 2);
+  let texelsInBatch = valuesPerRow * Math.ceil(shape[rank - 2] / 2);
+  let params = `int b, int row, int col`;
+  let index = `b * ${texelsInBatch} + (row / 2) * ${valuesPerRow} + (col / 2)`;
+  for (let b = 2; b < rank - 1; b++) {
+    params = `int b${b}, ` + params;
+    texelsInBatch *= shape[rank - b - 1]; 
+    index = `b${b} * ${texelsInBatch} + ` + index;
+  }
   const glsl = getGlslDifferences();
-
   return `
-    vec4 ${funcName}(int b2, int b, int row, int col) {
-      vec2 uv = packedUVfrom4D(
-        ${texNumR}, ${texNumC}, ${texelsInBatch2},
-        ${texelsInBatch}, ${valuesPerRow}, b2, b, row, col);
-      return ${glsl.texture2D}(${texName}, uv);
+    vec4 ${funcName}(${params}) {
+      int index = ${index};
+      int texR = index / ${texNumC};
+      int texC = index - texR * ${texNumC};
+      vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${texNumC}, ${texNumR});
+      return ${glsl.texture2D}(${texName}, uv);      
     }
   `;
 }