
Commit e56c722

astojilj authored and annxingyuan committed
Packed batchMatMul. tensorflow#693 (tensorflow#1530)
PERF tensorflow/tfjs#1058

Always use the packed matmul program in batchMatMul and fusedMatMul, and remove the non-packed matmul implementation. A modified [matmul benchmark](https://gist.github.com/astojilj/b6cc855e708bb2d77c7f892ef8489137) shows a performance improvement for matmuls of tensors with shapes [3, 400, 400] and [3, 1000, 1000]:

master:
  "N=400": { "averageTimeMs": 25.751999999629334 },
  "N=1000": { "averageTimeMs": 140.3969999976107 }

with this patch:
  "N=400": { "averageTimeMs": 9.409999999043066 },
  "N=1000": { "averageTimeMs": 54.652999998943415 }

Benchmark run on the discrete GPU of an [ASUS ROG-GL702VM](https://www.asus.com/Laptops/ROG-GL702VM-7th-Gen-Intel-Core/).
1 parent 5a3c8ec commit e56c722
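
For context, numbers of this kind can be reproduced with a harness along the following lines. This is a minimal sketch only: the linked gist differs in detail, and `runMatmulBenchmark`, the trial count, and the warm-up step are illustrative assumptions, not the gist's code.

```ts
import * as tf from '@tensorflow/tfjs';

// Times a batched matmul of two [3, N, N] tensors on the current backend.
// Illustrative only; the linked gist's harness differs in detail.
async function runMatmulBenchmark(n: number, trials = 20): Promise<number> {
  const a = tf.randomNormal([3, n, n]);
  const b = tf.randomNormal([3, n, n]);

  // Warm up so shader compilation is not counted.
  await tf.matMul(a, b).data();

  const start = performance.now();
  for (let i = 0; i < trials; i++) {
    const c = tf.matMul(a, b);
    await c.data();  // Forces the GPU work to complete before stopping the clock.
    c.dispose();
  }
  const averageTimeMs = (performance.now() - start) / trials;

  a.dispose();
  b.dispose();
  return averageTimeMs;
}

runMatmulBenchmark(400).then(ms => console.log(`N=400: ${ms.toFixed(1)} ms`));
```

Awaiting `data()` is what makes the wall-clock average meaningful, since WebGL work is otherwise scheduled asynchronously.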

File tree

3 files changed: +27 -184 lines changed

- src/kernels/backend_webgl.ts
- src/kernels/webgl/mulmat_gpu.ts (deleted)
- src/kernels/webgl/mulmat_packed_gpu.ts


src/kernels/backend_webgl.ts

Lines changed: 18 additions & 52 deletions
@@ -77,7 +77,6 @@ import {Im2ColProgram} from './webgl/im2col_gpu';
 import {LRNProgram} from './webgl/lrn_gpu';
 import {LRNGradProgram} from './webgl/lrn_grad_gpu';
 import {MaxPool2DBackpropProgram} from './webgl/max_pool_backprop_gpu';
-import {MatMulProgram} from './webgl/mulmat_gpu';
 import {MatMulPackedProgram} from './webgl/mulmat_packed_gpu';
 import {MultinomialProgram} from './webgl/multinomial_gpu';
 import {OneHotProgram} from './webgl/onehot_gpu';
@@ -769,26 +768,11 @@ export class MathBackendWebGL implements KernelBackend {

     const dtype = upcastType(a.dtype, b.dtype);

-    // TODO(https://github.com/tensorflow/tfjs/issues/693): Support 3D tensors
-    if (batch === 1) {
-      const aSqueezed = a.as2D(a.shape[1], a.shape[2]);
-      const bSqueezed = b.as2D(b.shape[1], b.shape[2]);
-
-      const program = new MatMulPackedProgram(
-          aSqueezed.shape, bSqueezed.shape, [outerShapeA, outerShapeB],
-          transposeA, transposeB);
-      const output =
-          this.makePackedTensor(program.outputShape, dtype) as Tensor2D;
-      const result =
-          this.compileAndRun<Tensor2D>(program, [aSqueezed, bSqueezed], output);
-      return result.reshape([1, result.shape[0], result.shape[1]]);
-    } else {
-      const program =
-          new MatMulProgram(a.shape, b.shape, transposeA, transposeB);
-      const output =
-          this.makeOutputArray(program.outputShape, dtype) as Tensor3D;
-      return this.compileAndRun(program, [a, b], output);
-    }
+    const program = new MatMulPackedProgram(a.shape,
+        [batch, outerShapeA, outerShapeB], transposeA, transposeB);
+    const output =
+        this.makePackedTensor(program.outputShape, dtype) as Tensor3D;
+    return this.compileAndRun<Tensor3D>(program, [a, b], output);
   }

   fusedBatchMatMul(
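
With this hunk, batchMatMul always builds a MatMulPackedProgram over the full 3D shapes and produces a packed [batch, outerShapeA, outerShapeB] tensor; the batch === 1 squeeze/reshape special case and the fallback to the deleted MatMulProgram are gone. At the API level this is the path any rank-3 matmul takes, for example (a minimal sketch, shapes chosen arbitrarily):

```ts
import * as tf from '@tensorflow/tfjs';

// [batch, M, K] x [batch, K, N] -> [batch, M, N]; batch > 1 now also runs
// through the packed program rather than the deleted MatMulProgram.
const a = tf.tensor3d([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]);  // shape [2, 2, 2]
const b = tf.tensor3d([[[1, 0], [0, 1]], [[2, 0], [0, 2]]]);  // shape [2, 2, 2]
const c = tf.matMul(a, b);                                     // shape [2, 2, 2]
c.print();
```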
@@ -800,35 +784,16 @@ export class MathBackendWebGL implements KernelBackend {

     const dtype = upcastType(a.dtype, b.dtype);

-    // TODO(https://github.com/tensorflow/tfjs/issues/693): Support 3D tensors
-    if (batch === 1) {
-      const aSqueezed = a.as2D(a.shape[1], a.shape[2]);
-      const bSqueezed = b.as2D(b.shape[1], b.shape[2]);
-
-      const program = new MatMulPackedProgram(
-          aSqueezed.shape, bSqueezed.shape, [outerShapeA, outerShapeB],
-          transposeA, transposeB, !!bias,
-          activation ? mapActivationToShaderProgram(activation, true) : null);
-      const output =
-          this.makePackedTensor(program.outputShape, dtype) as Tensor2D;
-      const inputs: TensorHandle[] = [aSqueezed, bSqueezed];
-      if (bias) {
-        inputs.push(bias);
-      }
-      const result = this.compileAndRun<Tensor2D>(program, inputs, output);
-      return result.reshape([1, result.shape[0], result.shape[1]]);
-    } else {
-      const program = new MatMulProgram(
-          a.shape, b.shape, transposeA, transposeB, !!bias,
-          activation ? mapActivationToShaderProgram(activation) : null);
-      const inputs: TensorHandle[] = [a, b];
-      if (bias) {
-        inputs.push(bias);
-      }
-      const output =
-          this.makeOutputArray(program.outputShape, dtype) as Tensor3D;
-      return this.compileAndRun(program, inputs, output);
+    const program = new MatMulPackedProgram(a.shape,
+        [batch, outerShapeA, outerShapeB], transposeA, transposeB, !!bias,
+        activation ? mapActivationToShaderProgram(activation, true) : null);
+    const output =
+        this.makePackedTensor(program.outputShape, dtype) as Tensor3D;
+    const inputs: TensorHandle[] = [a, b];
+    if (bias) {
+      inputs.push(bias);
     }
+    return this.compileAndRun<Tensor3D>(program, inputs, output);
   }

   multiply(a: Tensor, b: Tensor): Tensor {
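
fusedBatchMatMul gets the same treatment: one packed program per call, with the optional bias and activation folded into the shader. As a reminder of the contract that program implements, here is a plain CPU sketch for a single batch element; the function name and the ReLU default are illustrative, not backend code:

```ts
// CPU reference for one batch element of the fused op:
// out[i][j] = activation(sum_k a[i][k] * b[k][j] + bias[j]).
function fusedMatMulRef(
    a: number[][], b: number[][], bias: number[],
    activation: (x: number) => number = x => Math.max(0, x)  // ReLU, as an example
): number[][] {
  const m = a.length;
  const k = a[0].length;
  const n = b[0].length;
  const out: number[][] = [];
  for (let i = 0; i < m; i++) {
    const row: number[] = [];
    for (let j = 0; j < n; j++) {
      let sum = 0;
      for (let p = 0; p < k; p++) {
        sum += a[i][p] * b[p][j];
      }
      row.push(activation(sum + bias[j]));
    }
    out.push(row);
  }
  return out;
}
```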
@@ -1711,14 +1676,15 @@ export class MathBackendWebGL implements KernelBackend {
     const x2ColShape = [sharedDim, numCols];

     const xSqueezed = x.squeeze([0]);
-    const w2Row = filter.reshape([sharedDim, -1]) as Tensor2D;
+    const w2Row = filter.reshape([1, sharedDim, -1]) as Tensor3D;

     const im2ColProgram =
         new Im2ColProgram(x2ColShape, xSqueezed.shape, convInfo);
-    const im2Col = this.compileAndRun<Tensor2D>(im2ColProgram, [xSqueezed]);
+    const im2Col = this.compileAndRun<Tensor2D>(im2ColProgram, [xSqueezed]).
+        reshape([1, x2ColShape[0], x2ColShape[1]]) as Tensor3D;

     const matmulProgram = new MatMulPackedProgram(
-        im2Col.shape, w2Row.shape, [numCols, convInfo.outChannels], true,
+        im2Col.shape, [1, numCols, convInfo.outChannels], true,
         false);
     const product =
         this.compileAndRun<Tensor4D>(matmulProgram, [im2Col, w2Row]);
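
The im2col convolution path now feeds the same 3D packed program with a batch of 1: the unrolled input is reshaped to [1, sharedDim, numCols], the filter to [1, sharedDim, outChannels], and the transposed product comes out as [1, numCols, outChannels]. A small sketch of that shape bookkeeping, assuming the usual tfjs convInfo fields (an illustrative helper, not part of this change):

```ts
// Shape bookkeeping for the im2col + packed-matmul convolution path.
// Field names mirror tfjs's convInfo, but this helper is illustrative only.
function im2ColMatMulShapes(convInfo: {
  filterHeight: number, filterWidth: number, inChannels: number,
  outHeight: number, outWidth: number, outChannels: number
}) {
  const sharedDim =
      convInfo.filterHeight * convInfo.filterWidth * convInfo.inChannels;
  const numCols = convInfo.outHeight * convInfo.outWidth;
  return {
    im2ColShape: [1, sharedDim, numCols],              // unrolled input patches
    w2RowShape: [1, sharedDim, convInfo.outChannels],  // reshaped filter
    // transposeA = true, so the product is [1, numCols, outChannels],
    // later reshaped to [1, outHeight, outWidth, outChannels].
    productShape: [1, numCols, convInfo.outChannels],
  };
}
```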

src/kernels/webgl/mulmat_gpu.ts

Lines changed: 0 additions & 123 deletions
This file was deleted.

src/kernels/webgl/mulmat_packed_gpu.ts

Lines changed: 9 additions & 9 deletions
@@ -24,16 +24,16 @@ export class MatMulPackedProgram implements GPGPUProgram {
   userCode: string;

   constructor(
-      aShape: [number, number], bShape: [number, number],
-      outputShape: [number, number], transposeA = false, transposeB = false,
+      aShape: [number, number, number], outputShape: [number, number, number],
+      transposeA = false, transposeB = false,
       addBias = false, activation: string = null) {
     this.outputShape = outputShape;

-    const sharedDim = transposeA ? aShape[0] : aShape[1];
+    const sharedDim = transposeA ? aShape[1] : aShape[2];
     const sharedDimensionPacked = Math.ceil(sharedDim / 2);

-    const aSample = transposeA ? 'i * 2, rc.x' : 'rc.x, i * 2';
-    const bSample = transposeB ? 'rc.y, i * 2' : 'i * 2, rc.y';
+    const aSample = transposeA ? 'i * 2, rc.y' : 'rc.y, i * 2';
+    const bSample = transposeB ? 'rc.z, i * 2' : 'i * 2, rc.z';
     const aSwizzle = transposeA ? ['a.xxyy', 'a.zzww'] : ['a.xxzz', 'a.yyww'];
     const bSwizzle = transposeB ? ['b.xzxz', 'b.ywyw'] : ['b.xyxy', 'b.zwzw'];
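
With the new signature the caller passes full 3D shapes and the batch dimension rides along in front; the shared dimension comes from aShape[1] or aShape[2] depending on transposeA. For example (shapes picked arbitrarily, using the import path shown in the backend diff above):

```ts
import {MatMulPackedProgram} from './webgl/mulmat_packed_gpu';

// A: [batch, M, K] = [3, 400, 512], B: [batch, K, N] = [3, 512, 400].
// With transposeA = false, sharedDim = aShape[2] = 512, and the shader packs
// two elements per texel along it: sharedDimensionPacked = ceil(512 / 2) = 256.
const program = new MatMulPackedProgram(
    [3, 400, 512],   // aShape
    [3, 400, 400],   // outputShape = [batch, outerShapeA, outerShapeB]
    false,           // transposeA
    false);          // transposeB
```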

@@ -56,11 +56,11 @@ export class MatMulPackedProgram implements GPGPUProgram {

       const float sharedDimension = ${sharedDimensionPacked}.0;

-      vec4 dot2x2ARowBCol(ivec2 rc) {
+      vec4 dot2x2ARowBCol(ivec3 rc) {
         vec4 result = vec4(0);
         for (int i = 0; i < ${sharedDimensionPacked}; i++) {
-          vec4 a = getMatrixA(${aSample});
-          vec4 b = getMatrixB(${bSample});
+          vec4 a = getMatrixA(rc.x, ${aSample});
+          vec4 b = getMatrixB(rc.x, ${bSample});

           result += (${aSwizzle[0]} * ${bSwizzle[0]}) + (${aSwizzle[1]} * ${
             bSwizzle[1]});
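
The shader operates on 2x2 tiles: each RGBA texel holds a 2x2 block of the matrix ([x y; z w]), rc.x is now the batch index passed to getMatrixA/getMatrixB, and each loop iteration multiplies a 2x2 tile of A by a 2x2 tile of B. A CPU sketch of one accumulation step for the non-transposed swizzles (a.xxzz * b.xyxy + a.yyww * b.zwzw), purely for illustration:

```ts
// One step of dot2x2ARowBCol on the CPU. Each vec4 [x, y, z, w] is a 2x2
// tile stored row-major:  [ x y ]
//                         [ z w ]
// Accumulating a.xxzz * b.xyxy + a.yyww * b.zwzw is exactly the 2x2 tile
// product of a and b added into result.
function accumulate2x2(result: number[], a: number[], b: number[]): void {
  const [ax, ay, az, aw] = a;
  const [bx, by, bz, bw] = b;
  result[0] += ax * bx + ay * bz;  // row 0, col 0
  result[1] += ax * by + ay * bw;  // row 0, col 1
  result[2] += az * bx + aw * bz;  // row 1, col 0
  result[3] += az * by + aw * bw;  // row 1, col 1
}
```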
@@ -69,7 +69,7 @@ export class MatMulPackedProgram implements GPGPUProgram {
       }

       void main() {
-        ivec2 rc = getOutputCoords();
+        ivec3 rc = getOutputCoords();
         vec4 result = dot2x2ARowBCol(rc);

         ${addBiasSnippet}
