Skip to content

Commit 546eafa

Browse files
authored
Properly decode data from packed textures when calling async read. (tensorflow#1419)
- Add check for `isPacked` bit on textures and call the appropriate decoder in `async read`.
- Revive logic for lazy uploading removed by tensorflow#1417.
- Add unit test.
1 parent 596ca25 commit 546eafa

File tree

4 files changed: 76 additions and 27 deletions.

src/kernels/backend_webgl.ts

Lines changed: 42 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ import {SegmentOpProgram} from './webgl/segment_gpu';
9292
import {SelectProgram} from './webgl/select_gpu';
9393
import {SliceProgram} from './webgl/slice_gpu';
9494
import {StridedSliceProgram} from './webgl/strided_slice_gpu';
95+
import * as tex_util from './webgl/tex_util';
9596
import {TextureData, TextureUsage} from './webgl/tex_util';
9697
import {TextureManager} from './webgl/texture_manager';
9798
import {TileProgram} from './webgl/tile_gpu';
@@ -301,7 +302,7 @@ export class MathBackendWebGL implements KernelBackend {
301302
return new Promise<TypedArray>(resolve => subscribers.push(resolve));
302303
}
303304
const texData = this.texData.get(dataId);
304-
const {texture, values, texShape} = texData;
305+
const {texture, values, texShape, isPacked, shape} = texData;
305306
if (values != null) {
306307
return this.convertAndCacheOnCPU(dataId);
307308
}
@@ -316,8 +317,14 @@ export class MathBackendWebGL implements KernelBackend {
316317
}
317318

318319
// Possibly copy the texture into a buffer before inserting a fence.
319-
const bufferOrTexture = this.gpgpu.maybeCreateBufferFromTexture(
320-
texture, texShape[0], texShape[1]);
320+
let width = texShape[1];
321+
let height = texShape[0];
322+
if (isPacked) {
323+
[width, height] = tex_util.getPackedMatrixTextureShapeWidthHeight(
324+
texShape[0], texShape[1]);
325+
}
326+
const bufferOrTexture =
327+
this.gpgpu.maybeCreateBufferFromTexture(texture, height, width);
321328

322329
// Create a fence and wait for it to resolve.
323330
await this.gpgpu.createAndWaitForFence();
@@ -327,8 +334,18 @@ export class MathBackendWebGL implements KernelBackend {
327334
if (bufferOrTexture instanceof WebGLTexture) {
328335
vals = this.getValuesFromTexture(dataId);
329336
} else {
330-
vals = this.gpgpu.downloadFloat32MatrixFromBuffer(
331-
bufferOrTexture, texShape[0], texShape[1]);
337+
if (isPacked) {
338+
const batch = this.getBatchDim(shape);
339+
let rows = 1, cols = 1;
340+
if (shape.length) {
341+
[rows, cols] = this.getRowsCols(shape);
342+
}
343+
vals = this.gpgpu.downloadPackedMatrixFromBuffer(
344+
bufferOrTexture, batch, rows, cols, texShape[0], texShape[1]);
345+
} else {
346+
vals = this.gpgpu.downloadFloat32MatrixFromBuffer(
347+
bufferOrTexture, texShape[0], texShape[1]);
348+
}
332349
}
333350
const dTypeVals = this.convertAndCacheOnCPU(dataId, vals);
334351

@@ -1803,31 +1820,29 @@ export class MathBackendWebGL implements KernelBackend {
18031820

18041821
let texData = this.texData.get(input.dataId);
18051822

1806-
if (texData.texture == null &&
1807-
!(!texData.isPacked && program.usesPackedTextures) &&
1808-
util.sizeFromShape(input.shape) <=
1809-
ENV.get('WEBGL_SIZE_UPLOAD_UNIFORM')) {
1810-
// Upload small tensors that live on the CPU as uniforms, not as
1811-
// textures. Do this only when the environment supports 32bit floats due
1812-
// to problems when comparing 16bit floats with 32bit floats.
1813-
// TODO(https://github.com/tensorflow/tfjs/issues/821): Make it possible
1814-
// for packed shaders to sample from uniforms.
1815-
return {
1816-
shape: input.shape,
1817-
texData: null,
1818-
isUniform: true,
1819-
uniformValues: this.readSync(input.dataId) as TypedArray
1820-
};
1821-
1822-
// TODO(annyuan): Revive this block once uploading to packed textures is
1823-
// fixed.
1823+
if (texData.texture == null) {
1824+
if (!(!texData.isPacked && program.usesPackedTextures) &&
1825+
util.sizeFromShape(input.shape) <=
1826+
ENV.get('WEBGL_SIZE_UPLOAD_UNIFORM')) {
1827+
// Upload small tensors that live on the CPU as uniforms, not as
1828+
// textures. Do this only when the environment supports 32bit floats
1829+
// due to problems when comparing 16bit floats with 32bit floats.
1830+
// TODO(https://github.com/tensorflow/tfjs/issues/821): Make it
1831+
// possible for packed shaders to sample from uniforms.
1832+
return {
1833+
shape: input.shape,
1834+
texData: null,
1835+
isUniform: true,
1836+
uniformValues: this.readSync(input.dataId) as TypedArray
1837+
};
1838+
}
18241839

18251840
// This ensures that if a packed program's inputs have not yet been
18261841
// uploaded to the GPU, they get uploaded as packed right off the bat.
1827-
// if (program.usesPackedTextures) {
1828-
// texData.isPacked = true;
1829-
// texData.shape = input.shape;
1830-
//}
1842+
if (program.usesPackedTextures) {
1843+
texData.isPacked = true;
1844+
texData.shape = input.shape;
1845+
}
18311846
} else if (!!texData.isPacked !== !!program.usesPackedTextures) {
18321847
let preProcessProgram: UnpackProgram|PackProgram;
18331848
let processedInput: Tensor;

src/kernels/webgl/gpgpu_context.ts

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,14 @@ export class GPGPUContext {
199199
this.gl, rows, columns, this.textureConfig));
200200
}
201201

202+
public downloadPackedMatrixFromBuffer(
203+
buffer: WebGLBuffer, batch: number, rows: number, columns: number,
204+
physicalRows: number, physicalCols: number): Float32Array {
205+
return gpgpu_util.downloadPackedMatrixFromBuffer(
206+
this.gl, buffer, batch, rows, columns, physicalRows, physicalCols,
207+
this.textureConfig);
208+
}
209+
202210
public downloadFloat32MatrixFromBuffer(
203211
buffer: WebGLBuffer, rows: number, columns: number): Float32Array {
204212
return gpgpu_util.downloadFloat32MatrixFromBuffer(

src/kernels/webgl/gpgpu_util.ts

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,26 @@ export function downloadByteEncodedFloatMatrixFromOutputTexture(
381381
return new Float32Array(downloadTarget.buffer);
382382
}
383383

384+
export function downloadPackedMatrixFromBuffer(
385+
gl: WebGLRenderingContext, buffer: WebGLBuffer, batch: number, rows: number,
386+
cols: number, physicalRows: number, physicalCols: number,
387+
textureConfig: TextureConfig): Float32Array {
388+
const gl2 = gl as WebGL2RenderingContext;
389+
390+
const downloadTarget =
391+
new Float32Array(tex_util.getPackedRGBAArraySizeFromMatrixShape(
392+
physicalRows, physicalCols));
393+
394+
gl2.bindBuffer(gl.ARRAY_BUFFER, buffer);
395+
gl2.getBufferSubData(gl.ARRAY_BUFFER, 0, downloadTarget);
396+
gl2.bindBuffer(gl.ARRAY_BUFFER, null);
397+
398+
const matrix = new Float32Array(util.sizeFromShape([batch, rows, cols]));
399+
tex_util.decodeMatrixFromPackedRGBA(
400+
downloadTarget, batch, rows, cols, matrix);
401+
return matrix;
402+
}
403+
384404
export function downloadMatrixFromPackedOutputTexture(
385405
gl: WebGLRenderingContext, batch: number, rows: number, cols: number,
386406
physicalRows: number, physicalCols: number,

src/tensor_test.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,12 @@ describeWithFlags('tensor', ALL_ENVS, () => {
108108
expectArraysClose(await a.data(), new Float32Array([1, 2, 3, 4, 5, 6]));
109109
});
110110

111+
it('Tensor.data() packed CPU --> GPU', async () => {
112+
const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [3, 2]);
113+
tf.matMul(a, tf.tensor2d([1, 2], [2, 1]));
114+
expectArraysClose(await a.data(), new Float32Array([1, 2, 3, 4, 5, 6]));
115+
});
116+
111117
it('Scalar basic methods', () => {
112118
const a = tf.scalar(5);
113119
expectNumbersClose(a.get(), 5);

0 commit comments

Comments
 (0)