@@ -92,6 +92,7 @@ import {SegmentOpProgram} from './webgl/segment_gpu';
92
92
import { SelectProgram } from './webgl/select_gpu' ;
93
93
import { SliceProgram } from './webgl/slice_gpu' ;
94
94
import { StridedSliceProgram } from './webgl/strided_slice_gpu' ;
95
+ import * as tex_util from './webgl/tex_util' ;
95
96
import { TextureData , TextureUsage } from './webgl/tex_util' ;
96
97
import { TextureManager } from './webgl/texture_manager' ;
97
98
import { TileProgram } from './webgl/tile_gpu' ;
@@ -301,7 +302,7 @@ export class MathBackendWebGL implements KernelBackend {
301
302
return new Promise < TypedArray > ( resolve => subscribers . push ( resolve ) ) ;
302
303
}
303
304
const texData = this . texData . get ( dataId ) ;
304
- const { texture, values, texShape} = texData ;
305
+ const { texture, values, texShape, isPacked , shape } = texData ;
305
306
if ( values != null ) {
306
307
return this . convertAndCacheOnCPU ( dataId ) ;
307
308
}
@@ -316,8 +317,14 @@ export class MathBackendWebGL implements KernelBackend {
316
317
}
317
318
318
319
// Possibly copy the texture into a buffer before inserting a fence.
319
- const bufferOrTexture = this . gpgpu . maybeCreateBufferFromTexture (
320
- texture , texShape [ 0 ] , texShape [ 1 ] ) ;
320
+ let width = texShape [ 1 ] ;
321
+ let height = texShape [ 0 ] ;
322
+ if ( isPacked ) {
323
+ [ width , height ] = tex_util . getPackedMatrixTextureShapeWidthHeight (
324
+ texShape [ 0 ] , texShape [ 1 ] ) ;
325
+ }
326
+ const bufferOrTexture =
327
+ this . gpgpu . maybeCreateBufferFromTexture ( texture , height , width ) ;
321
328
322
329
// Create a fence and wait for it to resolve.
323
330
await this . gpgpu . createAndWaitForFence ( ) ;
@@ -327,8 +334,18 @@ export class MathBackendWebGL implements KernelBackend {
327
334
if ( bufferOrTexture instanceof WebGLTexture ) {
328
335
vals = this . getValuesFromTexture ( dataId ) ;
329
336
} else {
330
- vals = this . gpgpu . downloadFloat32MatrixFromBuffer (
331
- bufferOrTexture , texShape [ 0 ] , texShape [ 1 ] ) ;
337
+ if ( isPacked ) {
338
+ const batch = this . getBatchDim ( shape ) ;
339
+ let rows = 1 , cols = 1 ;
340
+ if ( shape . length ) {
341
+ [ rows , cols ] = this . getRowsCols ( shape ) ;
342
+ }
343
+ vals = this . gpgpu . downloadPackedMatrixFromBuffer (
344
+ bufferOrTexture , batch , rows , cols , texShape [ 0 ] , texShape [ 1 ] ) ;
345
+ } else {
346
+ vals = this . gpgpu . downloadFloat32MatrixFromBuffer (
347
+ bufferOrTexture , texShape [ 0 ] , texShape [ 1 ] ) ;
348
+ }
332
349
}
333
350
const dTypeVals = this . convertAndCacheOnCPU ( dataId , vals ) ;
334
351
@@ -1803,31 +1820,29 @@ export class MathBackendWebGL implements KernelBackend {
1803
1820
1804
1821
let texData = this . texData . get ( input . dataId ) ;
1805
1822
1806
- if ( texData . texture == null &&
1807
- ! ( ! texData . isPacked && program . usesPackedTextures ) &&
1808
- util . sizeFromShape ( input . shape ) <=
1809
- ENV . get ( 'WEBGL_SIZE_UPLOAD_UNIFORM' ) ) {
1810
- // Upload small tensors that live on the CPU as uniforms, not as
1811
- // textures. Do this only when the environment supports 32bit floats due
1812
- // to problems when comparing 16bit floats with 32bit floats.
1813
- // TODO(https://github.com/tensorflow/tfjs/issues/821): Make it possible
1814
- // for packed shaders to sample from uniforms.
1815
- return {
1816
- shape : input . shape ,
1817
- texData : null ,
1818
- isUniform : true ,
1819
- uniformValues : this . readSync ( input . dataId ) as TypedArray
1820
- } ;
1821
-
1822
- // TODO(annyuan): Revive this block once uploading to packed textures is
1823
- // fixed.
1823
+ if ( texData . texture == null ) {
1824
+ if ( ! ( ! texData . isPacked && program . usesPackedTextures ) &&
1825
+ util . sizeFromShape ( input . shape ) <=
1826
+ ENV . get ( 'WEBGL_SIZE_UPLOAD_UNIFORM' ) ) {
1827
+ // Upload small tensors that live on the CPU as uniforms, not as
1828
+ // textures. Do this only when the environment supports 32bit floats
1829
+ // due to problems when comparing 16bit floats with 32bit floats.
1830
+ // TODO(https://github.com/tensorflow/tfjs/issues/821): Make it
1831
+ // possible for packed shaders to sample from uniforms.
1832
+ return {
1833
+ shape : input . shape ,
1834
+ texData : null ,
1835
+ isUniform : true ,
1836
+ uniformValues : this . readSync ( input . dataId ) as TypedArray
1837
+ } ;
1838
+ }
1824
1839
1825
1840
// This ensures that if a packed program's inputs have not yet been
1826
1841
// uploaded to the GPU, they get uploaded as packed right off the bat.
1827
- // if (program.usesPackedTextures) {
1828
- // texData.isPacked = true;
1829
- // texData.shape = input.shape;
1830
- // }
1842
+ if ( program . usesPackedTextures ) {
1843
+ texData . isPacked = true ;
1844
+ texData . shape = input . shape ;
1845
+ }
1831
1846
} else if ( ! ! texData . isPacked !== ! ! program . usesPackedTextures ) {
1832
1847
let preProcessProgram : UnpackProgram | PackProgram ;
1833
1848
let processedInput : Tensor ;
0 commit comments