Using Subtests and Sub-benchmarks
23 Aug 2016
Tags: testing, hierarchy, table-driven, subtests, sub-benchmarks

Marcel van Lohuizen

* Introduction

In Go 1.7, the `testing` package introduces a `Run` method on the
[[https://golang.org/pkg/testing/#T.Run][`T`]] and
[[https://golang.org/pkg/testing/#B.Run][`B`]] types
that allows for the creation of subtests and sub-benchmarks.
The introduction of subtests and sub-benchmarks enables better handling of
failures, fine-grained control of which tests to run from the command line,
control of parallelism, and often results in simpler and more maintainable code.
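
For orientation, here is a minimal sketch of the shape of these calls;
the names `TestGroup`, `BenchmarkGroup`, and `Sub` are made up for illustration:

	func TestGroup(t *testing.T) {
		t.Run("Sub", func(t *testing.T) {
			// Runs and is reported as TestGroup/Sub.
		})
	}

	func BenchmarkGroup(b *testing.B) {
		b.Run("Sub", func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				// Code being measured; reported as BenchmarkGroup/Sub.
			}
		})
	}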

* Table-driven tests basics

Before digging into the details, let's first discuss a common
way of writing tests in Go.
A series of related checks can be implemented by looping over a slice of test
cases:

	func TestTime(t *testing.T) {
		testCases := []struct {
			gmt  string
			loc  string
			want string
		}{
			{"12:31", "Europe/Zuri", "13:31"}, // incorrect location name
			{"12:31", "America/New_York", "7:31"}, // should be 07:31
			{"08:08", "Australia/Sydney", "18:08"},
		}
		for _, tc := range testCases {
			loc, err := time.LoadLocation(tc.loc)
			if err != nil {
				t.Fatalf("could not load location %q", tc.loc)
			}
			gmt, _ := time.Parse("15:04", tc.gmt)
			if got := gmt.In(loc).Format("15:04"); got != tc.want {
				t.Errorf("In(%s, %s) = %s; want %s", tc.gmt, tc.loc, got, tc.want)
			}
		}
	}

This approach, commonly referred to as table-driven tests, reduces the amount
of repetitive code compared to repeating the same code for each test
and makes it straightforward to add more test cases.

* Table-driven benchmarks

Before Go 1.7 it was not possible to use the same table-driven approach for
benchmarks.
A benchmark measures the performance of an entire function, so iterating over
a table of inputs inside a single benchmark would just measure all of them
together as one benchmark.
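
To illustrate, here is a sketch of what such a naive loop might look like,
written as if it were inside the `strconv` package like the examples below;
all the cases share a single timer and a single `b.N` loop, so the benchmark
reports one aggregate result instead of a result per case:

	func BenchmarkAppendFloatAll(b *testing.B) {
		floats := []float64{33909, 339.7784, -5.09e75, -5.11e-95}
		dst := make([]byte, 30)
		for i := 0; i < b.N; i++ {
			// Every case is timed together; there is no per-case result.
			for _, f := range floats {
				AppendFloat(dst[:0], f, 'g', -1, 64)
			}
		}
	}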

A common workaround was to define separate top-level benchmarks
that each call a common function with different parameters.
For instance, before 1.7 the `strconv` package's benchmarks for `AppendFloat`
looked something like this:

	func benchmarkAppendFloat(b *testing.B, f float64, fmt byte, prec, bitSize int) {
		dst := make([]byte, 30)
		b.ResetTimer() // Overkill here, but for illustrative purposes.
		for i := 0; i < b.N; i++ {
			AppendFloat(dst[:0], f, fmt, prec, bitSize)
		}
	}

	func BenchmarkAppendFloatDecimal(b *testing.B) { benchmarkAppendFloat(b, 33909, 'g', -1, 64) }
	func BenchmarkAppendFloat(b *testing.B) { benchmarkAppendFloat(b, 339.7784, 'g', -1, 64) }
	func BenchmarkAppendFloatExp(b *testing.B) { benchmarkAppendFloat(b, -5.09e75, 'g', -1, 64) }
	func BenchmarkAppendFloatNegExp(b *testing.B) { benchmarkAppendFloat(b, -5.11e-95, 'g', -1, 64) }
	func BenchmarkAppendFloatBig(b *testing.B) { benchmarkAppendFloat(b, 123456789123456789123456789, 'g', -1, 64) }
	...

Using the `Run` method available in Go 1.7, the same set of benchmarks is now
expressed as a single top-level benchmark:

	func BenchmarkAppendFloat(b *testing.B) {
		benchmarks := []struct {
			name    string
			float   float64
			fmt     byte
			prec    int
			bitSize int
		}{
			{"Decimal", 33909, 'g', -1, 64},
			{"Float", 339.7784, 'g', -1, 64},
			{"Exp", -5.09e75, 'g', -1, 64},
			{"NegExp", -5.11e-95, 'g', -1, 64},
			{"Big", 123456789123456789123456789, 'g', -1, 64},
			...
		}
		dst := make([]byte, 30)
		for _, bm := range benchmarks {
			b.Run(bm.name, func(b *testing.B) {
				for i := 0; i < b.N; i++ {
					AppendFloat(dst[:0], bm.float, bm.fmt, bm.prec, bm.bitSize)
				}
			})
		}
	}

Each invocation of the `Run` method creates a separate benchmark.
An enclosing benchmark function that calls a `Run` method is only run once and
is not measured.

The new code has more lines of code, but is more maintainable, more readable,
and consistent with the table-driven approach commonly used for testing.
Moreover, common setup code is now shared between runs while eliminating the
need to reset the timer.
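
If a particular case does need its own setup, the timer can still be managed
from inside the function passed to `Run`, since each sub-benchmark receives its
own `*testing.B`. A minimal sketch, with made-up sizes and a sort as the code
under test (assumes the `fmt`, `sort`, and `testing` imports):

	func BenchmarkSort(b *testing.B) {
		for _, size := range []int{100, 10000} {
			b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
				data := make([]int, size) // per-case setup
				for i := range data {
					data[i] = size - i
				}
				b.ResetTimer() // exclude the setup above from the measurement
				for i := 0; i < b.N; i++ {
					tmp := append([]int(nil), data...)
					sort.Ints(tmp)
				}
			})
		}
	}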

* Table-driven tests using subtests

Go 1.7 also introduces a `Run` method for creating subtests.
This test is a rewritten version of our earlier example using subtests:

	func TestTime(t *testing.T) {
		testCases := []struct {
			gmt  string
			loc  string
			want string
		}{
			{"12:31", "Europe/Zuri", "13:31"},
			{"12:31", "America/New_York", "7:31"},
			{"08:08", "Australia/Sydney", "18:08"},
		}
		for _, tc := range testCases {
			t.Run(fmt.Sprintf("%s in %s", tc.gmt, tc.loc), func(t *testing.T) {
				loc, err := time.LoadLocation(tc.loc)
				if err != nil {
					t.Fatal("could not load location")
				}
				gmt, _ := time.Parse("15:04", tc.gmt)
				if got := gmt.In(loc).Format("15:04"); got != tc.want {
					t.Errorf("got %s; want %s", got, tc.want)
				}
			})
		}
	}

The first thing to note is the difference in output from the two implementations.
The original implementation prints:

	--- FAIL: TestTime (0.00s)
	    time_test.go:62: could not load location "Europe/Zuri"

Even though there are two errors, execution of the test halts on the call to
`Fatalf` and the second test never runs.

The implementation using `Run` prints both:

	--- FAIL: TestTime (0.00s)
	    --- FAIL: TestTime/12:31_in_Europe/Zuri (0.00s)
	        time_test.go:84: could not load location
	    --- FAIL: TestTime/12:31_in_America/New_York (0.00s)
	        time_test.go:88: got 07:31; want 7:31

`Fatal` and its siblings cause a subtest to be skipped but not its parent or
subsequent subtests.

Another thing to note is the shorter error messages in the new implementation.
Since the subtest name uniquely identifies the subtest, there is no need to
identify the test again within the error messages.

There are several other benefits to using subtests or sub-benchmarks,
as clarified by the following sections.

* Running specific tests or benchmarks

Both subtests and sub-benchmarks can be singled out on the command line using
the [[https://golang.org/cmd/go/#hdr-Description_of_testing_flags][`-run` or `-bench` flag]].
Both flags take a slash-separated list of regular expressions that match the
corresponding parts of the full name of the subtest or sub-benchmark.

The full name of a subtest or sub-benchmark is a slash-separated list of
its name and the names of all of its parents, starting with the top-level.
The name is the corresponding function name for top-level tests and benchmarks,
and the first argument to `Run` otherwise.
To avoid display and parsing issues, a name is sanitized by replacing spaces
with underscores and escaping non-printable characters.
The same sanitizing is applied to the regular expressions passed to
the `-run` or `-bench` flags.

A few examples:

Run tests that use a timezone in Europe:

	$ go test -run=TestTime/"in Europe"
	--- FAIL: TestTime (0.00s)
	    --- FAIL: TestTime/12:31_in_Europe/Zuri (0.00s)
	        time_test.go:85: could not load location

Run only tests for times after noon:

	$ go test -run=Time/12:[0-9] -v
	=== RUN   TestTime
	=== RUN   TestTime/12:31_in_Europe/Zuri
	=== RUN   TestTime/12:31_in_America/New_York
	--- FAIL: TestTime (0.00s)
	    --- FAIL: TestTime/12:31_in_Europe/Zuri (0.00s)
	        time_test.go:85: could not load location
	    --- FAIL: TestTime/12:31_in_America/New_York (0.00s)
	        time_test.go:89: got 07:31; want 7:31

Perhaps a bit surprisingly, using `-run=TestTime/New_York` won't match any tests.
This is because the slash present in the location names is treated as
a separator as well.
Instead use:

	$ go test -run=Time//New_York
	--- FAIL: TestTime (0.00s)
	    --- FAIL: TestTime/12:31_in_America/New_York (0.00s)
	        time_test.go:88: got 07:31; want 7:31

Note the `//` in the string passed to `-run`.
The `/` in the time zone name `America/New_York` is handled as if it were
a separator resulting from a subtest.
The first regular expression of the pattern (`TestTime`) matches the top-level
test.
The second regular expression (the empty string) matches anything, in this case
the time and the continent part of the location.
The third regular expression (`New_York`) matches the city part of the location.

Treating slashes in names as separators allows the user to refactor
hierarchies of tests without the need to change the naming.
It also simplifies the escaping rules.
The user should escape slashes in names, for instance by replacing them with
backslashes, if this poses a problem.

A unique sequence number is appended to test names that are not unique.
So one could just pass an empty string to `Run`
if there is no obvious naming scheme for subtests and the subtests
can easily be identified by their sequence number.
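
For example, here is a sketch of a table without a natural naming scheme;
the `Sum` function and its test cases are made up for illustration, and the
testing package tells the resulting subtests apart by their sequence numbers:

	func TestSum(t *testing.T) {
		testCases := []struct {
			in   []int
			want int
		}{
			{[]int{1, 2, 3}, 6},
			{nil, 0},
		}
		for _, tc := range testCases {
			t.Run("", func(t *testing.T) { // empty name; a sequence number is assigned
				if got := Sum(tc.in); got != tc.want {
					t.Errorf("Sum(%v) = %d; want %d", tc.in, got, tc.want)
				}
			})
		}
	}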
| 239 | + |
| 240 | +* Setup and Tear-down |
| 241 | + |
| 242 | +Subtests and sub-benchmarks can be used to manage common setup and tear-down code: |
| 243 | + |
| 244 | + func TestFoo(t *testing.T) { |
| 245 | + // <setup code> |
| 246 | + t.Run("A=1", func(t *testing.T) { ... }) |
| 247 | + t.Run("A=2", func(t *testing.T) { ... }) |
| 248 | + t.Run("B=1", func(t *testing.T) { |
| 249 | + if !test(foo{B:1}) { |
| 250 | + t.Fail() |
| 251 | + } |
| 252 | + }) |
| 253 | + // <tear-down code> |
| 254 | + } |
| 255 | + |
| 256 | +The setup and tear-down code will run if any of the enclosed subtests are run |
| 257 | +and will run at most once. |
| 258 | +This applies even if any of the subtests calls `Skip`, `Fail`, or `Fatal`. |
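
For instance, selecting only one of the subtests on the command line still runs
the shared setup and tear-down exactly once, because the enclosing `TestFoo`
function is what executes them:

	$ go test -run=TestFoo/A=1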

* Control of Parallelism

Subtests allow fine-grained control over parallelism.
To understand how to use subtests in this way,
it is important to understand the semantics of parallel tests.

Each test is associated with a test function.
A test is called a parallel test if its test function calls the `Parallel`
method on its instance of `testing.T`.
A parallel test never runs concurrently with a sequential test, and its execution
is suspended until its calling test function, that of the parent test,
has returned.
The `-parallel` flag defines the maximum number of parallel tests that can run
simultaneously.
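
For example, to allow at most four parallel tests to run at any one time
(the default is `GOMAXPROCS`):

	$ go test -parallel 4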

A test blocks until its test function returns and all of its subtests
have completed.
This means that the parallel tests that are run by a sequential test will
complete before any other consecutive sequential test is run.

This behavior is identical for tests created by `Run` and top-level tests.
In fact, under the hood top-level tests are implemented as subtests of
a hidden master test.

** Run a group of tests in parallel

The above semantics allows for running a group of tests in parallel with
each other but not with other parallel tests:

	func TestGroupedParallel(t *testing.T) {
		for _, tc := range testCases {
			tc := tc // capture range variable
			t.Run(tc.Name, func(t *testing.T) {
				t.Parallel()
				if got := foo(tc.in); got != tc.out {
					t.Errorf("got %v; want %v", got, tc.out)
				}
				...
			})
		}
	}

The outer test will not complete until all parallel tests started by `Run`
have completed.
As a result, no other parallel tests can run in parallel to these parallel tests.

Note that we need to capture the range variable to ensure that `tc` gets bound to
the correct instance: because each subtest calls `Parallel`, its body does not
finish running until after the loop has completed, so without the copy every
subtest would see the final value of `tc`.

** Cleaning up after a group of parallel tests

In the previous example we used the semantics to wait for a group of parallel
tests to complete before commencing other tests.
The same technique can be used to clean up after a group of parallel tests
that share common resources:

	func TestTeardownParallel(t *testing.T) {
		// <setup code>
		// This Run will not return until its parallel subtests complete.
		t.Run("group", func(t *testing.T) {
			t.Run("Test1", parallelTest1)
			t.Run("Test2", parallelTest2)
			t.Run("Test3", parallelTest3)
		})
		// <tear-down code>
	}

The behavior of waiting on a group of parallel tests is identical to that
of the previous example.

* Conclusion

Go 1.7's addition of subtests and sub-benchmarks allows you to write structured
tests and benchmarks in a natural way that blends nicely into the existing
tools.
One way to think about this is that earlier versions of the testing package had
a 1-level hierarchy: the package-level test was structured as a set of
individual tests and benchmarks.
Now that structure has been extended to those individual tests and benchmarks,
recursively.
In fact, in the implementation, the top-level tests and benchmarks are tracked
as if they were subtests and sub-benchmarks of an implicit master test and
benchmark: the treatment really is the same at all levels.

The ability for tests to define this structure enables fine-grained execution of
specific test cases, shared setup and teardown, and better control over test
parallelism.
We are excited to see what other uses people find. Enjoy.