Skip to content

Commit 1849b9e

Browse files
committed
Python: Tornado: Handle basic route setup with tuples
The reason this becomes valueable right now, is that we can mark routed params as taint-sources. Longer down the line, we can (hopefully) detect that a routed param will only accept digits, and mark it safe for some of our taint-tracking queries.
1 parent 39d8589 commit 1849b9e

File tree

6 files changed

+182
-32
lines changed

6 files changed

+182
-32
lines changed

python/ql/src/semmle/python/frameworks/Tornado.qll

Lines changed: 152 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
88
private import semmle.python.dataflow.new.RemoteFlowSources
99
private import semmle.python.dataflow.new.TaintTracking
1010
private import semmle.python.Concepts
11+
private import semmle.python.regex
1112

1213
/**
1314
* Provides models for the `tornado` PyPI package.
@@ -82,7 +83,7 @@ private module Tornado {
8283
* WARNING: Only holds for a few predefined attributes.
8384
*/
8485
private DataFlow::Node web_attr(DataFlow::TypeTracker t, string attr_name) {
85-
attr_name in ["RequestHandler"] and
86+
attr_name in ["RequestHandler", "Application"] and
8687
(
8788
t.start() and
8889
result = DataFlow::importNode("tornado.web" + "." + attr_name)
@@ -138,8 +139,19 @@ private module Tornado {
138139
DataFlow::Node subclassRef() { result = subclassRef(DataFlow::TypeTracker::end()) }
139140

140141
/** A RequestHandler class (most likely in project code). */
141-
private class RequestHandlerClass extends Class {
142+
class RequestHandlerClass extends Class {
142143
RequestHandlerClass() { this.getParent() = subclassRef().asExpr() }
144+
145+
/** Gets a reference to this class. */
146+
private DataFlow::Node getARef(DataFlow::TypeTracker t) {
147+
t.start() and
148+
result.asExpr().(ClassExpr) = this.getParent()
149+
or
150+
exists(DataFlow::TypeTracker t2 | result = this.getARef(t2).track(t2, t))
151+
}
152+
153+
/** Gets a reference to this class. */
154+
DataFlow::Node getARef() { result = this.getARef(DataFlow::TypeTracker::end()) }
143155
}
144156

145157
/**
@@ -229,6 +241,64 @@ private module Tornado {
229241
}
230242
}
231243
}
244+
245+
/**
246+
* Provides models for the `tornado.web.Application` class
247+
*
248+
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.Application.
249+
*/
250+
module Application {
251+
/** Gets a reference to the `tornado.web.Application` class. */
252+
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
253+
t.start() and
254+
result = web_attr("Application")
255+
or
256+
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
257+
}
258+
259+
/** Gets a reference to the `tornado.web.Application` class. */
260+
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
261+
262+
/**
263+
* A source of instances of `tornado.web.Application`, extend this class to model new instances.
264+
*
265+
* This can include instantiations of the class, return values from function
266+
* calls, or a special parameter that will be set when functions are called by an external
267+
* library.
268+
*
269+
* Use the predicate `Application::instance()` to get references to instances of `tornado.web.Application`.
270+
*/
271+
abstract class InstanceSource extends DataFlow::Node { }
272+
273+
/** A direct instantiation of `tornado.web.Application`. */
274+
class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
275+
override CallNode node;
276+
277+
ClassInstantiation() { node.getFunction() = classRef().asCfgNode() }
278+
}
279+
280+
/** Gets a reference to an instance of `tornado.web.Application`. */
281+
private DataFlow::Node instance(DataFlow::TypeTracker t) {
282+
t.start() and
283+
result instanceof InstanceSource
284+
or
285+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
286+
}
287+
288+
/** Gets a reference to an instance of `tornado.web.Application`. */
289+
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
290+
291+
/** Gets a reference to the `add_handlers` method. */
292+
private DataFlow::Node add_handlers(DataFlow::TypeTracker t) {
293+
t.startInAttr("add_handlers") and
294+
result = instance()
295+
or
296+
exists(DataFlow::TypeTracker t2 | result = add_handlers(t2).track(t2, t))
297+
}
298+
299+
/** Gets a reference to the `add_handlers` method. */
300+
DataFlow::Node add_handlers() { result = add_handlers(DataFlow::TypeTracker::end()) }
301+
}
232302
}
233303

234304
// -------------------------------------------------------------------------
@@ -366,4 +436,84 @@ private module Tornado {
366436
}
367437
}
368438
}
439+
440+
// ---------------------------------------------------------------------------
441+
// routing
442+
// ---------------------------------------------------------------------------
443+
/** A sequence that defines a number of route rules */
444+
SequenceNode routeSetupRuleList() {
445+
exists(CallNode call | call = any(tornado::web::Application::ClassInstantiation c).asCfgNode() |
446+
result in [call.getArg(0), call.getArgByName("handlers")]
447+
)
448+
or
449+
exists(CallNode call |
450+
call.getFunction() = tornado::web::Application::add_handlers().asCfgNode()
451+
|
452+
result in [call.getArg(1), call.getArgByName("host_handlers")]
453+
)
454+
or
455+
result = routeSetupRuleList().getElement(_).(TupleNode).getElement(1)
456+
}
457+
458+
/** A tornado route setup. */
459+
abstract class TornadoRouteSetup extends HTTP::Server::RouteSetup::Range { }
460+
461+
/**
462+
* A regex that is used to set up a route.
463+
*
464+
* Needs this subclass to be considered a RegexString.
465+
*/
466+
private class TornadoRouteRegex extends RegexString {
467+
TornadoRouteSetup setup;
468+
469+
TornadoRouteRegex() {
470+
this instanceof StrConst and
471+
DataFlow::localFlow(DataFlow::exprNode(this), setup.getUrlPatternArg())
472+
}
473+
474+
TornadoRouteSetup getRouteSetup() { result = setup }
475+
}
476+
477+
/** A route setup using a tuple. */
478+
private class TornadoTupleRouteSetup extends TornadoRouteSetup, DataFlow::CfgNode {
479+
override TupleNode node;
480+
481+
TornadoTupleRouteSetup() {
482+
node = routeSetupRuleList().getElement(_) and
483+
count(node.getElement(_)) = 2 and
484+
not node.getElement(1) instanceof SequenceNode
485+
}
486+
487+
override DataFlow::Node getUrlPatternArg() { result.asCfgNode() = node.getElement(0) }
488+
489+
override Function getARequestHandler() {
490+
exists(tornado::web::RequestHandler::RequestHandlerClass cls |
491+
cls.getARef().asCfgNode() = node.getElement(1) and
492+
// TODO: Proper MRO
493+
result = cls.getAMethod() and
494+
result.getName() = HTTP::httpVerbLower()
495+
)
496+
}
497+
498+
override Parameter getARoutedParameter() {
499+
// If we don't know the URL pattern, we simply mark all parameters as a routed
500+
// parameter. This should give us more RemoteFlowSources but could also lead to
501+
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
502+
exists(Function requestHandler | requestHandler = this.getARequestHandler() |
503+
not exists(this.getUrlPattern()) and
504+
result in [requestHandler.getArg(_), requestHandler.getArgByName(_)] and
505+
not result = requestHandler.getArg(0)
506+
)
507+
or
508+
exists(Function requestHandler, TornadoRouteRegex regex |
509+
requestHandler = this.getARequestHandler() and
510+
regex.getRouteSetup() = this
511+
|
512+
// first group will have group number 1
513+
result = requestHandler.getArg(regex.getGroupNumber(_, _))
514+
or
515+
result = requestHandler.getArgByName(regex.getGroupName(_, _))
516+
)
517+
}
518+
}
369519
}

python/ql/test/experimental/library-tests/frameworks/tornado/TestTaint.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
| taint_test.py:6 | fail | get | name |
2-
| taint_test.py:6 | fail | get | number |
1+
| taint_test.py:6 | ok | get | name |
2+
| taint_test.py:6 | ok | get | number |
33
| taint_test.py:7 | ok | get | foo |
44
| taint_test.py:11 | ok | get | self.get_argument(..) |
55
| taint_test.py:12 | ok | get | self.get_arguments(..) |

python/ql/test/experimental/library-tests/frameworks/tornado/basic.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,26 @@
22

33

44
class BasicHandler(tornado.web.RequestHandler):
5-
def get(self): # $ MISSING: requestHandler
5+
def get(self): # $ requestHandler
66
self.write("BasicHandler " + self.get_argument("xss"))
77

8-
def post(self): # $ MISSING: requestHandler
8+
def post(self): # $ requestHandler
99
self.write("BasicHandler (POST)")
1010

1111

1212
class DeepInheritance(BasicHandler):
13-
def get(self): # $ MISSING: requestHandler
13+
def get(self): # $ requestHandler
1414
self.write("DeepInheritance" + self.get_argument("also_xss"))
1515

1616

1717
class FormHandler(tornado.web.RequestHandler):
18-
def post(self): # $ MISSING: requestHandler
18+
def post(self): # $ requestHandler
1919
name = self.get_body_argument("name")
2020
self.write(name)
2121

2222

2323
class RedirectHandler(tornado.web.RequestHandler):
24-
def get(self): # $ MISSING: requestHandler
24+
def get(self): # $ requestHandler
2525
req = self.request
2626
h = req.headers
2727
url = h["url"]
@@ -40,11 +40,11 @@ class ReverseInheritance(BaseReverseInheritance):
4040
def make_app():
4141
return tornado.web.Application(
4242
[
43-
(r"/basic", BasicHandler), # $ MISSING: routeSetup="/basic"
44-
(r"/deep", DeepInheritance), # $ MISSING: routeSetup="/deep"
45-
(r"/form", FormHandler), # $ MISSING: routeSetup="/form"
46-
(r"/redirect", RedirectHandler), # $ MISSING: routeSetup="/redirect"
47-
(r"/reverse-inheritance", ReverseInheritance), # $ MISSING: routeSetup="/reverse-inheritance"
43+
(r"/basic", BasicHandler), # $ routeSetup="/basic"
44+
(r"/deep", DeepInheritance), # $ routeSetup="/deep"
45+
(r"/form", FormHandler), # $ routeSetup="/form"
46+
(r"/redirect", RedirectHandler), # $ routeSetup="/redirect"
47+
(r"/reverse-inheritance", ReverseInheritance), # $ routeSetup="/reverse-inheritance"
4848
],
4949
debug=True,
5050
)

python/ql/test/experimental/library-tests/frameworks/tornado/responses.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
class ResponseWriting(tornado.web.RequestHandler):
5-
def get(self, type_): # $ MISSING: requestHandler routedParameter=type_
5+
def get(self, type_): # $ requestHandler routedParameter=type_
66
if type_ == "str":
77
self.write("foo")
88
elif type_ == "bytes":
@@ -17,7 +17,7 @@ def get(self, type_): # $ MISSING: requestHandler routedParameter=type_
1717
def make_app():
1818
return tornado.web.Application(
1919
[
20-
(r"/ResponseWriting/(str|bytes|dict)", ResponseWriting), # $ MISSING: routeSetup="/ResponseWriting/(str|bytes|dict)"
20+
(r"/ResponseWriting/(str|bytes|dict)", ResponseWriting), # $ routeSetup="/ResponseWriting/(str|bytes|dict)"
2121
],
2222
debug=True,
2323
)

python/ql/test/experimental/library-tests/frameworks/tornado/routing_test.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55
class FooHandler(tornado.web.RequestHandler):
6-
def get(self, x, y=None, not_used=None): # $ MISSING: requestHandler routedParameter=x routedParameter=y
6+
def get(self, x, y=None, not_used=None): # $ requestHandler routedParameter=x routedParameter=y
77
self.write("FooHandler {} {}".format(x, y))
88

99

@@ -18,32 +18,32 @@ def get(self, x, y=None, not_used=None): # $ MISSING: requestHandler routedPara
1818

1919

2020
class KwArgs(tornado.web.RequestHandler):
21-
def get(self, *, x, y=None, not_used=None): # $ MISSING: requestHandler routedParameter=x routedParameter=y
21+
def get(self, *, x, y=None, not_used=None): # $ requestHandler routedParameter=x routedParameter=y
2222
self.write("KwArgs {} {}".format(x, y))
2323

2424

2525
class OnlyLocalhost(tornado.web.RequestHandler):
26-
def get(self): # $ MISSING: requestHandler
26+
def get(self): # $ requestHandler
2727
self.write("OnlyLocalhost")
2828

2929

3030
class One(tornado.web.RequestHandler):
31-
def get(self): # $ MISSING: requestHandler
31+
def get(self): # $ requestHandler
3232
self.write("One")
3333

3434

3535
class Two(tornado.web.RequestHandler):
36-
def get(self): # $ MISSING: requestHandler
36+
def get(self): # $ requestHandler
3737
self.write("Two")
3838

3939

4040
class Three(tornado.web.RequestHandler):
41-
def get(self): # $ MISSING: requestHandler
41+
def get(self): # $ requestHandler
4242
self.write("Three")
4343

4444

4545
class AddedLater(tornado.web.RequestHandler):
46-
def get(self, x, y=None, not_used=None): # $ MISSING: requestHandler routedParameter=x routedParameter=y
46+
def get(self, x, y=None, not_used=None): # $ requestHandler routedParameter=x routedParameter=y
4747
self.write("AddedLater {} {}".format(x, y))
4848

4949

@@ -59,26 +59,26 @@ def make_app():
5959
# see https://www.tornadoweb.org/en/stable/routing.html for even more examples
6060
app = tornado.web.Application(
6161
[
62-
(r"/foo/([0-9]+)/([0-9]+)?", FooHandler), # $ MISSING: routeSetup="/foo/([0-9]+)/([0-9]+)?"
62+
(r"/foo/([0-9]+)/([0-9]+)?", FooHandler), # $ routeSetup="/foo/([0-9]+)/([0-9]+)?"
6363
tornado.web.URLSpec(r"/bar/([0-9]+)/([0-9]+)?", BarHandler), # $ MISSING: routeSetup="/bar/([0-9]+)/([0-9]+)?"
6464
# Very verbose way to write same as FooHandler
6565
tornado.routing.Rule(tornado.routing.PathMatches(r"/baz/([0-9]+)/([0-9]+)?"), BazHandler), # $ MISSING: routeSetup="/baz/([0-9]+)/([0-9]+)?"
66-
(r"/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?", KwArgs), # $ MISSING: routeSetup="/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?"
66+
(r"/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?", KwArgs), # $ routeSetup="/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?"
6767
# You can do nesting
6868
(r"/(one|two|three)", [
69-
(r"/one", One), # $ MISSING: routeSetup="/one"
70-
(r"/two", Two), # $ MISSING: routeSetup="/two"
71-
(r"/three", Three) # $ MISSING: routeSetup="/three"
69+
(r"/one", One), # $ routeSetup="/one"
70+
(r"/two", Two), # $ routeSetup="/two"
71+
(r"/three", Three) # $ routeSetup="/three"
7272
]),
7373
# which is _one_ recommended way to ensure known host is used
7474
(tornado.routing.HostMatches(r"(localhost|127\.0\.0\.1)"), [
75-
("/only-localhost", OnlyLocalhost) # $ MISSING: routeSetup="/only-localhost"
75+
("/only-localhost", OnlyLocalhost) # $ routeSetup="/only-localhost"
7676
]),
7777

7878
],
7979
debug=True,
8080
)
81-
app.add_handlers(r".*", [(r"/added-later/([0-9]+)/([0-9]+)?", AddedLater)]) # $ MISSING: routeSetup="/added-later/([0-9]+)/([0-9]+)?"
81+
app.add_handlers(r".*", [(r"/added-later/([0-9]+)/([0-9]+)?", AddedLater)]) # $ routeSetup="/added-later/([0-9]+)/([0-9]+)?"
8282
return app
8383

8484

python/ql/test/experimental/library-tests/frameworks/tornado/taint_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
class TaintTest(tornado.web.RequestHandler):
5-
def get(self, name = "World!", number="0", foo="foo"): # $ MISSING: requestHandler routedParameter=name routedParameter=number
5+
def get(self, name = "World!", number="0", foo="foo"): # $ requestHandler routedParameter=name routedParameter=number
66
ensure_tainted(name, number)
77
ensure_not_tainted(foo)
88

@@ -72,7 +72,7 @@ def get(self, name = "World!", number="0", foo="foo"): # $ MISSING: requestHand
7272
def make_app():
7373
return tornado.web.Application(
7474
[
75-
(r"/test_taint/([^/]+)/([0-9]+)", TaintTest), # $ MISSING: routeSetup="/test_taint/([^/]+)/([0-9]+)"
75+
(r"/test_taint/([^/]+)/([0-9]+)", TaintTest), # $ routeSetup="/test_taint/([^/]+)/([0-9]+)"
7676
],
7777
debug=True,
7878
)

0 commit comments

Comments
 (0)