Skip to content

Commit 4641150

Browse files
committed
Python: Basic taint-modeling of tornado.web.RequestHandler classes
1 parent 9cd8a86 commit 4641150

File tree

3 files changed

+242
-14
lines changed

3 files changed

+242
-14
lines changed

python/ql/src/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ private import semmle.python.frameworks.MySQLdb
1212
private import semmle.python.frameworks.Psycopg2
1313
private import semmle.python.frameworks.PyMySQL
1414
private import semmle.python.frameworks.Stdlib
15+
private import semmle.python.frameworks.Tornado
1516
private import semmle.python.frameworks.Yaml
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `tornado` PyPI package.
3+
* See https://www.tornadoweb.org/en/stable/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.RemoteFlowSources
9+
private import semmle.python.dataflow.new.TaintTracking
10+
private import semmle.python.Concepts
11+
12+
/**
13+
* Provides models for the `tornado` PyPI package.
14+
* See https://www.tornadoweb.org/en/stable/.
15+
*/
16+
private module Tornado {
17+
// ---------------------------------------------------------------------------
18+
// tornado
19+
// ---------------------------------------------------------------------------
20+
/**
* Gets a reference to the `tornado` module, found with type tracker `t`.
*
* Holds either for an import node of `tornado` (with `t` at its start), or
* for a node obtained by `track`-ing a reference found with another tracker `t2`.
*/
21+
private DataFlow::Node tornado(DataFlow::TypeTracker t) {
22+
// Base case: the import of `tornado` itself.
t.start() and
23+
result = DataFlow::importNode("tornado")
24+
or
25+
// Recursive case: extend a reference found with tracker `t2` via `track(t2, t)`.
exists(DataFlow::TypeTracker t2 | result = tornado(t2).track(t2, t))
26+
}
27+
28+
/**
* Gets a reference to the `tornado` module.
*
* This is `tornado(t)` evaluated with `t` bound to `DataFlow::TypeTracker::end()`.
*/
29+
DataFlow::Node tornado() { result = tornado(DataFlow::TypeTracker::end()) }
30+
31+
/**
32+
* Gets a reference to the attribute `attr_name` of the `tornado` module, found with type tracker `t`.
33+
* WARNING: Only holds for a few predefined attributes (the list in the body currently contains only `web`).
34+
*/
35+
private DataFlow::Node tornado_attr(DataFlow::TypeTracker t, string attr_name) {
36+
// Restrict `attr_name` to the modeled attributes.
attr_name in ["web"] and
37+
(
38+
// The attribute is imported directly, e.g. as `tornado.web`.
t.start() and
39+
result = DataFlow::importNode("tornado" + "." + attr_name)
40+
or
41+
// Or the tracker starts in attribute `attr_name` at a reference to the `tornado` module.
t.startInAttr(attr_name) and
42+
result = tornado()
43+
)
44+
or
45+
// Due to bad performance when using normal setup with `tornado_attr(t2, attr_name).track(t2, t)`
46+
// we have inlined that code and forced a join
// (the step itself is computed in the `pragma[nomagic]` helper `tornado_attr_first_join`).
47+
exists(DataFlow::TypeTracker t2 |
48+
exists(DataFlow::StepSummary summary |
49+
tornado_attr_first_join(t2, attr_name, result, summary) and
50+
t = t2.append(summary)
51+
)
52+
)
53+
}
54+
55+
/**
* Helper for the recursive case of `tornado_attr(t, attr_name)`.
*
* Holds if `DataFlow::StepSummary::step` relates a node `tornado_attr(t2, attr_name)`
* to `res` with step summary `summary`.
*
* It is a separate `pragma[nomagic]` predicate as part of the forced join described
* in the performance comment inside `tornado_attr`.
*/
pragma[nomagic]
56+
private predicate tornado_attr_first_join(
57+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
58+
) {
59+
DataFlow::StepSummary::step(tornado_attr(t2, attr_name), res, summary)
60+
}
61+
62+
/**
63+
* Gets a reference to the attribute `attr_name` of the `tornado` module.
64+
* WARNING: Only holds for a few predefined attributes.
*
* This is the tracked overload of `tornado_attr` evaluated with the tracker
* bound to `DataFlow::TypeTracker::end()`.
65+
*/
66+
private DataFlow::Node tornado_attr(string attr_name) {
67+
result = tornado_attr(DataFlow::TypeTracker::end(), attr_name)
68+
}
69+
70+
/** Provides models for the `tornado` module. */
71+
module tornado {
72+
// -------------------------------------------------------------------------
73+
// tornado.web
74+
// -------------------------------------------------------------------------
75+
/**
* Gets a reference to the `tornado.web` module.
*
* Defined as the `web` attribute of the `tornado` module, via `tornado_attr("web")`.
*/
76+
DataFlow::Node web() { result = tornado_attr("web") }
77+
78+
/** Provides models for the `tornado.web` module. */
79+
module web {
80+
/**
81+
* Gets a reference to the attribute `attr_name` of the `tornado.web` module, found with type tracker `t`.
82+
* WARNING: Only holds for a few predefined attributes (the list in the body currently contains only `RequestHandler`).
83+
*/
84+
private DataFlow::Node web_attr(DataFlow::TypeTracker t, string attr_name) {
85+
// Restrict `attr_name` to the modeled attributes.
attr_name in ["RequestHandler"] and
86+
(
87+
// The attribute is imported directly, e.g. as `tornado.web.RequestHandler`.
t.start() and
88+
result = DataFlow::importNode("tornado.web" + "." + attr_name)
89+
or
90+
// Or the tracker starts in attribute `attr_name` at a reference to the `tornado.web` module.
t.startInAttr(attr_name) and
91+
result = web()
92+
)
93+
or
94+
// Due to bad performance when using normal setup with `web_attr(t2, attr_name).track(t2, t)`
95+
// we have inlined that code and forced a join
// (the step itself is computed in the `pragma[nomagic]` helper `web_attr_first_join`).
96+
exists(DataFlow::TypeTracker t2 |
97+
exists(DataFlow::StepSummary summary |
98+
web_attr_first_join(t2, attr_name, result, summary) and
99+
t = t2.append(summary)
100+
)
101+
)
102+
}
103+
104+
/**
* Helper for the recursive case of `web_attr(t, attr_name)`.
*
* Holds if `DataFlow::StepSummary::step` relates a node `web_attr(t2, attr_name)`
* to `res` with step summary `summary`.
*
* It is a separate `pragma[nomagic]` predicate as part of the forced join described
* in the performance comment inside `web_attr`.
*/
pragma[nomagic]
105+
private predicate web_attr_first_join(
106+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
107+
DataFlow::StepSummary summary
108+
) {
109+
DataFlow::StepSummary::step(web_attr(t2, attr_name), res, summary)
110+
}
111+
112+
/**
113+
* Gets a reference to the attribute `attr_name` of the `tornado.web` module.
114+
* WARNING: Only holds for a few predefined attributes.
*
* This is the tracked overload of `web_attr` evaluated with the tracker
* bound to `DataFlow::TypeTracker::end()`.
115+
*/
116+
private DataFlow::Node web_attr(string attr_name) {
117+
result = web_attr(DataFlow::TypeTracker::end(), attr_name)
118+
}
119+
120+
/**
121+
* Provides models for the `tornado.web.RequestHandler` class and subclasses.
122+
*
123+
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.
124+
*/
125+
module RequestHandler {
126+
/**
* Gets a reference to the `tornado.web.RequestHandler` class or any subclass,
* found with type tracker `t`.
*
* Three cases contribute: the `RequestHandler` attribute of `tornado.web`,
* a class expression that lists an already-found reference among its bases,
* and any node obtained by `track`-ing a reference found with another tracker.
*/
127+
private DataFlow::Node subclassRef(DataFlow::TypeTracker t) {
128+
// Base case: `tornado.web.RequestHandler` itself.
t.start() and
129+
result = web_attr("RequestHandler")
130+
or
131+
// subclasses in project code
// (a `ClassExpr` with a base that is a reference found with `t.continue()`)
132+
result.asExpr().(ClassExpr).getABase() = subclassRef(t.continue()).asExpr()
133+
or
134+
// Recursive case: extend a reference found with tracker `t2` via `track(t2, t)`.
exists(DataFlow::TypeTracker t2 | result = subclassRef(t2).track(t2, t))
135+
}
136+
137+
/**
* Gets a reference to the `tornado.web.RequestHandler` class or any subclass.
*
* This is `subclassRef(t)` evaluated with `t` bound to `DataFlow::TypeTracker::end()`.
*/
138+
DataFlow::Node subclassRef() { result = subclassRef(DataFlow::TypeTracker::end()) }
139+
140+
/**
* A RequestHandler class (most likely in project code).
*
* Concretely, a `Class` whose parent (`getParent()`) is the expression of one of
* the nodes returned by `subclassRef()`.
*/
141+
private class RequestHandlerClass extends Class {
142+
RequestHandlerClass() { this.getParent() = subclassRef().asExpr() }
143+
}
144+
145+
/**
146+
* A source of instances of the `tornado.web.RequestHandler` class or any subclass. Extend this class to model new instances.
147+
*
148+
* This can include instantiations of the class, return values from function
149+
* calls, or a special parameter that will be set when functions are called by an external
150+
* library.
151+
*
152+
* Use the predicate `RequestHandler::instance()` to get references to instances of the `tornado.web.RequestHandler` class or any subclass.
153+
*/
154+
abstract class InstanceSource extends DataFlow::Node { }
155+
156+
/**
* The `self` parameter in a method on the `tornado.web.RequestHandler` class or any subclass.
*
* Besides being an `InstanceSource`, this parameter is also a `RemoteFlowSource::Range`
* with source type `"tornado.web.RequestHandler"`.
*/
157+
private class SelfParam extends InstanceSource, RemoteFlowSource::Range,
158+
DataFlow::ParameterNode {
159+
SelfParam() {
160+
// The first argument (index 0) of any method of a `RequestHandlerClass`.
// NOTE(review): this matches argument 0 of every method returned by `getAMethod()`;
// presumably that also covers static/class methods, whose first argument is not `self` - confirm.
exists(RequestHandlerClass cls | cls.getAMethod().getArg(0) = this.getParameter())
161+
}
162+
163+
override string getSourceType() { result = "tornado.web.RequestHandler" }
164+
}
165+
166+
/**
* Gets a reference to an instance of the `tornado.web.RequestHandler` class or any subclass,
* found with type tracker `t`.
*
* Tracking starts at any `InstanceSource` node.
*/
167+
private DataFlow::Node instance(DataFlow::TypeTracker t) {
168+
// Base case: every `InstanceSource` is an instance.
t.start() and
169+
result instanceof InstanceSource
170+
or
171+
// Recursive case: extend a reference found with tracker `t2` via `track(t2, t)`.
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
172+
}
173+
174+
/**
* Gets a reference to an instance of the `tornado.web.RequestHandler` class or any subclass.
*
* This is `instance(t)` evaluated with `t` bound to `DataFlow::TypeTracker::end()`.
*/
175+
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
176+
177+
/**
* Gets a reference to one of the methods `get_argument`, `get_body_argument`, `get_query_argument`,
* found with type tracker `t`.
*
* Tracking starts in one of these attributes at a RequestHandler instance (`instance()`).
*/
178+
private DataFlow::Node argumentMethod(DataFlow::TypeTracker t) {
179+
// Base case: the tracker starts in one of the listed attributes at a handler instance.
t.startInAttr(["get_argument", "get_body_argument", "get_query_argument"]) and
180+
result = instance()
181+
or
182+
// Recursive case: extend a reference found with tracker `t2` via `track(t2, t)`.
exists(DataFlow::TypeTracker t2 | result = argumentMethod(t2).track(t2, t))
183+
}
184+
185+
/**
* Gets a reference to one of the methods `get_argument`, `get_body_argument`, `get_query_argument`.
*
* This is `argumentMethod(t)` evaluated with `t` bound to `DataFlow::TypeTracker::end()`.
*/
186+
DataFlow::Node argumentMethod() { result = argumentMethod(DataFlow::TypeTracker::end()) }
187+
188+
/**
* Gets a reference to one of the methods `get_arguments`, `get_body_arguments`, `get_query_arguments`,
* found with type tracker `t`.
*
* Tracking starts in one of these attributes at a RequestHandler instance (`instance()`).
*/
189+
private DataFlow::Node argumentsMethod(DataFlow::TypeTracker t) {
190+
// Base case: the tracker starts in one of the listed attributes at a handler instance.
t.startInAttr(["get_arguments", "get_body_arguments", "get_query_arguments"]) and
191+
result = instance()
192+
or
193+
// Recursive case: extend a reference found with tracker `t2` via `track(t2, t)`.
exists(DataFlow::TypeTracker t2 | result = argumentsMethod(t2).track(t2, t))
194+
}
195+
196+
/**
* Gets a reference to one of the methods `get_arguments`, `get_body_arguments`, `get_query_arguments`.
*
* This is `argumentsMethod(t)` evaluated with `t` bound to `DataFlow::TypeTracker::end()`.
*/
196+
DataFlow::Node argumentsMethod() { result = argumentsMethod(DataFlow::TypeTracker::end()) }
198+
199+
/**
* Additional taint steps for `tornado.web.RequestHandler` instances.
*
* Taint flows:
* - from a handler instance to an attribute read on it that is a reference to
*   one of the `get_*argument(s)` methods,
* - from a reference to such a method to a call of it,
* - from a handler instance to a read of its `path_args`, `path_kwargs`
*   or `request` attribute.
*/
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
200+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
201+
// Method access
// (instance -> attribute read that refers to one of the argument methods)
202+
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
203+
nodeFrom = instance() and
204+
nodeTo in [argumentMethod(), argumentsMethod()]
205+
or
206+
// Method call
// (method reference -> call node whose function is that reference)
207+
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode() and
208+
nodeFrom in [argumentMethod(), argumentsMethod()]
209+
or
210+
// Attributes
// (instance -> read of one of the attributes listed below)
211+
nodeFrom = instance() and
212+
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
213+
read.getAttributeName() in [
214+
// List[str]
215+
"path_args",
216+
// Dict[str, str]
217+
"path_kwargs",
218+
// tornado.httputil.HTTPServerRequest
219+
"request"
220+
]
221+
)
222+
}
223+
}
224+
}
225+
}
226+
}
227+
}

python/ql/test/experimental/library-tests/frameworks/tornado/TestTaint.expected

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
| taint_test.py:6 | fail | get | name |
22
| taint_test.py:6 | fail | get | number |
33
| taint_test.py:7 | ok | get | foo |
4-
| taint_test.py:11 | fail | get | self.get_argument(..) |
5-
| taint_test.py:12 | fail | get | self.get_arguments(..) |
6-
| taint_test.py:13 | fail | get | self.get_arguments(..)[0] |
7-
| taint_test.py:15 | fail | get | self.get_body_argument(..) |
8-
| taint_test.py:16 | fail | get | self.get_body_arguments(..) |
9-
| taint_test.py:17 | fail | get | self.get_body_arguments(..)[0] |
10-
| taint_test.py:19 | fail | get | self.get_query_argument(..) |
11-
| taint_test.py:20 | fail | get | self.get_query_arguments(..) |
12-
| taint_test.py:21 | fail | get | self.get_query_arguments(..)[0] |
13-
| taint_test.py:23 | fail | get | self.path_args |
14-
| taint_test.py:24 | fail | get | self.path_args[0] |
15-
| taint_test.py:26 | fail | get | self.path_kwargs |
16-
| taint_test.py:27 | fail | get | self.path_kwargs["name"] |
17-
| taint_test.py:34 | fail | get | request |
4+
| taint_test.py:11 | ok | get | self.get_argument(..) |
5+
| taint_test.py:12 | ok | get | self.get_arguments(..) |
6+
| taint_test.py:13 | ok | get | self.get_arguments(..)[0] |
7+
| taint_test.py:15 | ok | get | self.get_body_argument(..) |
8+
| taint_test.py:16 | ok | get | self.get_body_arguments(..) |
9+
| taint_test.py:17 | ok | get | self.get_body_arguments(..)[0] |
10+
| taint_test.py:19 | ok | get | self.get_query_argument(..) |
11+
| taint_test.py:20 | ok | get | self.get_query_arguments(..) |
12+
| taint_test.py:21 | ok | get | self.get_query_arguments(..)[0] |
13+
| taint_test.py:23 | ok | get | self.path_args |
14+
| taint_test.py:24 | ok | get | self.path_args[0] |
15+
| taint_test.py:26 | ok | get | self.path_kwargs |
16+
| taint_test.py:27 | ok | get | self.path_kwargs["name"] |
17+
| taint_test.py:34 | ok | get | request |
1818
| taint_test.py:36 | fail | get | request.uri |
1919
| taint_test.py:37 | fail | get | request.path |
2020
| taint_test.py:38 | fail | get | request.query |

0 commit comments

Comments
 (0)